4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
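# Illustrative sketch (not part of the original module): an LU's Exec method
# could hand follow-up work back to the job queue like this, using
# opcodes.OpTestDelay purely as a placeholder opcode and "done" as an
# arbitrary extra return value:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]
#     return ResultWithJobs(jobs, done="initial part finished")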
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as a purely lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly
177 The function is allowed to change the self.op attribute so that
178 later methods no longer need to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
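# Illustrative sketch (not from the original code): a typical override
# recalculates node locks from the instance locks acquired at the previous
# level, e.g.:
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)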
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If no nodes are needed, an
309 empty list should be returned (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # The API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
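# Illustrative usage (not from the original source): an instance-level LU can
# usually reduce its ExpandNames to something like:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE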
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
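# Illustrative sketch (hypothetical class, not in the original module): a
# minimal tasklet only needs CheckPrereq and Exec; the owning LU handles all
# locking on its behalf:
#
#   class _NoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")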
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is of a wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
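# Illustrative example (hypothetical parameter names and values): resetting
# one parameter to its default while overriding another:
#
#   _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda"},
#                     {"kernel_path": constants.VALUE_DEFAULT,
#                      "root_path": "/dev/vda"})
#   # => {"root_path": "/dev/vda"}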
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
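# Illustrative usage (not from the original source): after narrowing down the
# target node, an LU could drop all other node locks it acquired earlier:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])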
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
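# Illustrative example (hypothetical names): an instance "inst1" with an LV
# "xenvg/disk0" on node "node1" would contribute the entry
#   {("node1", "xenvg/disk0"): "inst1"}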
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node is not supporting the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _ExpandItemName(fn, name, kind):
1020 """Expand an item name.
1022 @param fn: the function to use for expansion
1023 @param name: requested item name
1024 @param kind: text description ('Node' or 'Instance')
1025 @return: the resolved (full) name
1026 @raise errors.OpPrereqError: if the item is not found
1029 full_name = fn(name)
1030 if full_name is None:
1031 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1036 def _ExpandNodeName(cfg, name):
1037 """Wrapper over L{_ExpandItemName} for nodes."""
1038 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1041 def _ExpandInstanceName(cfg, name):
1042 """Wrapper over L{_ExpandItemName} for instance."""
1043 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1046 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1047 minmem, maxmem, vcpus, nics, disk_template, disks,
1048 bep, hvp, hypervisor_name, tags):
1049 """Builds instance related env variables for hooks
1051 This builds the hook environment from individual variables.
1054 @param name: the name of the instance
1055 @type primary_node: string
1056 @param primary_node: the name of the instance's primary node
1057 @type secondary_nodes: list
1058 @param secondary_nodes: list of secondary nodes as strings
1059 @type os_type: string
1060 @param os_type: the name of the instance's OS
1061 @type status: string
1062 @param status: the desired status of the instance
1063 @type minmem: string
1064 @param minmem: the minimum memory size of the instance
1065 @type maxmem: string
1066 @param maxmem: the maximum memory size of the instance
1068 @param vcpus: the count of VCPUs the instance has
1070 @param nics: list of tuples (ip, mac, mode, link) representing
1071 the NICs the instance has
1072 @type disk_template: string
1073 @param disk_template: the disk template of the instance
1075 @param disks: the list of (size, mode) pairs
1077 @param bep: the backend parameters for the instance
1079 @param hvp: the hypervisor parameters for the instance
1080 @type hypervisor_name: string
1081 @param hypervisor_name: the hypervisor for the instance
1083 @param tags: list of instance tags as strings
1085 @return: the hook environment for this instance
1090 "INSTANCE_NAME": name,
1091 "INSTANCE_PRIMARY": primary_node,
1092 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1093 "INSTANCE_OS_TYPE": os_type,
1094 "INSTANCE_STATUS": status,
1095 "INSTANCE_MINMEM": minmem,
1096 "INSTANCE_MAXMEM": maxmem,
1097 # TODO(2.7) remove deprecated "memory" value
1098 "INSTANCE_MEMORY": maxmem,
1099 "INSTANCE_VCPUS": vcpus,
1100 "INSTANCE_DISK_TEMPLATE": disk_template,
1101 "INSTANCE_HYPERVISOR": hypervisor_name,
1104 nic_count = len(nics)
1105 for idx, (ip, mac, mode, link) in enumerate(nics):
1108 env["INSTANCE_NIC%d_IP" % idx] = ip
1109 env["INSTANCE_NIC%d_MAC" % idx] = mac
1110 env["INSTANCE_NIC%d_MODE" % idx] = mode
1111 env["INSTANCE_NIC%d_LINK" % idx] = link
1112 if mode == constants.NIC_MODE_BRIDGED:
1113 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1117 env["INSTANCE_NIC_COUNT"] = nic_count
1120 disk_count = len(disks)
1121 for idx, (size, mode) in enumerate(disks):
1122 env["INSTANCE_DISK%d_SIZE" % idx] = size
1123 env["INSTANCE_DISK%d_MODE" % idx] = mode
1127 env["INSTANCE_DISK_COUNT"] = disk_count
1132 env["INSTANCE_TAGS"] = " ".join(tags)
1134 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1135 for key, value in source.items():
1136 env["INSTANCE_%s_%s" % (kind, key)] = value
1141 def _NICListToTuple(lu, nics):
1142 """Build a list of nic information tuples.
1144 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1145 value in LUInstanceQueryData.
1147 @type lu: L{LogicalUnit}
1148 @param lu: the logical unit on whose behalf we execute
1149 @type nics: list of L{objects.NIC}
1150 @param nics: list of nics to convert to hooks tuples
1154 cluster = lu.cfg.GetClusterInfo()
1158 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1159 mode = filled_params[constants.NIC_MODE]
1160 link = filled_params[constants.NIC_LINK]
1161 hooks_nics.append((ip, mac, mode, link))
1165 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1166 """Builds instance related env variables for hooks from an object.
1168 @type lu: L{LogicalUnit}
1169 @param lu: the logical unit on whose behalf we execute
1170 @type instance: L{objects.Instance}
1171 @param instance: the instance for which we should build the
1173 @type override: dict
1174 @param override: dictionary with key/values that will override
1177 @return: the hook environment dictionary
1180 cluster = lu.cfg.GetClusterInfo()
1181 bep = cluster.FillBE(instance)
1182 hvp = cluster.FillHV(instance)
1184 "name": instance.name,
1185 "primary_node": instance.primary_node,
1186 "secondary_nodes": instance.secondary_nodes,
1187 "os_type": instance.os,
1188 "status": instance.admin_state,
1189 "maxmem": bep[constants.BE_MAXMEM],
1190 "minmem": bep[constants.BE_MINMEM],
1191 "vcpus": bep[constants.BE_VCPUS],
1192 "nics": _NICListToTuple(lu, instance.nics),
1193 "disk_template": instance.disk_template,
1194 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1197 "hypervisor_name": instance.hypervisor,
1198 "tags": instance.tags,
1201 args.update(override)
1202 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1205 def _AdjustCandidatePool(lu, exceptions):
1206 """Adjust the candidate pool after node operations.
1209 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1211 lu.LogInfo("Promoted nodes to master candidate role: %s",
1212 utils.CommaJoin(node.name for node in mod_list))
1213 for name in mod_list:
1214 lu.context.ReaddNode(name)
1215 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1217 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1221 def _DecideSelfPromotion(lu, exceptions=None):
1222 """Decide whether I should promote myself as a master candidate.
1225 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1226 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1227 # the new node will increase mc_max by one, so:
1228 mc_should = min(mc_should + 1, cp_size)
1229 return mc_now < mc_should
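# Illustrative example (hypothetical numbers): with cp_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3 and mc_should = 3, the new node
# raises the target to min(3 + 1, 10) = 4; since 3 < 4, it promotes itself.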
1232 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1233 """Check that the brigdes needed by a list of nics exist.
1236 cluster = lu.cfg.GetClusterInfo()
1237 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1238 brlist = [params[constants.NIC_LINK] for params in paramslist
1239 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1241 result = lu.rpc.call_bridges_exist(target_node, brlist)
1242 result.Raise("Error checking bridges on destination node '%s'" %
1243 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1246 def _CheckInstanceBridgesExist(lu, instance, node=None):
1247 """Check that the brigdes needed by an instance exist.
1251 node = instance.primary_node
1252 _CheckNicsBridgesExist(lu, instance.nics, node)
1255 def _CheckOSVariant(os_obj, name):
1256 """Check whether an OS name conforms to the os variants specification.
1258 @type os_obj: L{objects.OS}
1259 @param os_obj: OS object to check
1261 @param name: OS name passed by the user, to check for validity
1264 variant = objects.OS.GetVariant(name)
1265 if not os_obj.supported_variants:
1267 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1268 " passed)" % (os_obj.name, variant),
1272 raise errors.OpPrereqError("OS name must include a variant",
1275 if variant not in os_obj.supported_variants:
1276 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1279 def _GetNodeInstancesInner(cfg, fn):
1280 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1283 def _GetNodeInstances(cfg, node_name):
1284 """Returns a list of all primary and secondary instances on a node.
1288 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1291 def _GetNodePrimaryInstances(cfg, node_name):
1292 """Returns primary instances on a node.
1295 return _GetNodeInstancesInner(cfg,
1296 lambda inst: node_name == inst.primary_node)
1299 def _GetNodeSecondaryInstances(cfg, node_name):
1300 """Returns secondary instances on a node.
1303 return _GetNodeInstancesInner(cfg,
1304 lambda inst: node_name in inst.secondary_nodes)
1307 def _GetStorageTypeArgs(cfg, storage_type):
1308 """Returns the arguments for a storage type.
1311 # Special case for file storage
1312 if storage_type == constants.ST_FILE:
1313 # storage.FileStorage wants a list of storage directories
1314 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1319 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1322 for dev in instance.disks:
1323 cfg.SetDiskID(dev, node_name)
1325 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1326 result.Raise("Failed to get disk status from node %s" % node_name,
1327 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1329 for idx, bdev_status in enumerate(result.payload):
1330 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1336 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1337 """Check the sanity of iallocator and node arguments and use the
1338 cluster-wide iallocator if appropriate.
1340 Check that at most one of (iallocator, node) is specified. If none is
1341 specified, then the LU's opcode's iallocator slot is filled with the
1342 cluster-wide default iallocator.
1344 @type iallocator_slot: string
1345 @param iallocator_slot: the name of the opcode iallocator slot
1346 @type node_slot: string
1347 @param node_slot: the name of the opcode target node slot
1350 node = getattr(lu.op, node_slot, None)
1351 iallocator = getattr(lu.op, iallocator_slot, None)
1353 if node is not None and iallocator is not None:
1354 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1356 elif node is None and iallocator is None:
1357 default_iallocator = lu.cfg.GetDefaultIAllocator()
1358 if default_iallocator:
1359 setattr(lu.op, iallocator_slot, default_iallocator)
1361 raise errors.OpPrereqError("No iallocator or node given and no"
1362 " cluster-wide default iallocator found;"
1363 " please specify either an iallocator or a"
1364 " node, or set a cluster-wide default"
1368 def _GetDefaultIAllocator(cfg, iallocator):
1369 """Decides on which iallocator to use.
1371 @type cfg: L{config.ConfigWriter}
1372 @param cfg: Cluster configuration object
1373 @type iallocator: string or None
1374 @param iallocator: Iallocator specified in opcode
1376 @return: Iallocator name
1380 # Use default iallocator
1381 iallocator = cfg.GetDefaultIAllocator()
1384 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1385 " opcode nor as a cluster-wide default",
1391 class LUClusterPostInit(LogicalUnit):
1392 """Logical unit for running hooks after cluster initialization.
1395 HPATH = "cluster-init"
1396 HTYPE = constants.HTYPE_CLUSTER
1398 def BuildHooksEnv(self):
1403 "OP_TARGET": self.cfg.GetClusterName(),
1406 def BuildHooksNodes(self):
1407 """Build hooks nodes.
1410 return ([], [self.cfg.GetMasterNode()])
1412 def Exec(self, feedback_fn):
1419 class LUClusterDestroy(LogicalUnit):
1420 """Logical unit for destroying the cluster.
1423 HPATH = "cluster-destroy"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1440 def CheckPrereq(self):
1441 """Check prerequisites.
1443 This checks whether the cluster is empty.
1445 Any errors are signaled by raising errors.OpPrereqError.
1448 master = self.cfg.GetMasterNode()
1450 nodelist = self.cfg.GetNodeList()
1451 if len(nodelist) != 1 or nodelist[0] != master:
1452 raise errors.OpPrereqError("There are still %d node(s) in"
1453 " this cluster." % (len(nodelist) - 1),
1455 instancelist = self.cfg.GetInstanceList()
1457 raise errors.OpPrereqError("There are still %d instance(s) in"
1458 " this cluster." % len(instancelist),
1461 def Exec(self, feedback_fn):
1462 """Destroys the cluster.
1465 master_params = self.cfg.GetMasterNetworkParameters()
1467 # Run post hooks on master node before it's removed
1468 _RunPostHook(self, master_params.name)
1470 ems = self.cfg.GetUseExternalMipScript()
1471 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1473 result.Raise("Could not disable the master role")
1475 return master_params.name
1478 def _VerifyCertificate(filename):
1479 """Verifies a certificate for L{LUClusterVerifyConfig}.
1481 @type filename: string
1482 @param filename: Path to PEM file
1486 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1487 utils.ReadFile(filename))
1488 except Exception, err: # pylint: disable=W0703
1489 return (LUClusterVerifyConfig.ETYPE_ERROR,
1490 "Failed to load X509 certificate %s: %s" % (filename, err))
1493 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1494 constants.SSL_CERT_EXPIRATION_ERROR)
1497 fnamemsg = "While verifying %s: %s" % (filename, msg)
1502 return (None, fnamemsg)
1503 elif errcode == utils.CERT_WARNING:
1504 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1505 elif errcode == utils.CERT_ERROR:
1506 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1508 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1511 def _GetAllHypervisorParameters(cluster, instances):
1512 """Compute the set of all hypervisor parameters.
1514 @type cluster: L{objects.Cluster}
1515 @param cluster: the cluster object
1516 @type instances: list of L{objects.Instance}
1517 @param instances: additional instances from which to obtain parameters
1518 @rtype: list of (origin, hypervisor, parameters)
1519 @return: a list with all parameters found, indicating the hypervisor they
1520 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1525 for hv_name in cluster.enabled_hypervisors:
1526 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1528 for os_name, os_hvp in cluster.os_hvp.items():
1529 for hv_name, hv_params in os_hvp.items():
1531 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1532 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1534 # TODO: collapse identical parameter values in a single one
1535 for instance in instances:
1536 if instance.hvparams:
1537 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1538 cluster.FillHV(instance)))
1543 class _VerifyErrors(object):
1544 """Mix-in for cluster/group verify LUs.
1546 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1547 self.op and self._feedback_fn to be available.)
1551 ETYPE_FIELD = "code"
1552 ETYPE_ERROR = "ERROR"
1553 ETYPE_WARNING = "WARNING"
1555 def _Error(self, ecode, item, msg, *args, **kwargs):
1556 """Format an error message.
1558 Based on the opcode's error_codes parameter, either format a
1559 parseable error code, or a simpler error string.
1561 This must be called only from Exec and functions called from Exec.
1564 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1565 itype, etxt, _ = ecode
1566 # first complete the msg
1569 # then format the whole message
1570 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1571 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1577 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1578 # and finally report it via the feedback_fn
1579 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1581 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1582 """Log an error message if the passed condition is True.
1586 or self.op.debug_simulate_errors) # pylint: disable=E1101
1588 # If the error code is in the list of ignored errors, demote the error to a
1590 (_, etxt, _) = ecode
1591 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1592 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1595 self._Error(ecode, *args, **kwargs)
1597 # do not mark the operation as failed for WARN cases only
1598 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1599 self.bad = self.bad or cond
1602 class LUClusterVerify(NoHooksLU):
1603 """Submits all jobs necessary to verify the cluster.
1608 def ExpandNames(self):
1609 self.needed_locks = {}
1611 def Exec(self, feedback_fn):
1614 if self.op.group_name:
1615 groups = [self.op.group_name]
1616 depends_fn = lambda: None
1618 groups = self.cfg.GetNodeGroupList()
1620 # Verify global configuration
1622 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1625 # Always depend on global verification
1626 depends_fn = lambda: [(-len(jobs), [])]
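# Note (based on the job-queue dependency format): (-len(jobs), []) is a
# relative job dependency; negative indices count back from the job being
# submitted, so each per-group verification job depends on the
# OpClusterVerifyConfig job submitted just before it.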
1628 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1629 ignore_errors=self.op.ignore_errors,
1630 depends=depends_fn())]
1631 for group in groups)
1633 # Fix up all parameters
1634 for op in itertools.chain(*jobs): # pylint: disable=W0142
1635 op.debug_simulate_errors = self.op.debug_simulate_errors
1636 op.verbose = self.op.verbose
1637 op.error_codes = self.op.error_codes
1639 op.skip_checks = self.op.skip_checks
1640 except AttributeError:
1641 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1643 return ResultWithJobs(jobs)
1646 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1647 """Verifies the cluster config.
1652 def _VerifyHVP(self, hvp_data):
1653 """Verifies locally the syntax of the hypervisor parameters.
1656 for item, hv_name, hv_params in hvp_data:
1657 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1660 hv_class = hypervisor.GetHypervisor(hv_name)
1661 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1662 hv_class.CheckParameterSyntax(hv_params)
1663 except errors.GenericError, err:
1664 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1666 def ExpandNames(self):
1667 # Information can be safely retrieved as the BGL is acquired in exclusive
1669 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1670 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1671 self.all_node_info = self.cfg.GetAllNodesInfo()
1672 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1673 self.needed_locks = {}
1675 def Exec(self, feedback_fn):
1676 """Verify integrity of cluster, performing various test on nodes.
1680 self._feedback_fn = feedback_fn
1682 feedback_fn("* Verifying cluster config")
1684 for msg in self.cfg.VerifyConfig():
1685 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1687 feedback_fn("* Verifying cluster certificate files")
1689 for cert_filename in constants.ALL_CERT_FILES:
1690 (errcode, msg) = _VerifyCertificate(cert_filename)
1691 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1693 feedback_fn("* Verifying hypervisor parameters")
1695 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1696 self.all_inst_info.values()))
1698 feedback_fn("* Verifying all nodes belong to an existing group")
1700 # We do this verification here because, should this bogus circumstance
1701 # occur, it would never be caught by VerifyGroup, which only acts on
1702 # nodes/instances reachable from existing node groups.
1704 dangling_nodes = set(node.name for node in self.all_node_info.values()
1705 if node.group not in self.all_group_info)
1707 dangling_instances = {}
1708 no_node_instances = []
1710 for inst in self.all_inst_info.values():
1711 if inst.primary_node in dangling_nodes:
1712 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1713 elif inst.primary_node not in self.all_node_info:
1714 no_node_instances.append(inst.name)
1719 utils.CommaJoin(dangling_instances.get(node.name,
1721 for node in dangling_nodes]
1723 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1725 "the following nodes (and their instances) belong to a non"
1726 " existing group: %s", utils.CommaJoin(pretty_dangling))
1728 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1730 "the following instances have a non-existing primary-node:"
1731 " %s", utils.CommaJoin(no_node_instances))
1736 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1737 """Verifies the status of a node group.
1740 HPATH = "cluster-verify"
1741 HTYPE = constants.HTYPE_CLUSTER
1744 _HOOKS_INDENT_RE = re.compile("^", re.M)
1746 class NodeImage(object):
1747 """A class representing the logical and physical status of a node.
1750 @ivar name: the node name to which this object refers
1751 @ivar volumes: a structure as returned from
1752 L{ganeti.backend.GetVolumeList} (runtime)
1753 @ivar instances: a list of running instances (runtime)
1754 @ivar pinst: list of configured primary instances (config)
1755 @ivar sinst: list of configured secondary instances (config)
1756 @ivar sbp: dictionary of {primary-node: list of instances} for all
1757 instances for which this node is secondary (config)
1758 @ivar mfree: free memory, as reported by hypervisor (runtime)
1759 @ivar dfree: free disk, as reported by the node (runtime)
1760 @ivar offline: the offline status (config)
1761 @type rpc_fail: boolean
1762 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1763 not whether the individual keys were correct) (runtime)
1764 @type lvm_fail: boolean
1765 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1766 @type hyp_fail: boolean
1767 @ivar hyp_fail: whether the RPC call didn't return the instance list
1768 @type ghost: boolean
1769 @ivar ghost: whether this is a known node or not (config)
1770 @type os_fail: boolean
1771 @ivar os_fail: whether the RPC call didn't return valid OS data
1773 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1774 @type vm_capable: boolean
1775 @ivar vm_capable: whether the node can host instances
1778 def __init__(self, offline=False, name=None, vm_capable=True):
1787 self.offline = offline
1788 self.vm_capable = vm_capable
1789 self.rpc_fail = False
1790 self.lvm_fail = False
1791 self.hyp_fail = False
1793 self.os_fail = False
1796 def ExpandNames(self):
1797 # This raises errors.OpPrereqError on its own:
1798 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1800 # Get instances in node group; this is unsafe and needs verification later
1801 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1803 self.needed_locks = {
1804 locking.LEVEL_INSTANCE: inst_names,
1805 locking.LEVEL_NODEGROUP: [self.group_uuid],
1806 locking.LEVEL_NODE: [],
1809 self.share_locks = _ShareAll()
1811 def DeclareLocks(self, level):
1812 if level == locking.LEVEL_NODE:
1813 # Get members of node group; this is unsafe and needs verification later
1814 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1816 all_inst_info = self.cfg.GetAllInstancesInfo()
1818 # In Exec(), we warn about mirrored instances that have primary and
1819 # secondary living in separate node groups. To fully verify that
1820 # volumes for these instances are healthy, we will need to do an
1821 # extra call to their secondaries. We ensure here those nodes will
1823 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1824 # Important: access only the instances whose lock is owned
1825 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1826 nodes.update(all_inst_info[inst].secondary_nodes)
1828 self.needed_locks[locking.LEVEL_NODE] = nodes
1830 def CheckPrereq(self):
1831 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1832 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1834 group_nodes = set(self.group_info.members)
1835 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1838 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1840 unlocked_instances = \
1841 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1844 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1845 utils.CommaJoin(unlocked_nodes))
1847 if unlocked_instances:
1848 raise errors.OpPrereqError("Missing lock for instances: %s" %
1849 utils.CommaJoin(unlocked_instances))
1851 self.all_node_info = self.cfg.GetAllNodesInfo()
1852 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1854 self.my_node_names = utils.NiceSort(group_nodes)
1855 self.my_inst_names = utils.NiceSort(group_instances)
1857 self.my_node_info = dict((name, self.all_node_info[name])
1858 for name in self.my_node_names)
1860 self.my_inst_info = dict((name, self.all_inst_info[name])
1861 for name in self.my_inst_names)
1863 # We detect here the nodes that will need the extra RPC calls for verifying
1864 # split LV volumes; they should be locked.
1865 extra_lv_nodes = set()
1867 for inst in self.my_inst_info.values():
1868 if inst.disk_template in constants.DTS_INT_MIRROR:
1869 group = self.my_node_info[inst.primary_node].group
1870 for nname in inst.secondary_nodes:
1871 if self.all_node_info[nname].group != group:
1872 extra_lv_nodes.add(nname)
1874 unlocked_lv_nodes = \
1875 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1877 if unlocked_lv_nodes:
1878 raise errors.OpPrereqError("these nodes should be locked: %s" %
1879 utils.CommaJoin(unlocked_lv_nodes))
1880 self.extra_lv_nodes = list(extra_lv_nodes)
1882 def _VerifyNode(self, ninfo, nresult):
1883 """Perform some basic validation on data returned from a node.
1885 - check the result data structure is well formed and has all the
1887 - check ganeti version
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the results from the node
1893 @return: whether overall this call was successful (and we can expect
1894 reasonable values in the response)
1898 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1900 # main result, nresult should be a non-empty dict
1901 test = not nresult or not isinstance(nresult, dict)
1902 _ErrorIf(test, constants.CV_ENODERPC, node,
1903 "unable to verify node: no data returned")
1907 # compares ganeti version
1908 local_version = constants.PROTOCOL_VERSION
1909 remote_version = nresult.get("version", None)
1910 test = not (remote_version and
1911 isinstance(remote_version, (list, tuple)) and
1912 len(remote_version) == 2)
1913 _ErrorIf(test, constants.CV_ENODERPC, node,
1914 "connection to node returned invalid data")
1918 test = local_version != remote_version[0]
1919 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1920 "incompatible protocol versions: master %s,"
1921 " node %s", local_version, remote_version[0])
1925 # node seems compatible, we can actually try to look into its results
1927 # full package version
1928 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1929 constants.CV_ENODEVERSION, node,
1930 "software version mismatch: master %s, node %s",
1931 constants.RELEASE_VERSION, remote_version[1],
1932 code=self.ETYPE_WARNING)
1934 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1935 if ninfo.vm_capable and isinstance(hyp_result, dict):
1936 for hv_name, hv_result in hyp_result.iteritems():
1937 test = hv_result is not None
1938 _ErrorIf(test, constants.CV_ENODEHV, node,
1939 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1941 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1942 if ninfo.vm_capable and isinstance(hvp_result, list):
1943 for item, hv_name, hv_result in hvp_result:
1944 _ErrorIf(True, constants.CV_ENODEHV, node,
1945 "hypervisor %s parameter verify failure (source %s): %s",
1946 hv_name, item, hv_result)
1948 test = nresult.get(constants.NV_NODESETUP,
1949 ["Missing NODESETUP results"])
1950 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1955 def _VerifyNodeTime(self, ninfo, nresult,
1956 nvinfo_starttime, nvinfo_endtime):
1957 """Check the node time.
1959 @type ninfo: L{objects.Node}
1960 @param ninfo: the node to check
1961 @param nresult: the remote results for the node
1962 @param nvinfo_starttime: the start time of the RPC call
1963 @param nvinfo_endtime: the end time of the RPC call
1967 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1969 ntime = nresult.get(constants.NV_TIME, None)
1971 ntime_merged = utils.MergeTime(ntime)
1972 except (ValueError, TypeError):
1973 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1976 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1977 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1978 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1979 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1983 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1984 "Node time diverges by at least %s from master node time",
1987 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1988 """Check the node LVM results.
1990 @type ninfo: L{objects.Node}
1991 @param ninfo: the node to check
1992 @param nresult: the remote results for the node
1993 @param vg_name: the configured VG name
2000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2002 # checks vg existence and size > 20G
2003 vglist = nresult.get(constants.NV_VGLIST, None)
2005 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2007 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2008 constants.MIN_VG_SIZE)
2009 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2012 pvlist = nresult.get(constants.NV_PVLIST, None)
2013 test = pvlist is None
2014 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2016 # check that ':' is not present in PV names, since it's a
2017 # special character for lvcreate (denotes the range of PEs to
2018 # allocate on)
2019 for _, pvname, owner_vg in pvlist:
2020 test = ":" in pvname
2021 _ErrorIf(test, constants.CV_ENODELVM, node,
2022 "Invalid character ':' in PV '%s' of VG '%s'",
2025 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2026 """Check the node bridges.
2028 @type ninfo: L{objects.Node}
2029 @param ninfo: the node to check
2030 @param nresult: the remote results for the node
2031 @param bridges: the expected list of bridges
2038 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2040 missing = nresult.get(constants.NV_BRIDGES, None)
2041 test = not isinstance(missing, list)
2042 _ErrorIf(test, constants.CV_ENODENET, node,
2043 "did not return valid bridge information")
2045 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2046 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2048 def _VerifyNodeUserScripts(self, ninfo, nresult):
2049 """Check the results of user scripts presence and executability on the node
2051 @type ninfo: L{objects.Node}
2052 @param ninfo: the node to check
2053 @param nresult: the remote results for the node
2058 test = constants.NV_USERSCRIPTS not in nresult
2059 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2060 "did not return user scripts information")
2062 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2064 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2065 "user scripts not present or not executable: %s" %
2066 utils.CommaJoin(sorted(broken_scripts)))
2068 def _VerifyNodeNetwork(self, ninfo, nresult):
2069 """Check the node network connectivity results.
2071 @type ninfo: L{objects.Node}
2072 @param ninfo: the node to check
2073 @param nresult: the remote results for the node
2077 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2079 test = constants.NV_NODELIST not in nresult
2080 _ErrorIf(test, constants.CV_ENODESSH, node,
2081 "node hasn't returned node ssh connectivity data")
2083 if nresult[constants.NV_NODELIST]:
2084 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2085 _ErrorIf(True, constants.CV_ENODESSH, node,
2086 "ssh communication with node '%s': %s", a_node, a_msg)
2088 test = constants.NV_NODENETTEST not in nresult
2089 _ErrorIf(test, constants.CV_ENODENET, node,
2090 "node hasn't returned node tcp connectivity data")
2092 if nresult[constants.NV_NODENETTEST]:
2093 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2095 _ErrorIf(True, constants.CV_ENODENET, node,
2096 "tcp communication with node '%s': %s",
2097 anode, nresult[constants.NV_NODENETTEST][anode])
2099 test = constants.NV_MASTERIP not in nresult
2100 _ErrorIf(test, constants.CV_ENODENET, node,
2101 "node hasn't returned node master IP reachability data")
2103 if not nresult[constants.NV_MASTERIP]:
2104 if node == self.master_node:
2105 msg = "the master node cannot reach the master IP (not configured?)"
2107 msg = "cannot reach the master IP"
2108 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2110 def _VerifyInstance(self, instance, instanceconfig, node_image,
2112 """Verify an instance.
2114 This function checks to see if the required block devices are
2115 available on the instance's node.
2118 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2119 node_current = instanceconfig.primary_node
2121 node_vol_should = {}
2122 instanceconfig.MapLVsByNode(node_vol_should)
2124 for node in node_vol_should:
2125 n_img = node_image[node]
2126 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2127 # ignore missing volumes on offline or broken nodes
2129 for volume in node_vol_should[node]:
2130 test = volume not in n_img.volumes
2131 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2132 "volume %s missing on node %s", volume, node)
2134 if instanceconfig.admin_state == constants.ADMINST_UP:
2135 pri_img = node_image[node_current]
2136 test = instance not in pri_img.instances and not pri_img.offline
2137 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2138 "instance not running on its primary node %s",
2141 diskdata = [(nname, success, status, idx)
2142 for (nname, disks) in diskstatus.items()
2143 for idx, (success, status) in enumerate(disks)]
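# A hypothetical diskdata entry: ("node1.example.com", True, bdev_status, 0),
# i.e. disk/0 of this instance was queried successfully on node1 and
# bdev_status carries the returned block device status.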
2145 for nname, success, bdev_status, idx in diskdata:
2146 # the 'ghost node' construction in Exec() ensures that we have a
2148 snode = node_image[nname]
2149 bad_snode = snode.ghost or snode.offline
2150 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2151 not success and not bad_snode,
2152 constants.CV_EINSTANCEFAULTYDISK, instance,
2153 "couldn't retrieve status for disk/%s on %s: %s",
2154 idx, nname, bdev_status)
2155 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2156 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2157 constants.CV_EINSTANCEFAULTYDISK, instance,
2158 "disk/%s on %s is faulty", idx, nname)
2160 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2161 """Verify if there are any unknown volumes in the cluster.
2163 The .os, .swap and backup volumes are ignored. All other volumes are
2164 reported as unknown.
2166 @type reserved: L{ganeti.utils.FieldSet}
2167 @param reserved: a FieldSet of reserved volume names
2170 for node, n_img in node_image.items():
2171 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2172 # skip non-healthy nodes
2174 for volume in n_img.volumes:
2175 test = ((node not in node_vol_should or
2176 volume not in node_vol_should[node]) and
2177 not reserved.Matches(volume))
2178 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2179 "volume %s is unknown", volume)
2181 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2182 """Verify N+1 Memory Resilience.
2184 Check that if one single node dies we can still start all the
2185 instances it was primary for.
2188 cluster_info = self.cfg.GetClusterInfo()
2189 for node, n_img in node_image.items():
2190 # This code checks that every node which is now listed as
2191 # secondary has enough memory to host all instances it is
2192 # supposed to, should a single other node in the cluster fail.
2193 # FIXME: not ready for failover to an arbitrary node
2194 # FIXME: does not support file-backed instances
2195 # WARNING: we currently take into account down instances as well
2196 # as up ones, considering that even if they're down someone
2197 # might want to start them even in the event of a node failure.
2199 # we're skipping offline nodes from the N+1 warning, since
2200 # most likely we don't have good memory information from them;
2201 # we already list instances living on such nodes, and that's
2202 # enough warning
2204 #TODO(dynmem): use MINMEM for checking
2205 #TODO(dynmem): also consider ballooning out other instances
2206 for prinode, instances in n_img.sbp.items():
2208 for instance in instances:
2209 bep = cluster_info.FillBE(instance_cfg[instance])
2210 if bep[constants.BE_AUTO_BALANCE]:
2211 needed_mem += bep[constants.BE_MAXMEM]
2212 test = n_img.mfree < needed_mem
2213 self._ErrorIf(test, constants.CV_ENODEN1, node,
2214 "not enough memory to accomodate instance failovers"
2215 " should node %s fail (%dMiB needed, %dMiB available)",
2216 prinode, needed_mem, n_img.mfree)
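# Worked example (hypothetical numbers): if this node is secondary for two
# auto-balanced instances with BE_MAXMEM of 2048 and 4096 MiB sharing the
# same primary node, needed_mem is 6144 MiB; an mfree below that raises a
# CV_ENODEN1 error for this node.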
2219 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2220 (files_all, files_opt, files_mc, files_vm)):
2221 """Verifies file checksums collected from all nodes.
2223 @param errorif: Callback for reporting errors
2224 @param nodeinfo: List of L{objects.Node} objects
2225 @param master_node: Name of master node
2226 @param all_nvinfo: RPC results
2229 # Define functions determining which nodes to consider for a file
2232 (files_mc, lambda node: (node.master_candidate or
2233 node.name == master_node)),
2234 (files_vm, lambda node: node.vm_capable),
2237 # Build mapping from filename to list of nodes which should have the file
2239 for (files, fn) in files2nodefn:
2241 filenodes = nodeinfo
2243 filenodes = filter(fn, nodeinfo)
2244 nodefiles.update((filename,
2245 frozenset(map(operator.attrgetter("name"), filenodes)))
2246 for filename in files)
2248 assert set(nodefiles) == (files_all | files_mc | files_vm)
2250 fileinfo = dict((filename, {}) for filename in nodefiles)
2251 ignore_nodes = set()
2253 for node in nodeinfo:
2255 ignore_nodes.add(node.name)
2258 nresult = all_nvinfo[node.name]
2260 if nresult.fail_msg or not nresult.payload:
2263 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2265 test = not (node_files and isinstance(node_files, dict))
2266 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2267 "Node did not return file checksum data")
2269 ignore_nodes.add(node.name)
2272 # Build per-checksum mapping from filename to nodes having it
2273 for (filename, checksum) in node_files.items():
2274 assert filename in nodefiles
2275 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2277 for (filename, checksums) in fileinfo.items():
2278 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2280 # Nodes having the file
2281 with_file = frozenset(node_name
2282 for nodes in fileinfo[filename].values()
2283 for node_name in nodes) - ignore_nodes
2285 expected_nodes = nodefiles[filename] - ignore_nodes
2287 # Nodes missing file
2288 missing_file = expected_nodes - with_file
2290 if filename in files_opt:
2292 errorif(missing_file and missing_file != expected_nodes,
2293 constants.CV_ECLUSTERFILECHECK, None,
2294 "File %s is optional, but it must exist on all or no"
2295 " nodes (not found on %s)",
2296 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2298 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2299 "File %s is missing from node(s) %s", filename,
2300 utils.CommaJoin(utils.NiceSort(missing_file)))
2302 # Warn if a node has a file it shouldn't
2303 unexpected = with_file - expected_nodes
2305 constants.CV_ECLUSTERFILECHECK, None,
2306 "File %s should not exist on node(s) %s",
2307 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2309 # See if there are multiple versions of the file
2310 test = len(checksums) > 1
2312 variants = ["variant %s on %s" %
2313 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2314 for (idx, (checksum, nodes)) in
2315 enumerate(sorted(checksums.items()))]
2319 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2320 "File %s found with %s different checksums (%s)",
2321 filename, len(checksums), "; ".join(variants))
2323 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2325 """Verifies and the node DRBD status.
2327 @type ninfo: L{objects.Node}
2328 @param ninfo: the node to check
2329 @param nresult: the remote results for the node
2330 @param instanceinfo: the dict of instances
2331 @param drbd_helper: the configured DRBD usermode helper
2332 @param drbd_map: the DRBD map as returned by
2333 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2337 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2341 test = (helper_result is None)
2342 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2343 "no drbd usermode helper returned")
2345 status, payload = helper_result
2347 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2348 "drbd usermode helper check unsuccessful: %s", payload)
2349 test = status and (payload != drbd_helper)
2350 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2351 "wrong drbd usermode helper: %s", payload)
2353 # compute the DRBD minors
2355 for minor, instance in drbd_map[node].items():
2356 test = instance not in instanceinfo
2357 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2358 "ghost instance '%s' in temporary DRBD map", instance)
2359 # ghost instance should not be running, but otherwise we
2360 # don't give double warnings (both ghost instance and
2361 # unallocated minor in use)
2363 node_drbd[minor] = (instance, False)
2365 instance = instanceinfo[instance]
2366 node_drbd[minor] = (instance.name,
2367 instance.admin_state == constants.ADMINST_UP)
2369 # and now check them
2370 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2371 test = not isinstance(used_minors, (tuple, list))
2372 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2373 "cannot parse drbd status file: %s", str(used_minors))
2375 # we cannot check drbd status
2378 for minor, (iname, must_exist) in node_drbd.items():
2379 test = minor not in used_minors and must_exist
2380 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2381 "drbd minor %d of instance %s is not active", minor, iname)
2382 for minor in used_minors:
2383 test = minor not in node_drbd
2384 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2385 "unallocated drbd minor %d is in use", minor)
2387 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2388 """Builds the node OS structures.
2390 @type ninfo: L{objects.Node}
2391 @param ninfo: the node to check
2392 @param nresult: the remote results for the node
2393 @param nimg: the node image object
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 remote_os = nresult.get(constants.NV_OSLIST, None)
2400 test = (not isinstance(remote_os, list) or
2401 not compat.all(isinstance(v, list) and len(v) == 7
2402 for v in remote_os))
2404 _ErrorIf(test, constants.CV_ENODEOS, node,
2405 "node hasn't returned valid OS data")
2414 for (name, os_path, status, diagnose,
2415 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2417 if name not in os_dict:
2420 # parameters is a list of lists instead of list of tuples due to
2421 # JSON lacking a real tuple type, fix it:
2422 parameters = [tuple(v) for v in parameters]
2423 os_dict[name].append((os_path, status, diagnose,
2424 set(variants), set(parameters), set(api_ver)))
2426 nimg.oslist = os_dict
2428 def _VerifyNodeOS(self, ninfo, nimg, base):
2429 """Verifies the node OS list.
2431 @type ninfo: L{objects.Node}
2432 @param ninfo: the node to check
2433 @param nimg: the node image object
2434 @param base: the 'template' node we match against (e.g. from the master)
2438 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2442 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2443 for os_name, os_data in nimg.oslist.items():
2444 assert os_data, "Empty OS status for OS %s?!" % os_name
2445 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2446 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2447 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2448 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2449 "OS '%s' has multiple entries (first one shadows the rest): %s",
2450 os_name, utils.CommaJoin([v[0] for v in os_data]))
2451 # comparisons with the 'base' image
2452 test = os_name not in base.oslist
2453 _ErrorIf(test, constants.CV_ENODEOS, node,
2454 "Extra OS %s not present on reference node (%s)",
2458 assert base.oslist[os_name], "Base node has empty OS status?"
2459 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2461 # base OS is invalid, skipping
2463 for kind, a, b in [("API version", f_api, b_api),
2464 ("variants list", f_var, b_var),
2465 ("parameters", beautify_params(f_param),
2466 beautify_params(b_param))]:
2467 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2468 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2469 kind, os_name, base.name,
2470 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2472 # check any missing OSes
2473 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2474 _ErrorIf(missing, constants.CV_ENODEOS, node,
2475 "OSes present on reference node %s but missing on this node: %s",
2476 base.name, utils.CommaJoin(missing))
2478 def _VerifyOob(self, ninfo, nresult):
2479 """Verifies out of band functionality of a node.
2481 @type ninfo: L{objects.Node}
2482 @param ninfo: the node to check
2483 @param nresult: the remote results for the node
2487 # We just have to verify the paths on master and/or master candidates
2488 # as the oob helper is invoked on the master
2489 if ((ninfo.master_candidate or ninfo.master_capable) and
2490 constants.NV_OOB_PATHS in nresult):
2491 for path_result in nresult[constants.NV_OOB_PATHS]:
2492 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2494 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2495 """Verifies and updates the node volume data.
2497 This function will update a L{NodeImage}'s internal structures
2498 with data from the remote call.
2500 @type ninfo: L{objects.Node}
2501 @param ninfo: the node to check
2502 @param nresult: the remote results for the node
2503 @param nimg: the node image object
2504 @param vg_name: the configured VG name
2508 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2510 nimg.lvm_fail = True
2511 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2514 elif isinstance(lvdata, basestring):
2515 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2516 utils.SafeEncode(lvdata))
2517 elif not isinstance(lvdata, dict):
2518 _ErrorIf(True, constants.CV_ENODELVM, node,
2519 "rpc call to node failed (lvlist)")
2521 nimg.volumes = lvdata
2522 nimg.lvm_fail = False
2524 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2525 """Verifies and updates the node instance list.
2527 If the listing was successful, then updates this node's instance
2528 list. Otherwise, it marks the RPC call as failed for the instance
2529 list.
2531 @type ninfo: L{objects.Node}
2532 @param ninfo: the node to check
2533 @param nresult: the remote results for the node
2534 @param nimg: the node image object
2537 idata = nresult.get(constants.NV_INSTANCELIST, None)
2538 test = not isinstance(idata, list)
2539 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2540 "rpc call to node failed (instancelist): %s",
2541 utils.SafeEncode(str(idata)))
2543 nimg.hyp_fail = True
2545 nimg.instances = idata
2547 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2548 """Verifies and computes a node information map
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2553 @param nimg: the node image object
2554 @param vg_name: the configured VG name
2558 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2560 # try to read free memory (from the hypervisor)
2561 hv_info = nresult.get(constants.NV_HVINFO, None)
2562 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2563 _ErrorIf(test, constants.CV_ENODEHV, node,
2564 "rpc call to node failed (hvinfo)")
2567 nimg.mfree = int(hv_info["memory_free"])
2568 except (ValueError, TypeError):
2569 _ErrorIf(True, constants.CV_ENODERPC, node,
2570 "node returned invalid nodeinfo, check hypervisor")
2572 # FIXME: devise a free space model for file based instances as well
2573 if vg_name is not None:
2574 test = (constants.NV_VGLIST not in nresult or
2575 vg_name not in nresult[constants.NV_VGLIST])
2576 _ErrorIf(test, constants.CV_ENODELVM, node,
2577 "node didn't return data for the volume group '%s'"
2578 " - it is either missing or broken", vg_name)
2581 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2582 except (ValueError, TypeError):
2583 _ErrorIf(True, constants.CV_ENODERPC, node,
2584 "node returned invalid LVM info, check LVM status")
2586 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2587 """Gets per-disk status information for all instances.
2589 @type nodelist: list of strings
2590 @param nodelist: Node names
2591 @type node_image: dict of (name, L{objects.Node})
2592 @param node_image: Node objects
2593 @type instanceinfo: dict of (name, L{objects.Instance})
2594 @param instanceinfo: Instance objects
2595 @rtype: {instance: {node: [(success, payload)]}}
2596 @return: a dictionary of per-instance dictionaries with nodes as
2597 keys and disk information as values; the disk information is a
2598 list of tuples (success, payload)
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2604 node_disks_devonly = {}
2605 diskless_instances = set()
2606 diskless = constants.DT_DISKLESS
2608 for nname in nodelist:
2609 node_instances = list(itertools.chain(node_image[nname].pinst,
2610 node_image[nname].sinst))
2611 diskless_instances.update(inst for inst in node_instances
2612 if instanceinfo[inst].disk_template == diskless)
2613 disks = [(inst, disk)
2614 for inst in node_instances
2615 for disk in instanceinfo[inst].disks]
2618 # No need to collect data
2621 node_disks[nname] = disks
2623 # Creating copies as SetDiskID below will modify the objects and that can
2624 # lead to incorrect data returned from nodes
2625 devonly = [dev.Copy() for (_, dev) in disks]
2628 self.cfg.SetDiskID(dev, nname)
2630 node_disks_devonly[nname] = devonly
2632 assert len(node_disks) == len(node_disks_devonly)
2634 # Collect data from all nodes with disks
2635 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2638 assert len(result) == len(node_disks)
2642 for (nname, nres) in result.items():
2643 disks = node_disks[nname]
2646 # No data from this node
2647 data = len(disks) * [(False, "node offline")]
2650 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2651 "while getting disk information: %s", msg)
2653 # No data from this node
2654 data = len(disks) * [(False, msg)]
2657 for idx, i in enumerate(nres.payload):
2658 if isinstance(i, (tuple, list)) and len(i) == 2:
2661 logging.warning("Invalid result from node %s, entry %d: %s",
2663 data.append((False, "Invalid result from the remote node"))
2665 for ((inst, _), status) in zip(disks, data):
2666 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2668 # Add empty entries for diskless instances.
2669 for inst in diskless_instances:
2670 assert inst not in instdisk
2673 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2674 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2675 compat.all(isinstance(s, (tuple, list)) and
2676 len(s) == 2 for s in statuses)
2677 for inst, nnames in instdisk.items()
2678 for nname, statuses in nnames.items())
2679 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
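# A hypothetical instdisk layout, for clarity:
#   {"inst1": {"node1": [(True, status_disk0), (True, status_disk1)]}}
# with one (success, payload) tuple per disk, and an empty dict added above
# for every diskless instance.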
2684 def _SshNodeSelector(group_uuid, all_nodes):
2685 """Create endless iterators for all potential SSH check hosts.
2688 nodes = [node for node in all_nodes
2689 if (node.group != group_uuid and
2691 keyfunc = operator.attrgetter("group")
2693 return map(itertools.cycle,
2694 [sorted(map(operator.attrgetter("name"), names))
2695 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2699 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2700 """Choose which nodes should talk to which other nodes.
2702 We will make nodes contact all nodes in their group, and one node from
2703 every other group.
2705 @warning: This algorithm has a known issue if one node group is much
2706 smaller than others (e.g. just one node). In such a case all other
2707 nodes will talk to the single node.
2710 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2711 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2713 return (online_nodes,
2714 dict((name, sorted([i.next() for i in sel]))
2715 for name in online_nodes))
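# Hypothetical example: with groups G1 = {A, B} and G2 = {C, D}, verifying
# G1 hands each online node of G1 one node from every other group, drawn
# round-robin from the per-group cycles, so A might be told to check C and
# B to check D.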
2717 def BuildHooksEnv(self):
2720 Cluster-Verify hooks are only run in the post phase; if they fail, their
2721 output is logged in the verify output and the verification fails.
2725 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2728 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2729 for node in self.my_node_info.values())
2733 def BuildHooksNodes(self):
2734 """Build hooks nodes.
2737 return ([], self.my_node_names)
2739 def Exec(self, feedback_fn):
2740 """Verify integrity of the node group, performing various test on nodes.
2743 # This method has too many local variables. pylint: disable=R0914
2744 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2746 if not self.my_node_names:
2748 feedback_fn("* Empty node group, skipping verification")
2752 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 verbose = self.op.verbose
2754 self._feedback_fn = feedback_fn
2756 vg_name = self.cfg.GetVGName()
2757 drbd_helper = self.cfg.GetDRBDHelper()
2758 cluster = self.cfg.GetClusterInfo()
2759 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2760 hypervisors = cluster.enabled_hypervisors
2761 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2763 i_non_redundant = [] # Non redundant instances
2764 i_non_a_balanced = [] # Non auto-balanced instances
2765 i_offline = 0 # Count of offline instances
2766 n_offline = 0 # Count of offline nodes
2767 n_drained = 0 # Count of nodes being drained
2768 node_vol_should = {}
2770 # FIXME: verify OS list
2773 filemap = _ComputeAncillaryFiles(cluster, False)
2775 # do local checksums
2776 master_node = self.master_node = self.cfg.GetMasterNode()
2777 master_ip = self.cfg.GetMasterIP()
2779 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2782 if self.cfg.GetUseExternalMipScript():
2783 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2785 node_verify_param = {
2786 constants.NV_FILELIST:
2787 utils.UniqueSequence(filename
2788 for files in filemap
2789 for filename in files),
2790 constants.NV_NODELIST:
2791 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2792 self.all_node_info.values()),
2793 constants.NV_HYPERVISOR: hypervisors,
2794 constants.NV_HVPARAMS:
2795 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2796 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2797 for node in node_data_list
2798 if not node.offline],
2799 constants.NV_INSTANCELIST: hypervisors,
2800 constants.NV_VERSION: None,
2801 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2802 constants.NV_NODESETUP: None,
2803 constants.NV_TIME: None,
2804 constants.NV_MASTERIP: (master_node, master_ip),
2805 constants.NV_OSLIST: None,
2806 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2807 constants.NV_USERSCRIPTS: user_scripts,
2810 if vg_name is not None:
2811 node_verify_param[constants.NV_VGLIST] = None
2812 node_verify_param[constants.NV_LVLIST] = vg_name
2813 node_verify_param[constants.NV_PVLIST] = [vg_name]
2814 node_verify_param[constants.NV_DRBDLIST] = None
2817 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2820 # FIXME: this needs to be changed per node-group, not cluster-wide
2822 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2823 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2824 bridges.add(default_nicpp[constants.NIC_LINK])
2825 for instance in self.my_inst_info.values():
2826 for nic in instance.nics:
2827 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2828 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2829 bridges.add(full_nic[constants.NIC_LINK])
2832 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2834 # Build our expected cluster state
2835 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2837 vm_capable=node.vm_capable))
2838 for node in node_data_list)
2842 for node in self.all_node_info.values():
2843 path = _SupportsOob(self.cfg, node)
2844 if path and path not in oob_paths:
2845 oob_paths.append(path)
2848 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2850 for instance in self.my_inst_names:
2851 inst_config = self.my_inst_info[instance]
2853 for nname in inst_config.all_nodes:
2854 if nname not in node_image:
2855 gnode = self.NodeImage(name=nname)
2856 gnode.ghost = (nname not in self.all_node_info)
2857 node_image[nname] = gnode
2859 inst_config.MapLVsByNode(node_vol_should)
2861 pnode = inst_config.primary_node
2862 node_image[pnode].pinst.append(instance)
2864 for snode in inst_config.secondary_nodes:
2865 nimg = node_image[snode]
2866 nimg.sinst.append(instance)
2867 if pnode not in nimg.sbp:
2868 nimg.sbp[pnode] = []
2869 nimg.sbp[pnode].append(instance)
2871 # At this point, we have the in-memory data structures complete,
2872 # except for the runtime information, which we'll gather next
2874 # Due to the way our RPC system works, exact response times cannot be
2875 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2876 # time before and after executing the request, we can at least have a time
2877 # window.
2878 nvinfo_starttime = time.time()
2879 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2881 self.cfg.GetClusterName())
2882 nvinfo_endtime = time.time()
2884 if self.extra_lv_nodes and vg_name is not None:
2886 self.rpc.call_node_verify(self.extra_lv_nodes,
2887 {constants.NV_LVLIST: vg_name},
2888 self.cfg.GetClusterName())
2890 extra_lv_nvinfo = {}
2892 all_drbd_map = self.cfg.ComputeDRBDMap()
2894 feedback_fn("* Gathering disk information (%s nodes)" %
2895 len(self.my_node_names))
2896 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2899 feedback_fn("* Verifying configuration file consistency")
2901 # If not all nodes are being checked, we need to make sure the master node
2902 # and a non-checked vm_capable node are in the list.
2903 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2905 vf_nvinfo = all_nvinfo.copy()
2906 vf_node_info = list(self.my_node_info.values())
2907 additional_nodes = []
2908 if master_node not in self.my_node_info:
2909 additional_nodes.append(master_node)
2910 vf_node_info.append(self.all_node_info[master_node])
2911 # Add the first vm_capable node we find which is not included
2912 for node in absent_nodes:
2913 nodeinfo = self.all_node_info[node]
2914 if nodeinfo.vm_capable and not nodeinfo.offline:
2915 additional_nodes.append(node)
2916 vf_node_info.append(self.all_node_info[node])
2918 key = constants.NV_FILELIST
2919 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2920 {key: node_verify_param[key]},
2921 self.cfg.GetClusterName()))
2923 vf_nvinfo = all_nvinfo
2924 vf_node_info = self.my_node_info.values()
2926 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2928 feedback_fn("* Verifying node status")
2932 for node_i in node_data_list:
2934 nimg = node_image[node]
2938 feedback_fn("* Skipping offline node %s" % (node,))
2942 if node == master_node:
2944 elif node_i.master_candidate:
2945 ntype = "master candidate"
2946 elif node_i.drained:
2952 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2954 msg = all_nvinfo[node].fail_msg
2955 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2958 nimg.rpc_fail = True
2961 nresult = all_nvinfo[node].payload
2963 nimg.call_ok = self._VerifyNode(node_i, nresult)
2964 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2965 self._VerifyNodeNetwork(node_i, nresult)
2966 self._VerifyNodeUserScripts(node_i, nresult)
2967 self._VerifyOob(node_i, nresult)
2970 self._VerifyNodeLVM(node_i, nresult, vg_name)
2971 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2974 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2975 self._UpdateNodeInstances(node_i, nresult, nimg)
2976 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2977 self._UpdateNodeOS(node_i, nresult, nimg)
2979 if not nimg.os_fail:
2980 if refos_img is None:
2982 self._VerifyNodeOS(node_i, nimg, refos_img)
2983 self._VerifyNodeBridges(node_i, nresult, bridges)
2985 # Check whether all running instances are primary for the node. (This
2986 # can no longer be done from _VerifyInstance below, since some of the
2987 # wrong instances could be from other node groups.)
2988 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2990 for inst in non_primary_inst:
2991 # FIXME: investigate best way to handle offline insts
2992 if inst.admin_state == constants.ADMINST_OFFLINE:
2994 feedback_fn("* Skipping offline instance %s" % inst.name)
2997 test = inst in self.all_inst_info
2998 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2999 "instance should not run on node %s", node_i.name)
3000 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3001 "node is running unknown instance %s", inst)
3003 for node, result in extra_lv_nvinfo.items():
3004 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3005 node_image[node], vg_name)
3007 feedback_fn("* Verifying instance status")
3008 for instance in self.my_inst_names:
3010 feedback_fn("* Verifying instance %s" % instance)
3011 inst_config = self.my_inst_info[instance]
3012 self._VerifyInstance(instance, inst_config, node_image,
3014 inst_nodes_offline = []
3016 pnode = inst_config.primary_node
3017 pnode_img = node_image[pnode]
3018 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3019 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3020 " primary node failed", instance)
3022 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3024 constants.CV_EINSTANCEBADNODE, instance,
3025 "instance is marked as running and lives on offline node %s",
3026 inst_config.primary_node)
3028 # If the instance is non-redundant we cannot survive losing its primary
3029 # node, so we are not N+1 compliant. On the other hand we have no disk
3030 # templates with more than one secondary so that situation is not well
3031 # supported either.
3032 # FIXME: does not support file-backed instances
3033 if not inst_config.secondary_nodes:
3034 i_non_redundant.append(instance)
3036 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3037 constants.CV_EINSTANCELAYOUT,
3038 instance, "instance has multiple secondary nodes: %s",
3039 utils.CommaJoin(inst_config.secondary_nodes),
3040 code=self.ETYPE_WARNING)
3042 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3043 pnode = inst_config.primary_node
3044 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3045 instance_groups = {}
3047 for node in instance_nodes:
3048 instance_groups.setdefault(self.all_node_info[node].group,
3052 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3053 # Sort so that we always list the primary node first.
3054 for group, nodes in sorted(instance_groups.items(),
3055 key=lambda (_, nodes): pnode in nodes,
3058 self._ErrorIf(len(instance_groups) > 1,
3059 constants.CV_EINSTANCESPLITGROUPS,
3060 instance, "instance has primary and secondary nodes in"
3061 " different groups: %s", utils.CommaJoin(pretty_list),
3062 code=self.ETYPE_WARNING)
3064 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3065 i_non_a_balanced.append(instance)
3067 for snode in inst_config.secondary_nodes:
3068 s_img = node_image[snode]
3069 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3070 snode, "instance %s, connection to secondary node failed",
3074 inst_nodes_offline.append(snode)
3076 # warn that the instance lives on offline nodes
3077 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3078 "instance has offline secondary node(s) %s",
3079 utils.CommaJoin(inst_nodes_offline))
3080 # ... or ghost/non-vm_capable nodes
3081 for node in inst_config.all_nodes:
3082 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3083 instance, "instance lives on ghost node %s", node)
3084 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3085 instance, "instance lives on non-vm_capable node %s", node)
3087 feedback_fn("* Verifying orphan volumes")
3088 reserved = utils.FieldSet(*cluster.reserved_lvs)
3090 # We will get spurious "unknown volume" warnings if any node of this group
3091 # is secondary for an instance whose primary is in another group. To avoid
3092 # them, we find these instances and add their volumes to node_vol_should.
3093 for inst in self.all_inst_info.values():
3094 for secondary in inst.secondary_nodes:
3095 if (secondary in self.my_node_info
3096 and inst.name not in self.my_inst_info):
3097 inst.MapLVsByNode(node_vol_should)
3100 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3102 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3103 feedback_fn("* Verifying N+1 Memory redundancy")
3104 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3106 feedback_fn("* Other Notes")
3108 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3109 % len(i_non_redundant))
3111 if i_non_a_balanced:
3112 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3113 % len(i_non_a_balanced))
3116 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3119 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3122 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3126 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3127 """Analyze the post-hooks' result
3129 This method analyses the hook result, handles it, and sends some
3130 nicely-formatted feedback back to the user.
3132 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3133 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3134 @param hooks_results: the results of the multi-node hooks rpc call
3135 @param feedback_fn: function used to send feedback back to the caller
3136 @param lu_result: previous Exec result
3137 @return: the new Exec result, based on the previous result
3141 # We only really run POST phase hooks, only for non-empty groups,
3142 # and are only interested in their results
3143 if not self.my_node_names:
3146 elif phase == constants.HOOKS_PHASE_POST:
3147 # Used to change hooks' output to proper indentation
3148 feedback_fn("* Hooks Results")
3149 assert hooks_results, "invalid result from hooks"
3151 for node_name in hooks_results:
3152 res = hooks_results[node_name]
3154 test = msg and not res.offline
3155 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3156 "Communication failure in hooks execution: %s", msg)
3157 if res.offline or msg:
3158 # No need to investigate payload if node is offline or gave
3159 # an error
3161 for script, hkr, output in res.payload:
3162 test = hkr == constants.HKR_FAIL
3163 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3164 "Script %s failed, output:", script)
3166 output = self._HOOKS_INDENT_RE.sub(" ", output)
3167 feedback_fn("%s" % output)
3173 class LUClusterVerifyDisks(NoHooksLU):
3174 """Verifies the cluster disks status.
3179 def ExpandNames(self):
3180 self.share_locks = _ShareAll()
3181 self.needed_locks = {
3182 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3185 def Exec(self, feedback_fn):
3186 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3188 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3189 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3190 for group in group_names])
3193 class LUGroupVerifyDisks(NoHooksLU):
3194 """Verifies the status of all disks in a node group.
3199 def ExpandNames(self):
3200 # Raises errors.OpPrereqError on its own if group can't be found
3201 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3203 self.share_locks = _ShareAll()
3204 self.needed_locks = {
3205 locking.LEVEL_INSTANCE: [],
3206 locking.LEVEL_NODEGROUP: [],
3207 locking.LEVEL_NODE: [],
3210 def DeclareLocks(self, level):
3211 if level == locking.LEVEL_INSTANCE:
3212 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3214 # Lock instances optimistically, needs verification once node and group
3215 # locks have been acquired
3216 self.needed_locks[locking.LEVEL_INSTANCE] = \
3217 self.cfg.GetNodeGroupInstances(self.group_uuid)
3219 elif level == locking.LEVEL_NODEGROUP:
3220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3222 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3223 set([self.group_uuid] +
3224 # Lock all groups used by instances optimistically; this requires
3225 # going via the node before it's locked, requiring verification
3226 # later on
3228 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3229 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3231 elif level == locking.LEVEL_NODE:
3232 # This will only lock the nodes in the group to be verified which contain
3233 # actual instances
3234 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3235 self._LockInstancesNodes()
3237 # Lock all nodes in group to be verified
3238 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3239 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3240 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3242 def CheckPrereq(self):
3243 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3244 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3245 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3247 assert self.group_uuid in owned_groups
3249 # Check if locked instances are still correct
3250 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3252 # Get instance information
3253 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3255 # Check if node groups for locked instances are still correct
3256 for (instance_name, inst) in self.instances.items():
3257 assert owned_nodes.issuperset(inst.all_nodes), \
3258 "Instance %s's nodes changed while we kept the lock" % instance_name
3260 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3263 assert self.group_uuid in inst_groups, \
3264 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3266 def Exec(self, feedback_fn):
3267 """Verify integrity of cluster disks.
3269 @rtype: tuple of three items
3270 @return: a tuple of (dict of node-to-node_error, list of instances
3271 which need activate-disks, dict of instance: (node, volume) for
3276 res_instances = set()
3279 nv_dict = _MapInstanceDisksToNodes([inst
3280 for inst in self.instances.values()
3281 if inst.admin_state == constants.ADMINST_UP])
3284 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3285 set(self.cfg.GetVmCapableNodeList()))
3287 node_lvs = self.rpc.call_lv_list(nodes, [])
3289 for (node, node_res) in node_lvs.items():
3290 if node_res.offline:
3293 msg = node_res.fail_msg
3295 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3296 res_nodes[node] = msg
3299 for lv_name, (_, _, lv_online) in node_res.payload.items():
3300 inst = nv_dict.pop((node, lv_name), None)
3301 if not (lv_online or inst is None):
3302 res_instances.add(inst)
3304 # any leftover items in nv_dict are missing LVs, let's arrange the data
3305 # better
3306 for key, inst in nv_dict.iteritems():
3307 res_missing.setdefault(inst, []).append(list(key))
3309 return (res_nodes, list(res_instances), res_missing)
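# A hypothetical return value, matching the docstring above:
#   ({"node1": "rpc failure"}, ["inst2"],
#    {"inst3": [["node2", "xenvg/disk0_data"]]})
# i.e. per-node errors, instances needing disk activation, and missing LVs
# keyed by instance.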
3312 class LUClusterRepairDiskSizes(NoHooksLU):
3313 """Verifies the cluster disks sizes.
3318 def ExpandNames(self):
3319 if self.op.instances:
3320 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3321 self.needed_locks = {
3322 locking.LEVEL_NODE_RES: [],
3323 locking.LEVEL_INSTANCE: self.wanted_names,
3325 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3327 self.wanted_names = None
3328 self.needed_locks = {
3329 locking.LEVEL_NODE_RES: locking.ALL_SET,
3330 locking.LEVEL_INSTANCE: locking.ALL_SET,
3332 self.share_locks = {
3333 locking.LEVEL_NODE_RES: 1,
3334 locking.LEVEL_INSTANCE: 0,
3337 def DeclareLocks(self, level):
3338 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3339 self._LockInstancesNodes(primary_only=True, level=level)
3341 def CheckPrereq(self):
3342 """Check prerequisites.
3344 This only checks the optional instance list against the existing names.
3347 if self.wanted_names is None:
3348 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3350 self.wanted_instances = \
3351 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3353 def _EnsureChildSizes(self, disk):
3354 """Ensure children of the disk have the needed disk size.
3356 This is valid mainly for DRBD8 and fixes an issue where the
3357 children have a smaller disk size.
3359 @param disk: an L{ganeti.objects.Disk} object
3362 if disk.dev_type == constants.LD_DRBD8:
3363 assert disk.children, "Empty children for DRBD8?"
3364 fchild = disk.children[0]
3365 mismatch = fchild.size < disk.size
3367 self.LogInfo("Child disk has size %d, parent %d, fixing",
3368 fchild.size, disk.size)
3369 fchild.size = disk.size
3371 # and we recurse on this child only, not on the metadev
3372 return self._EnsureChildSizes(fchild) or mismatch
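# Hypothetical example: a DRBD8 disk recorded at 10240 MiB whose data LV
# child only records 10112 MiB gets the child size bumped to 10240 in the
# configuration; the caller then persists the change via cfg.Update().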
3376 def Exec(self, feedback_fn):
3377 """Verify the size of cluster disks.
3380 # TODO: check child disks too
3381 # TODO: check differences in size between primary/secondary nodes
3383 for instance in self.wanted_instances:
3384 pnode = instance.primary_node
3385 if pnode not in per_node_disks:
3386 per_node_disks[pnode] = []
3387 for idx, disk in enumerate(instance.disks):
3388 per_node_disks[pnode].append((instance, idx, disk))
3390 assert not (frozenset(per_node_disks.keys()) -
3391 self.owned_locks(locking.LEVEL_NODE_RES)), \
3392 "Not owning correct locks"
3393 assert not self.owned_locks(locking.LEVEL_NODE)
3396 for node, dskl in per_node_disks.items():
3397 newl = [v[2].Copy() for v in dskl]
3399 self.cfg.SetDiskID(dsk, node)
3400 result = self.rpc.call_blockdev_getsize(node, newl)
3402 self.LogWarning("Failure in blockdev_getsize call to node"
3403 " %s, ignoring", node)
3405 if len(result.payload) != len(dskl):
3406 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3407 " result.payload=%s", node, len(dskl), result.payload)
3408 self.LogWarning("Invalid result from node %s, ignoring node results",
3411 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3413 self.LogWarning("Disk %d of instance %s did not return size"
3414 " information, ignoring", idx, instance.name)
3416 if not isinstance(size, (int, long)):
3417 self.LogWarning("Disk %d of instance %s did not return valid"
3418 " size information, ignoring", idx, instance.name)
3421 if size != disk.size:
3422 self.LogInfo("Disk %d of instance %s has mismatched size,"
3423 " correcting: recorded %d, actual %d", idx,
3424 instance.name, disk.size, size)
3426 self.cfg.Update(instance, feedback_fn)
3427 changed.append((instance.name, idx, size))
3428 if self._EnsureChildSizes(disk):
3429 self.cfg.Update(instance, feedback_fn)
3430 changed.append((instance.name, idx, disk.size))
3434 class LUClusterRename(LogicalUnit):
3435 """Rename the cluster.
3438 HPATH = "cluster-rename"
3439 HTYPE = constants.HTYPE_CLUSTER
3441 def BuildHooksEnv(self):
3446 "OP_TARGET": self.cfg.GetClusterName(),
3447 "NEW_NAME": self.op.name,
3450 def BuildHooksNodes(self):
3451 """Build hooks nodes.
3454 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3456 def CheckPrereq(self):
3457 """Verify that the passed name is a valid one.
3460 hostname = netutils.GetHostname(name=self.op.name,
3461 family=self.cfg.GetPrimaryIPFamily())
3463 new_name = hostname.name
3464 self.ip = new_ip = hostname.ip
3465 old_name = self.cfg.GetClusterName()
3466 old_ip = self.cfg.GetMasterIP()
3467 if new_name == old_name and new_ip == old_ip:
3468 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3469 " cluster has changed",
3471 if new_ip != old_ip:
3472 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3473 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3474 " reachable on the network" %
3475 new_ip, errors.ECODE_NOTUNIQUE)
3477 self.op.name = new_name
3479 def Exec(self, feedback_fn):
3480 """Rename the cluster.
3483 clustername = self.op.name
3486 # shutdown the master IP
3487 master_params = self.cfg.GetMasterNetworkParameters()
3488 ems = self.cfg.GetUseExternalMipScript()
3489 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3491 result.Raise("Could not disable the master role")
3494 cluster = self.cfg.GetClusterInfo()
3495 cluster.cluster_name = clustername
3496 cluster.master_ip = new_ip
3497 self.cfg.Update(cluster, feedback_fn)
3499 # update the known hosts file
3500 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3501 node_list = self.cfg.GetOnlineNodeList()
3503 node_list.remove(master_params.name)
3506 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3508 master_params.ip = new_ip
3509 result = self.rpc.call_node_activate_master_ip(master_params.name,
3511 msg = result.fail_msg
3513 self.LogWarning("Could not re-enable the master role on"
3514 " the master, please restart manually: %s", msg)
3519 def _ValidateNetmask(cfg, netmask):
3520 """Checks if a netmask is valid.
3522 @type cfg: L{config.ConfigWriter}
3523 @param cfg: The cluster configuration
3525 @param netmask: the netmask to be verified
3526 @raise errors.OpPrereqError: if the validation fails
3529 ip_family = cfg.GetPrimaryIPFamily()
3531 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3532 except errors.ProgrammerError:
3533 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3535 if not ipcls.ValidateNetmask(netmask):
3536 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3540 class LUClusterSetParams(LogicalUnit):
3541 """Change the parameters of the cluster.
3544 HPATH = "cluster-modify"
3545 HTYPE = constants.HTYPE_CLUSTER
3548 def CheckArguments(self):
3552 if self.op.uid_pool:
3553 uidpool.CheckUidPool(self.op.uid_pool)
3555 if self.op.add_uids:
3556 uidpool.CheckUidPool(self.op.add_uids)
3558 if self.op.remove_uids:
3559 uidpool.CheckUidPool(self.op.remove_uids)
3561 if self.op.master_netmask is not None:
3562 _ValidateNetmask(self.cfg, self.op.master_netmask)
3564 if self.op.diskparams:
3565 for dt_params in self.op.diskparams.values():
3566 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3568 def ExpandNames(self):
3569 # FIXME: in the future maybe other cluster params won't require checking on
3570 # all nodes to be modified.
3571 self.needed_locks = {
3572 locking.LEVEL_NODE: locking.ALL_SET,
3574 self.share_locks[locking.LEVEL_NODE] = 1
3576 def BuildHooksEnv(self):
3581 "OP_TARGET": self.cfg.GetClusterName(),
3582 "NEW_VG_NAME": self.op.vg_name,
3585 def BuildHooksNodes(self):
3586 """Build hooks nodes.
3589 mn = self.cfg.GetMasterNode()
3592 def CheckPrereq(self):
3593 """Check prerequisites.
3595 This checks that the given parameters do not conflict and
3596 that the given volume group is valid.
3599 if self.op.vg_name is not None and not self.op.vg_name:
3600 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3601 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3602 " instances exist", errors.ECODE_INVAL)
3604 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3605 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3606 raise errors.OpPrereqError("Cannot disable drbd helper while"
3607 " drbd-based instances exist",
3610 node_list = self.owned_locks(locking.LEVEL_NODE)
3612 # if vg_name not None, checks given volume group on all nodes
3614 vglist = self.rpc.call_vg_list(node_list)
3615 for node in node_list:
3616 msg = vglist[node].fail_msg
3618 # ignoring down node
3619 self.LogWarning("Error while gathering data on node %s"
3620 " (ignoring node): %s", node, msg)
3622 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3624 constants.MIN_VG_SIZE)
3626 raise errors.OpPrereqError("Error on node '%s': %s" %
3627 (node, vgstatus), errors.ECODE_ENVIRON)
3629 if self.op.drbd_helper:
3630 # checks given drbd helper on all nodes
3631 helpers = self.rpc.call_drbd_helper(node_list)
3632 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3634 self.LogInfo("Not checking drbd helper on offline node %s", node)
3636 msg = helpers[node].fail_msg
3638 raise errors.OpPrereqError("Error checking drbd helper on node"
3639 " '%s': %s" % (node, msg),
3640 errors.ECODE_ENVIRON)
3641 node_helper = helpers[node].payload
3642 if node_helper != self.op.drbd_helper:
3643 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3644 (node, node_helper), errors.ECODE_ENVIRON)
3646 self.cluster = cluster = self.cfg.GetClusterInfo()
3647 # validate params changes
3648 if self.op.beparams:
3649 objects.UpgradeBeParams(self.op.beparams)
3650 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3651 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3653 if self.op.ndparams:
3654 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3655 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3657 # TODO: we need a more general way to handle resetting
3658 # cluster-level parameters to default values
3659 if self.new_ndparams["oob_program"] == "":
3660 self.new_ndparams["oob_program"] = \
3661 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3663 if self.op.hv_state:
3664 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3665 self.cluster.hv_state_static)
3666 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3667 for hv, values in new_hv_state.items())
3669 if self.op.disk_state:
3670 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3671 self.cluster.disk_state_static)
3672 self.new_disk_state = \
3673 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3674 for name, values in svalues.items()))
3675 for storage, svalues in new_disk_state.items())
3677 if self.op.nicparams:
3678 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3679 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3680 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3683 # check all instances for consistency
3684 for instance in self.cfg.GetAllInstancesInfo().values():
3685 for nic_idx, nic in enumerate(instance.nics):
3686 params_copy = copy.deepcopy(nic.nicparams)
3687 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3689 # check parameter syntax
3691 objects.NIC.CheckParameterSyntax(params_filled)
3692 except errors.ConfigurationError, err:
3693 nic_errors.append("Instance %s, nic/%d: %s" %
3694 (instance.name, nic_idx, err))
3696 # if we're moving instances to routed, check that they have an ip
3697 target_mode = params_filled[constants.NIC_MODE]
3698 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3699 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3700 " address" % (instance.name, nic_idx))
3702 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3703 "\n".join(nic_errors))
3705 # hypervisor list/parameters
3706 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3707 if self.op.hvparams:
3708 for hv_name, hv_dict in self.op.hvparams.items():
3709 if hv_name not in self.new_hvparams:
3710 self.new_hvparams[hv_name] = hv_dict
3712 self.new_hvparams[hv_name].update(hv_dict)
3714 # disk template parameters
3715 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3716 if self.op.diskparams:
3717 for dt_name, dt_params in self.op.diskparams.items():
3718 if dt_name not in self.new_diskparams:
3719 self.new_diskparams[dt_name] = dt_params
3721 self.new_diskparams[dt_name].update(dt_params)
3723 # os hypervisor parameters
3724 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3726 for os_name, hvs in self.op.os_hvp.items():
3727 if os_name not in self.new_os_hvp:
3728 self.new_os_hvp[os_name] = hvs
3730 for hv_name, hv_dict in hvs.items():
3731 if hv_name not in self.new_os_hvp[os_name]:
3732 self.new_os_hvp[os_name][hv_name] = hv_dict
3734 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3737 self.new_osp = objects.FillDict(cluster.osparams, {})
3738 if self.op.osparams:
3739 for os_name, osp in self.op.osparams.items():
3740 if os_name not in self.new_osp:
3741 self.new_osp[os_name] = {}
3743 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3746 if not self.new_osp[os_name]:
3747 # we removed all parameters
3748 del self.new_osp[os_name]
3750 # check the parameter validity (remote check)
3751 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3752 os_name, self.new_osp[os_name])
3754 # changes to the hypervisor list
3755 if self.op.enabled_hypervisors is not None:
3756 self.hv_list = self.op.enabled_hypervisors
3757 for hv in self.hv_list:
3758 # if the hypervisor doesn't already exist in the cluster
3759 # hvparams, we initialize it to empty, and then (in both
3760 # cases) we make sure to fill the defaults, as we might not
3761 # have a complete defaults list if the hypervisor wasn't
3762 # enabled before
3763 if hv not in new_hvp:
3764 new_hvp[hv] = {}
3765 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3766 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3768 self.hv_list = cluster.enabled_hypervisors
3770 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3771 # either the enabled list has changed, or the parameters have, validate
3772 for hv_name, hv_params in self.new_hvparams.items():
3773 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3774 (self.op.enabled_hypervisors and
3775 hv_name in self.op.enabled_hypervisors)):
3776 # either this is a new hypervisor, or its parameters have changed
3777 hv_class = hypervisor.GetHypervisor(hv_name)
3778 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3779 hv_class.CheckParameterSyntax(hv_params)
3780 _CheckHVParams(self, node_list, hv_name, hv_params)
3783 # no need to check any newly-enabled hypervisors, since the
3784 # defaults have already been checked in the above code-block
3785 for os_name, os_hvp in self.new_os_hvp.items():
3786 for hv_name, hv_params in os_hvp.items():
3787 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3788 # we need to fill in the new os_hvp on top of the actual hv_p
3789 cluster_defaults = self.new_hvparams.get(hv_name, {})
3790 new_osp = objects.FillDict(cluster_defaults, hv_params)
3791 hv_class = hypervisor.GetHypervisor(hv_name)
3792 hv_class.CheckParameterSyntax(new_osp)
3793 _CheckHVParams(self, node_list, hv_name, new_osp)
3795 if self.op.default_iallocator:
3796 alloc_script = utils.FindFile(self.op.default_iallocator,
3797 constants.IALLOCATOR_SEARCH_PATH,
3798 os.path.isfile)
3799 if alloc_script is None:
3800 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3801 " specified" % self.op.default_iallocator,
3802 errors.ECODE_INVAL)
3804 def Exec(self, feedback_fn):
3805 """Change the parameters of the cluster.
3808 if self.op.vg_name is not None:
3809 new_volume = self.op.vg_name
3812 if new_volume != self.cfg.GetVGName():
3813 self.cfg.SetVGName(new_volume)
3815 feedback_fn("Cluster LVM configuration already in desired"
3816 " state, not changing")
3817 if self.op.drbd_helper is not None:
3818 new_helper = self.op.drbd_helper
3821 if new_helper != self.cfg.GetDRBDHelper():
3822 self.cfg.SetDRBDHelper(new_helper)
3824 feedback_fn("Cluster DRBD helper already in desired state,"
3826 if self.op.hvparams:
3827 self.cluster.hvparams = self.new_hvparams
3828 if self.op.os_hvp:
3829 self.cluster.os_hvp = self.new_os_hvp
3830 if self.op.enabled_hypervisors is not None:
3831 self.cluster.hvparams = self.new_hvparams
3832 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3833 if self.op.beparams:
3834 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3835 if self.op.nicparams:
3836 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3837 if self.op.osparams:
3838 self.cluster.osparams = self.new_osp
3839 if self.op.ndparams:
3840 self.cluster.ndparams = self.new_ndparams
3841 if self.op.diskparams:
3842 self.cluster.diskparams = self.new_diskparams
3843 if self.op.hv_state:
3844 self.cluster.hv_state_static = self.new_hv_state
3845 if self.op.disk_state:
3846 self.cluster.disk_state_static = self.new_disk_state
3848 if self.op.candidate_pool_size is not None:
3849 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3850 # we need to update the pool size here, otherwise the save will fail
3851 _AdjustCandidatePool(self, [])
3853 if self.op.maintain_node_health is not None:
3854 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3855 feedback_fn("Note: CONFD was disabled at build time, node health"
3856 " maintenance is not useful (still enabling it)")
3857 self.cluster.maintain_node_health = self.op.maintain_node_health
3859 if self.op.prealloc_wipe_disks is not None:
3860 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3862 if self.op.add_uids is not None:
3863 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3865 if self.op.remove_uids is not None:
3866 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3868 if self.op.uid_pool is not None:
3869 self.cluster.uid_pool = self.op.uid_pool
3871 if self.op.default_iallocator is not None:
3872 self.cluster.default_iallocator = self.op.default_iallocator
3874 if self.op.reserved_lvs is not None:
3875 self.cluster.reserved_lvs = self.op.reserved_lvs
3877 if self.op.use_external_mip_script is not None:
3878 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3880 def helper_os(aname, mods, desc):
3882 lst = getattr(self.cluster, aname)
3883 for key, val in mods:
3884 if key == constants.DDM_ADD:
3885 if val in lst:
3886 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3887 else:
3888 lst.append(val)
3889 elif key == constants.DDM_REMOVE:
3890 if val in lst:
3891 lst.remove(val)
3892 else:
3893 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3894 else:
3895 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3897 if self.op.hidden_os:
3898 helper_os("hidden_os", self.op.hidden_os, "hidden")
3900 if self.op.blacklisted_os:
3901 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3903 if self.op.master_netdev:
3904 master_params = self.cfg.GetMasterNetworkParameters()
3905 ems = self.cfg.GetUseExternalMipScript()
3906 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3907 self.cluster.master_netdev)
3908 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3909 master_params, ems)
3910 result.Raise("Could not disable the master ip")
3911 feedback_fn("Changing master_netdev from %s to %s" %
3912 (master_params.netdev, self.op.master_netdev))
3913 self.cluster.master_netdev = self.op.master_netdev
3915 if self.op.master_netmask:
3916 master_params = self.cfg.GetMasterNetworkParameters()
3917 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3918 result = self.rpc.call_node_change_master_netmask(master_params.name,
3919 master_params.netmask,
3920 self.op.master_netmask,
3921 master_params.ip,
3922 master_params.netdev)
3924 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3927 self.cluster.master_netmask = self.op.master_netmask
3929 self.cfg.Update(self.cluster, feedback_fn)
3931 if self.op.master_netdev:
3932 master_params = self.cfg.GetMasterNetworkParameters()
3933 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3934 self.op.master_netdev)
3935 ems = self.cfg.GetUseExternalMipScript()
3936 result = self.rpc.call_node_activate_master_ip(master_params.name,
3937 master_params, ems)
3938 if result.fail_msg:
3939 self.LogWarning("Could not re-enable the master ip on"
3940 " the master, please restart manually: %s",
3941 result.fail_msg)
3944 def _UploadHelper(lu, nodes, fname):
3945 """Helper for uploading a file and showing warnings.
3948 if os.path.exists(fname):
3949 result = lu.rpc.call_upload_file(nodes, fname)
3950 for to_node, to_result in result.items():
3951 msg = to_result.fail_msg
3953 msg = ("Copy of file %s to node %s failed: %s" %
3954 (fname, to_node, msg))
3955 lu.proc.LogWarning(msg)
3958 def _ComputeAncillaryFiles(cluster, redist):
3959 """Compute files external to Ganeti which need to be consistent.
3961 @type redist: boolean
3962 @param redist: Whether to include files which need to be redistributed
3965 # Compute files for all nodes
3967 constants.SSH_KNOWN_HOSTS_FILE,
3968 constants.CONFD_HMAC_KEY,
3969 constants.CLUSTER_DOMAIN_SECRET_FILE,
3970 constants.SPICE_CERT_FILE,
3971 constants.SPICE_CACERT_FILE,
3972 constants.RAPI_USERS_FILE,
3976 files_all.update(constants.ALL_CERT_FILES)
3977 files_all.update(ssconf.SimpleStore().GetFileList())
3979 # we need to ship at least the RAPI certificate
3980 files_all.add(constants.RAPI_CERT_FILE)
3982 if cluster.modify_etc_hosts:
3983 files_all.add(constants.ETC_HOSTS)
3985 # Files which are optional, these must:
3986 # - be present in one other category as well
3987 # - either exist or not exist on all nodes of that category (mc, vm all)
3989 constants.RAPI_USERS_FILE,
3992 # Files which should only be on master candidates
3996 files_mc.add(constants.CLUSTER_CONF_FILE)
3998 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3999 # replication
4000 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4002 # Files which should only be on VM-capable nodes
4003 files_vm = set(filename
4004 for hv_name in cluster.enabled_hypervisors
4005 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4007 files_opt |= set(filename
4008 for hv_name in cluster.enabled_hypervisors
4009 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4011 # Filenames in each category must be unique
4012 all_files_set = files_all | files_mc | files_vm
4013 assert (len(all_files_set) ==
4014 sum(map(len, [files_all, files_mc, files_vm]))), \
4015 "Found file listed in more than one file list"
4017 # Optional files must be present in one other category
4018 assert all_files_set.issuperset(files_opt), \
4019 "Optional file not in a different required list"
4021 return (files_all, files_opt, files_mc, files_vm)
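# A short usage sketch for the helper above (illustrative only, never
# called; the returned "plan" structure is hypothetical):
def _ExampleAncillaryFilePlan(cluster):
  """Illustrative only: shows how the four categories relate."""
  (files_all, files_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)
  # optional files never form a category of their own
  assert files_opt.issubset(files_all | files_mc | files_vm)
  return {
    "all-nodes": files_all,            # distributed everywhere
    "master-candidates": files_mc,     # config-carrying nodes only
    "vm-capable": files_vm,            # hypervisor ancillary files
  }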
4024 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4025 """Distribute additional files which are part of the cluster configuration.
4027 ConfigWriter takes care of distributing the config and ssconf files, but
4028 there are more files which should be distributed to all nodes. This function
4029 makes sure those are copied.
4031 @param lu: calling logical unit
4032 @param additional_nodes: list of nodes not in the config to distribute to
4033 @type additional_vm: boolean
4034 @param additional_vm: whether the additional nodes are vm-capable or not
4037 # Gather target nodes
4038 cluster = lu.cfg.GetClusterInfo()
4039 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4041 online_nodes = lu.cfg.GetOnlineNodeList()
4042 vm_nodes = lu.cfg.GetVmCapableNodeList()
4044 if additional_nodes is not None:
4045 online_nodes.extend(additional_nodes)
4047 vm_nodes.extend(additional_nodes)
4049 # Never distribute to master node
4050 for nodelist in [online_nodes, vm_nodes]:
4051 if master_info.name in nodelist:
4052 nodelist.remove(master_info.name)
4055 (files_all, _, files_mc, files_vm) = \
4056 _ComputeAncillaryFiles(cluster, True)
4058 # Never re-distribute configuration file from here
4059 assert not (constants.CLUSTER_CONF_FILE in files_all or
4060 constants.CLUSTER_CONF_FILE in files_vm)
4061 assert not files_mc, "Master candidates not handled in this function"
4064 (online_nodes, files_all),
4065 (vm_nodes, files_vm),
4069 for (node_list, files) in filemap:
4071 _UploadHelper(lu, node_list, fname)
4074 class LUClusterRedistConf(NoHooksLU):
4075 """Force the redistribution of cluster configuration.
4077 This is a very simple LU.
4082 def ExpandNames(self):
4083 self.needed_locks = {
4084 locking.LEVEL_NODE: locking.ALL_SET,
4086 self.share_locks[locking.LEVEL_NODE] = 1
4088 def Exec(self, feedback_fn):
4089 """Redistribute the configuration.
4092 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4093 _RedistributeAncillaryFiles(self)
4096 class LUClusterActivateMasterIp(NoHooksLU):
4097 """Activate the master IP on the master node.
4100 def Exec(self, feedback_fn):
4101 """Activate the master IP.
4104 master_params = self.cfg.GetMasterNetworkParameters()
4105 ems = self.cfg.GetUseExternalMipScript()
4106 result = self.rpc.call_node_activate_master_ip(master_params.name,
4107 master_params, ems)
4108 result.Raise("Could not activate the master IP")
4111 class LUClusterDeactivateMasterIp(NoHooksLU):
4112 """Deactivate the master IP on the master node.
4115 def Exec(self, feedback_fn):
4116 """Deactivate the master IP.
4119 master_params = self.cfg.GetMasterNetworkParameters()
4120 ems = self.cfg.GetUseExternalMipScript()
4121 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4122 master_params, ems)
4123 result.Raise("Could not deactivate the master IP")
4126 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4127 """Sleep and poll for an instance's disk to sync.
4130 if not instance.disks or disks is not None and not disks:
4133 disks = _ExpandCheckDisks(instance, disks)
4136 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4138 node = instance.primary_node
4141 lu.cfg.SetDiskID(dev, node)
4143 # TODO: Convert to utils.Retry
4146 degr_retries = 10 # in seconds, as we sleep 1 second each time
4150 cumul_degraded = False
4151 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4152 msg = rstats.fail_msg
4154 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4157 raise errors.RemoteError("Can't contact node %s for mirror data,"
4158 " aborting." % node)
4161 rstats = rstats.payload
4163 for i, mstat in enumerate(rstats):
4165 lu.LogWarning("Can't compute data for node %s/%s",
4166 node, disks[i].iv_name)
4169 cumul_degraded = (cumul_degraded or
4170 (mstat.is_degraded and mstat.sync_percent is None))
4171 if mstat.sync_percent is not None:
4173 if mstat.estimated_time is not None:
4174 rem_time = ("%s remaining (estimated)" %
4175 utils.FormatSeconds(mstat.estimated_time))
4176 max_time = mstat.estimated_time
4178 rem_time = "no time estimate"
4179 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4180 (disks[i].iv_name, mstat.sync_percent, rem_time))
4182 # if we're done but degraded, let's do a few small retries, to
4183 # make sure we see a stable and not transient situation; therefore
4184 # we force restart of the loop
4185 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4186 logging.info("Degraded disks found, %d retries left", degr_retries)
4194 time.sleep(min(60, max_time))
4197 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4198 return not cumul_degraded
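# The loop above allows a short grace period when the mirrors claim to be
# done but still look degraded. A self-contained sketch of that pattern
# (poll_fn is a hypothetical callable returning a (done, degraded) pair;
# this helper is not called anywhere):
def _ExampleRetryWhileDegraded(poll_fn, retries=10, wait=1):
  """Illustrative only: retry a few times if the state looks transient."""
  while True:
    (done, degraded) = poll_fn()
    if done and degraded and retries > 0:
      retries -= 1
      time.sleep(wait)
      continue
    return done and not degraded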
4201 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4202 """Check that mirrors are not degraded.
4204 The ldisk parameter, if True, will change the test from the
4205 is_degraded attribute (which represents overall non-ok status for
4206 the device(s)) to the ldisk (representing the local storage status).
4209 lu.cfg.SetDiskID(dev, node)
4213 if on_primary or dev.AssembleOnSecondary():
4214 rstats = lu.rpc.call_blockdev_find(node, dev)
4215 msg = rstats.fail_msg
4217 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4219 elif not rstats.payload:
4220 lu.LogWarning("Can't find disk on node %s", node)
4224 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4226 result = result and not rstats.payload.is_degraded
4229 for child in dev.children:
4230 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4235 class LUOobCommand(NoHooksLU):
4236 """Logical unit for OOB handling.
4240 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4242 def ExpandNames(self):
4243 """Gather locks we need.
4246 if self.op.node_names:
4247 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4248 lock_names = self.op.node_names
4250 lock_names = locking.ALL_SET
4252 self.needed_locks = {
4253 locking.LEVEL_NODE: lock_names,
4256 def CheckPrereq(self):
4257 """Check prerequisites.
4260 - the node exists in the configuration
4263 Any errors are signaled by raising errors.OpPrereqError.
4267 self.master_node = self.cfg.GetMasterNode()
4269 assert self.op.power_delay >= 0.0
4271 if self.op.node_names:
4272 if (self.op.command in self._SKIP_MASTER and
4273 self.master_node in self.op.node_names):
4274 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4275 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4277 if master_oob_handler:
4278 additional_text = ("run '%s %s %s' if you want to operate on the"
4279 " master regardless") % (master_oob_handler,
4280 self.op.command,
4281 self.master_node)
4282 else:
4283 additional_text = "it does not support out-of-band operations"
4285 raise errors.OpPrereqError(("Operating on the master node %s is not"
4286 " allowed for %s; %s") %
4287 (self.master_node, self.op.command,
4288 additional_text), errors.ECODE_INVAL)
4290 self.op.node_names = self.cfg.GetNodeList()
4291 if self.op.command in self._SKIP_MASTER:
4292 self.op.node_names.remove(self.master_node)
4294 if self.op.command in self._SKIP_MASTER:
4295 assert self.master_node not in self.op.node_names
4297 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4299 raise errors.OpPrereqError("Node %s not found" % node_name,
4302 self.nodes.append(node)
4304 if (not self.op.ignore_status and
4305 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4306 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4307 " not marked offline") % node_name,
4310 def Exec(self, feedback_fn):
4311 """Execute OOB and return result if we expect any.
4314 master_node = self.master_node
4317 for idx, node in enumerate(utils.NiceSort(self.nodes,
4318 key=lambda node: node.name)):
4319 node_entry = [(constants.RS_NORMAL, node.name)]
4320 ret.append(node_entry)
4322 oob_program = _SupportsOob(self.cfg, node)
4325 node_entry.append((constants.RS_UNAVAIL, None))
4328 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4329 self.op.command, oob_program, node.name)
4330 result = self.rpc.call_run_oob(master_node, oob_program,
4331 self.op.command, node.name,
4335 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4336 node.name, result.fail_msg)
4337 node_entry.append((constants.RS_NODATA, None))
4340 self._CheckPayload(result)
4341 except errors.OpExecError, err:
4342 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4344 node_entry.append((constants.RS_NODATA, None))
4346 if self.op.command == constants.OOB_HEALTH:
4347 # For health we should log important events
4348 for item, status in result.payload:
4349 if status in [constants.OOB_STATUS_WARNING,
4350 constants.OOB_STATUS_CRITICAL]:
4351 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4352 item, node.name, status)
4354 if self.op.command == constants.OOB_POWER_ON:
4356 elif self.op.command == constants.OOB_POWER_OFF:
4357 node.powered = False
4358 elif self.op.command == constants.OOB_POWER_STATUS:
4359 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4360 if powered != node.powered:
4361 logging.warning(("Recorded power state (%s) of node '%s' does not"
4362 " match actual power state (%s)"), node.powered,
4365 # For configuration changing commands we should update the node
4366 if self.op.command in (constants.OOB_POWER_ON,
4367 constants.OOB_POWER_OFF):
4368 self.cfg.Update(node, feedback_fn)
4370 node_entry.append((constants.RS_NORMAL, result.payload))
4372 if (self.op.command == constants.OOB_POWER_ON and
4373 idx < len(self.nodes) - 1):
4374 time.sleep(self.op.power_delay)
4378 def _CheckPayload(self, result):
4379 """Checks if the payload is valid.
4381 @param result: RPC result
4382 @raises errors.OpExecError: If payload is not valid
4386 if self.op.command == constants.OOB_HEALTH:
4387 if not isinstance(result.payload, list):
4388 errs.append("command 'health' is expected to return a list but got %s" %
4389 type(result.payload))
4391 for item, status in result.payload:
4392 if status not in constants.OOB_STATUSES:
4393 errs.append("health item '%s' has invalid status '%s'" %
4396 if self.op.command == constants.OOB_POWER_STATUS:
4397 if not isinstance(result.payload, dict):
4398 errs.append("power-status is expected to return a dict but got %s" %
4399 type(result.payload))
4401 if self.op.command in [
4402 constants.OOB_POWER_ON,
4403 constants.OOB_POWER_OFF,
4404 constants.OOB_POWER_CYCLE,
4406 if result.payload is not None:
4407 errs.append("%s is expected to not return payload but got '%s'" %
4408 (self.op.command, result.payload))
4411 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4412 utils.CommaJoin(errs))
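# Example payloads matching the checks above (illustrative values only;
# the helper is never called):
def _ExampleOobPayloads():
  """Illustrative only: one well-formed payload per command family."""
  return {
    # health: a list of (item, status) pairs
    constants.OOB_HEALTH: [("disk0", constants.OOB_STATUS_WARNING)],
    # power status: a dict with the "powered" flag
    constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True},
    # power on/off/cycle: no payload at all
    constants.OOB_POWER_ON: None,
  }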
4415 class _OsQuery(_QueryBase):
4416 FIELDS = query.OS_FIELDS
4418 def ExpandNames(self, lu):
4419 # Lock all nodes in shared mode
4420 # Temporary removal of locks, should be reverted later
4421 # TODO: reintroduce locks when they are lighter-weight
4422 lu.needed_locks = {}
4423 #self.share_locks[locking.LEVEL_NODE] = 1
4424 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4426 # The following variables interact with _QueryBase._GetNames
4428 self.wanted = self.names
4430 self.wanted = locking.ALL_SET
4432 self.do_locking = self.use_locking
4434 def DeclareLocks(self, lu, level):
4438 def _DiagnoseByOS(rlist):
4439 """Remaps a per-node return list into an a per-os per-node dictionary
4441 @param rlist: a map with node names as keys and OS objects as values
4444 @return: a dictionary with osnames as keys and as value another
4445 map, with nodes as keys and tuples of (path, status, diagnose,
4446 variants, parameters, api_versions) as values, eg::
4448 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4449 (/srv/..., False, "invalid api")],
4450 "node2": [(/srv/..., True, "", [], [])]}
4455 # we build here the list of nodes that didn't fail the RPC (at RPC
4456 # level), so that nodes with a non-responding node daemon don't
4457 # make all OSes invalid
4458 good_nodes = [node_name for node_name in rlist
4459 if not rlist[node_name].fail_msg]
4460 for node_name, nr in rlist.items():
4461 if nr.fail_msg or not nr.payload:
4463 for (name, path, status, diagnose, variants,
4464 params, api_versions) in nr.payload:
4465 if name not in all_os:
4466 # build a list of nodes for this os containing empty lists
4467 # for each node in node_list
4469 for nname in good_nodes:
4470 all_os[name][nname] = []
4471 # convert params from [name, help] to (name, help)
4472 params = [tuple(v) for v in params]
4473 all_os[name][node_name].append((path, status, diagnose,
4474 variants, params, api_versions))
4477 def _GetQueryData(self, lu):
4478 """Computes the list of nodes and their attributes.
4481 # Locking is not used
4482 assert not (compat.any(lu.glm.is_owned(level)
4483 for level in locking.LEVELS
4484 if level != locking.LEVEL_CLUSTER) or
4485 self.do_locking or self.use_locking)
4487 valid_nodes = [node.name
4488 for node in lu.cfg.GetAllNodesInfo().values()
4489 if not node.offline and node.vm_capable]
4490 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4491 cluster = lu.cfg.GetClusterInfo()
4495 for (os_name, os_data) in pol.items():
4496 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4497 hidden=(os_name in cluster.hidden_os),
4498 blacklisted=(os_name in cluster.blacklisted_os))
4502 api_versions = set()
4504 for idx, osl in enumerate(os_data.values()):
4505 info.valid = bool(info.valid and osl and osl[0][1])
4509 (node_variants, node_params, node_api) = osl[0][3:6]
4512 variants.update(node_variants)
4513 parameters.update(node_params)
4514 api_versions.update(node_api)
4516 # Filter out inconsistent values
4517 variants.intersection_update(node_variants)
4518 parameters.intersection_update(node_params)
4519 api_versions.intersection_update(node_api)
4521 info.variants = list(variants)
4522 info.parameters = list(parameters)
4523 info.api_versions = list(api_versions)
4525 data[os_name] = info
4527 # Prepare data in requested order
4528 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4532 class LUOsDiagnose(NoHooksLU):
4533 """Logical unit for OS diagnose/query.
4539 def _BuildFilter(fields, names):
4540 """Builds a filter for querying OSes.
4543 name_filter = qlang.MakeSimpleFilter("name", names)
4545 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4546 # respective field is not requested
4547 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4548 for fname in ["hidden", "blacklisted"]
4549 if fname not in fields]
4550 if "valid" not in fields:
4551 status_filter.append([qlang.OP_TRUE, "valid"])
4554 status_filter.insert(0, qlang.OP_AND)
4556 status_filter = None
4558 if name_filter and status_filter:
4559 return [qlang.OP_AND, name_filter, status_filter]
4563 return status_filter
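# For example (hypothetical input, and assuming MakeSimpleFilter returns no
# filter for an empty name list), _BuildFilter(["name", "variants"], [])
# yields roughly:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless those
# fields were explicitly requested.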
4565 def CheckArguments(self):
4566 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4567 self.op.output_fields, False)
4569 def ExpandNames(self):
4570 self.oq.ExpandNames(self)
4572 def Exec(self, feedback_fn):
4573 return self.oq.OldStyleQuery(self)
4576 class LUNodeRemove(LogicalUnit):
4577 """Logical unit for removing a node.
4580 HPATH = "node-remove"
4581 HTYPE = constants.HTYPE_NODE
4583 def BuildHooksEnv(self):
4586 This doesn't run on the target node in the pre phase as a failed
4587 node would then be impossible to remove.
4591 "OP_TARGET": self.op.node_name,
4592 "NODE_NAME": self.op.node_name,
4595 def BuildHooksNodes(self):
4596 """Build hooks nodes.
4599 all_nodes = self.cfg.GetNodeList()
4601 all_nodes.remove(self.op.node_name)
4603 logging.warning("Node '%s', which is about to be removed, was not found"
4604 " in the list of all nodes", self.op.node_name)
4605 return (all_nodes, all_nodes)
4607 def CheckPrereq(self):
4608 """Check prerequisites.
4611 - the node exists in the configuration
4612 - it does not have primary or secondary instances
4613 - it's not the master
4615 Any errors are signaled by raising errors.OpPrereqError.
4618 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4619 node = self.cfg.GetNodeInfo(self.op.node_name)
4620 assert node is not None
4622 masternode = self.cfg.GetMasterNode()
4623 if node.name == masternode:
4624 raise errors.OpPrereqError("Node is the master node, failover to another"
4625 " node is required", errors.ECODE_INVAL)
4627 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4628 if node.name in instance.all_nodes:
4629 raise errors.OpPrereqError("Instance %s is still running on the node,"
4630 " please remove first" % instance_name,
4632 self.op.node_name = node.name
4635 def Exec(self, feedback_fn):
4636 """Removes the node from the cluster.
4640 logging.info("Stopping the node daemon and removing configs from node %s",
4643 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4645 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4648 # Promote nodes to master candidate as needed
4649 _AdjustCandidatePool(self, exceptions=[node.name])
4650 self.context.RemoveNode(node.name)
4652 # Run post hooks on the node before it's removed
4653 _RunPostHook(self, node.name)
4655 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4656 msg = result.fail_msg
4658 self.LogWarning("Errors encountered on the remote node while leaving"
4659 " the cluster: %s", msg)
4661 # Remove node from our /etc/hosts
4662 if self.cfg.GetClusterInfo().modify_etc_hosts:
4663 master_node = self.cfg.GetMasterNode()
4664 result = self.rpc.call_etc_hosts_modify(master_node,
4665 constants.ETC_HOSTS_REMOVE,
4667 result.Raise("Can't update hosts file with new host data")
4668 _RedistributeAncillaryFiles(self)
4671 class _NodeQuery(_QueryBase):
4672 FIELDS = query.NODE_FIELDS
4674 def ExpandNames(self, lu):
4675 lu.needed_locks = {}
4676 lu.share_locks = _ShareAll()
4679 self.wanted = _GetWantedNodes(lu, self.names)
4681 self.wanted = locking.ALL_SET
4683 self.do_locking = (self.use_locking and
4684 query.NQ_LIVE in self.requested_data)
4687 # If any non-static field is requested we need to lock the nodes
4688 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4690 def DeclareLocks(self, lu, level):
4693 def _GetQueryData(self, lu):
4694 """Computes the list of nodes and their attributes.
4697 all_info = lu.cfg.GetAllNodesInfo()
4699 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4701 # Gather data as requested
4702 if query.NQ_LIVE in self.requested_data:
4703 # filter out non-vm_capable nodes
4704 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4706 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4707 [lu.cfg.GetHypervisorType()])
4708 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4709 for (name, nresult) in node_data.items()
4710 if not nresult.fail_msg and nresult.payload)
4714 if query.NQ_INST in self.requested_data:
4715 node_to_primary = dict([(name, set()) for name in nodenames])
4716 node_to_secondary = dict([(name, set()) for name in nodenames])
4718 inst_data = lu.cfg.GetAllInstancesInfo()
4720 for inst in inst_data.values():
4721 if inst.primary_node in node_to_primary:
4722 node_to_primary[inst.primary_node].add(inst.name)
4723 for secnode in inst.secondary_nodes:
4724 if secnode in node_to_secondary:
4725 node_to_secondary[secnode].add(inst.name)
4727 node_to_primary = None
4728 node_to_secondary = None
4730 if query.NQ_OOB in self.requested_data:
4731 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4732 for name, node in all_info.iteritems())
4736 if query.NQ_GROUP in self.requested_data:
4737 groups = lu.cfg.GetAllNodeGroupsInfo()
4741 return query.NodeQueryData([all_info[name] for name in nodenames],
4742 live_data, lu.cfg.GetMasterNode(),
4743 node_to_primary, node_to_secondary, groups,
4744 oob_support, lu.cfg.GetClusterInfo())
4747 class LUNodeQuery(NoHooksLU):
4748 """Logical unit for querying nodes.
4751 # pylint: disable=W0142
4754 def CheckArguments(self):
4755 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4756 self.op.output_fields, self.op.use_locking)
4758 def ExpandNames(self):
4759 self.nq.ExpandNames(self)
4761 def DeclareLocks(self, level):
4762 self.nq.DeclareLocks(self, level)
4764 def Exec(self, feedback_fn):
4765 return self.nq.OldStyleQuery(self)
4768 class LUNodeQueryvols(NoHooksLU):
4769 """Logical unit for getting volumes on node(s).
4773 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4774 _FIELDS_STATIC = utils.FieldSet("node")
4776 def CheckArguments(self):
4777 _CheckOutputFields(static=self._FIELDS_STATIC,
4778 dynamic=self._FIELDS_DYNAMIC,
4779 selected=self.op.output_fields)
4781 def ExpandNames(self):
4782 self.share_locks = _ShareAll()
4783 self.needed_locks = {}
4785 if not self.op.nodes:
4786 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4788 self.needed_locks[locking.LEVEL_NODE] = \
4789 _GetWantedNodes(self, self.op.nodes)
4791 def Exec(self, feedback_fn):
4792 """Computes the list of nodes and their attributes.
4795 nodenames = self.owned_locks(locking.LEVEL_NODE)
4796 volumes = self.rpc.call_node_volumes(nodenames)
4798 ilist = self.cfg.GetAllInstancesInfo()
4799 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4802 for node in nodenames:
4803 nresult = volumes[node]
4806 msg = nresult.fail_msg
4808 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4811 node_vols = sorted(nresult.payload,
4812 key=operator.itemgetter("dev"))
4814 for vol in node_vols:
4815 node_output = []
4816 for field in self.op.output_fields:
4817 if field == "node":
4818 val = node
4819 elif field == "phys":
4820 val = vol["dev"]
4821 elif field == "vg":
4822 val = vol["vg"]
4823 elif field == "name":
4824 val = vol["name"]
4825 elif field == "size":
4826 val = int(float(vol["size"]))
4827 elif field == "instance":
4828 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4829 else:
4830 raise errors.ParameterError(field)
4831 node_output.append(str(val))
4833 output.append(node_output)
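# Each row of the call_node_volumes payload is expected to look roughly
# like (hypothetical values):
#   {"dev": "/dev/xenvg/disk0", "vg": "xenvg", "name": "disk0", "size": 10240.0}
# the "instance" column is not part of the payload; it is resolved through
# vol2inst using the "<vg>/<name>" key built above.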
4838 class LUNodeQueryStorage(NoHooksLU):
4839 """Logical unit for getting information on storage units on node(s).
4842 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4845 def CheckArguments(self):
4846 _CheckOutputFields(static=self._FIELDS_STATIC,
4847 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4848 selected=self.op.output_fields)
4850 def ExpandNames(self):
4851 self.share_locks = _ShareAll()
4852 self.needed_locks = {}
4855 self.needed_locks[locking.LEVEL_NODE] = \
4856 _GetWantedNodes(self, self.op.nodes)
4858 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4860 def Exec(self, feedback_fn):
4861 """Computes the list of nodes and their attributes.
4864 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4866 # Always get name to sort by
4867 if constants.SF_NAME in self.op.output_fields:
4868 fields = self.op.output_fields[:]
4870 fields = [constants.SF_NAME] + self.op.output_fields
4872 # Never ask for node or type as it's only known to the LU
4873 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4874 while extra in fields:
4875 fields.remove(extra)
4877 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4878 name_idx = field_idx[constants.SF_NAME]
4880 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4881 data = self.rpc.call_storage_list(self.nodes,
4882 self.op.storage_type, st_args,
4883 self.op.name, fields)
4887 for node in utils.NiceSort(self.nodes):
4888 nresult = data[node]
4892 msg = nresult.fail_msg
4894 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4897 rows = dict([(row[name_idx], row) for row in nresult.payload])
4899 for name in utils.NiceSort(rows.keys()):
4904 for field in self.op.output_fields:
4905 if field == constants.SF_NODE:
4906 val = node
4907 elif field == constants.SF_TYPE:
4908 val = self.op.storage_type
4909 elif field in field_idx:
4910 val = row[field_idx[field]]
4911 else:
4912 raise errors.ParameterError(field)
4921 class _InstanceQuery(_QueryBase):
4922 FIELDS = query.INSTANCE_FIELDS
4924 def ExpandNames(self, lu):
4925 lu.needed_locks = {}
4926 lu.share_locks = _ShareAll()
4929 self.wanted = _GetWantedInstances(lu, self.names)
4931 self.wanted = locking.ALL_SET
4933 self.do_locking = (self.use_locking and
4934 query.IQ_LIVE in self.requested_data)
4936 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4937 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4938 lu.needed_locks[locking.LEVEL_NODE] = []
4939 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4941 self.do_grouplocks = (self.do_locking and
4942 query.IQ_NODES in self.requested_data)
4944 def DeclareLocks(self, lu, level):
4946 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4947 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4949 # Lock all groups used by instances optimistically; this requires going
4950 # via the node before it's locked, requiring verification later on
4951 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4953 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4954 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4955 elif level == locking.LEVEL_NODE:
4956 lu._LockInstancesNodes() # pylint: disable=W0212
4959 def _CheckGroupLocks(lu):
4960 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4961 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4963 # Check if node groups for locked instances are still correct
4964 for instance_name in owned_instances:
4965 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4967 def _GetQueryData(self, lu):
4968 """Computes the list of instances and their attributes.
4971 if self.do_grouplocks:
4972 self._CheckGroupLocks(lu)
4974 cluster = lu.cfg.GetClusterInfo()
4975 all_info = lu.cfg.GetAllInstancesInfo()
4977 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4979 instance_list = [all_info[name] for name in instance_names]
4980 nodes = frozenset(itertools.chain(*(inst.all_nodes
4981 for inst in instance_list)))
4982 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4985 wrongnode_inst = set()
4987 # Gather data as requested
4988 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4990 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4992 result = node_data[name]
4994 # offline nodes will be in both lists
4995 assert result.fail_msg
4996 offline_nodes.append(name)
4998 bad_nodes.append(name)
4999 elif result.payload:
5000 for inst in result.payload:
5001 if inst in all_info:
5002 if all_info[inst].primary_node == name:
5003 live_data.update(result.payload)
5005 wrongnode_inst.add(inst)
5007 # orphan instance; we don't list it here as we don't
5008 # handle this case yet in the output of instance listing
5009 logging.warning("Orphan instance '%s' found on node %s",
5011 # else no instance is alive
5015 if query.IQ_DISKUSAGE in self.requested_data:
5016 disk_usage = dict((inst.name,
5017 _ComputeDiskSize(inst.disk_template,
5018 [{constants.IDISK_SIZE: disk.size}
5019 for disk in inst.disks]))
5020 for inst in instance_list)
5024 if query.IQ_CONSOLE in self.requested_data:
5026 for inst in instance_list:
5027 if inst.name in live_data:
5028 # Instance is running
5029 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5031 consinfo[inst.name] = None
5032 assert set(consinfo.keys()) == set(instance_names)
5036 if query.IQ_NODES in self.requested_data:
5037 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5039 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5040 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5041 for uuid in set(map(operator.attrgetter("group"),
5047 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5048 disk_usage, offline_nodes, bad_nodes,
5049 live_data, wrongnode_inst, consinfo,
5053 class LUQuery(NoHooksLU):
5054 """Query for resources/items of a certain kind.
5057 # pylint: disable=W0142
5060 def CheckArguments(self):
5061 qcls = _GetQueryImplementation(self.op.what)
5063 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5065 def ExpandNames(self):
5066 self.impl.ExpandNames(self)
5068 def DeclareLocks(self, level):
5069 self.impl.DeclareLocks(self, level)
5071 def Exec(self, feedback_fn):
5072 return self.impl.NewStyleQuery(self)
5075 class LUQueryFields(NoHooksLU):
5076 """Query for resources/items of a certain kind.
5079 # pylint: disable=W0142
5082 def CheckArguments(self):
5083 self.qcls = _GetQueryImplementation(self.op.what)
5085 def ExpandNames(self):
5086 self.needed_locks = {}
5088 def Exec(self, feedback_fn):
5089 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5092 class LUNodeModifyStorage(NoHooksLU):
5093 """Logical unit for modifying a storage volume on a node.
5098 def CheckArguments(self):
5099 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5101 storage_type = self.op.storage_type
5104 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5106 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5107 " modified" % storage_type,
5110 diff = set(self.op.changes.keys()) - modifiable
5112 raise errors.OpPrereqError("The following fields can not be modified for"
5113 " storage units of type '%s': %r" %
5114 (storage_type, list(diff)),
5117 def ExpandNames(self):
5118 self.needed_locks = {
5119 locking.LEVEL_NODE: self.op.node_name,
5122 def Exec(self, feedback_fn):
5123 """Computes the list of nodes and their attributes.
5126 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5127 result = self.rpc.call_storage_modify(self.op.node_name,
5128 self.op.storage_type, st_args,
5129 self.op.name, self.op.changes)
5130 result.Raise("Failed to modify storage unit '%s' on %s" %
5131 (self.op.name, self.op.node_name))
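# For illustration (storage type and field names are hypothetical): with
# MODIFIABLE_STORAGE_FIELDS = {"lvm-pv": set(["allocatable"])}, a request
# changing {"allocatable": False} on an "lvm-pv" unit passes CheckArguments,
# while {"size": 2048} is rejected because "size" is not in the modifiable
# set for that storage type.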
5134 class LUNodeAdd(LogicalUnit):
5135 """Logical unit for adding node to the cluster.
5139 HTYPE = constants.HTYPE_NODE
5140 _NFLAGS = ["master_capable", "vm_capable"]
5142 def CheckArguments(self):
5143 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5144 # validate/normalize the node name
5145 self.hostname = netutils.GetHostname(name=self.op.node_name,
5146 family=self.primary_ip_family)
5147 self.op.node_name = self.hostname.name
5149 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5150 raise errors.OpPrereqError("Cannot readd the master node",
5153 if self.op.readd and self.op.group:
5154 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5155 " being readded", errors.ECODE_INVAL)
5157 def BuildHooksEnv(self):
5160 This will run on all nodes before, and on all nodes + the new node after.
5164 "OP_TARGET": self.op.node_name,
5165 "NODE_NAME": self.op.node_name,
5166 "NODE_PIP": self.op.primary_ip,
5167 "NODE_SIP": self.op.secondary_ip,
5168 "MASTER_CAPABLE": str(self.op.master_capable),
5169 "VM_CAPABLE": str(self.op.vm_capable),
5172 def BuildHooksNodes(self):
5173 """Build hooks nodes.
5176 # Exclude added node
5177 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5178 post_nodes = pre_nodes + [self.op.node_name, ]
5180 return (pre_nodes, post_nodes)
5182 def CheckPrereq(self):
5183 """Check prerequisites.
5186 - the new node is not already in the config
5188 - its parameters (single/dual homed) match the cluster
5190 Any errors are signaled by raising errors.OpPrereqError.
5194 hostname = self.hostname
5195 node = hostname.name
5196 primary_ip = self.op.primary_ip = hostname.ip
5197 if self.op.secondary_ip is None:
5198 if self.primary_ip_family == netutils.IP6Address.family:
5199 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5200 " IPv4 address must be given as secondary",
5202 self.op.secondary_ip = primary_ip
5204 secondary_ip = self.op.secondary_ip
5205 if not netutils.IP4Address.IsValid(secondary_ip):
5206 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5207 " address" % secondary_ip, errors.ECODE_INVAL)
5209 node_list = cfg.GetNodeList()
5210 if not self.op.readd and node in node_list:
5211 raise errors.OpPrereqError("Node %s is already in the configuration" %
5212 node, errors.ECODE_EXISTS)
5213 elif self.op.readd and node not in node_list:
5214 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5217 self.changed_primary_ip = False
5219 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5220 if self.op.readd and node == existing_node_name:
5221 if existing_node.secondary_ip != secondary_ip:
5222 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5223 " address configuration as before",
5225 if existing_node.primary_ip != primary_ip:
5226 self.changed_primary_ip = True
5230 if (existing_node.primary_ip == primary_ip or
5231 existing_node.secondary_ip == primary_ip or
5232 existing_node.primary_ip == secondary_ip or
5233 existing_node.secondary_ip == secondary_ip):
5234 raise errors.OpPrereqError("New node ip address(es) conflict with"
5235 " existing node %s" % existing_node.name,
5236 errors.ECODE_NOTUNIQUE)
5238 # After this 'if' block, None is no longer a valid value for the
5239 # _capable op attributes
5241 old_node = self.cfg.GetNodeInfo(node)
5242 assert old_node is not None, "Can't retrieve locked node %s" % node
5243 for attr in self._NFLAGS:
5244 if getattr(self.op, attr) is None:
5245 setattr(self.op, attr, getattr(old_node, attr))
5247 for attr in self._NFLAGS:
5248 if getattr(self.op, attr) is None:
5249 setattr(self.op, attr, True)
5251 if self.op.readd and not self.op.vm_capable:
5252 pri, sec = cfg.GetNodeInstances(node)
5254 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5255 " flag set to false, but it already holds"
5256 " instances" % node,
5259 # check that the type of the node (single versus dual homed) is the
5260 # same as for the master
5261 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5262 master_singlehomed = myself.secondary_ip == myself.primary_ip
5263 newbie_singlehomed = secondary_ip == primary_ip
5264 if master_singlehomed != newbie_singlehomed:
5265 if master_singlehomed:
5266 raise errors.OpPrereqError("The master has no secondary ip but the"
5267 " new node has one",
5270 raise errors.OpPrereqError("The master has a secondary ip but the"
5271 " new node doesn't have one",
5274 # checks reachability
5275 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5276 raise errors.OpPrereqError("Node not reachable by ping",
5277 errors.ECODE_ENVIRON)
5279 if not newbie_singlehomed:
5280 # check reachability from my secondary ip to newbie's secondary ip
5281 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5282 source=myself.secondary_ip):
5283 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5284 " based ping to node daemon port",
5285 errors.ECODE_ENVIRON)
5292 if self.op.master_capable:
5293 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5295 self.master_candidate = False
5298 self.new_node = old_node
5300 node_group = cfg.LookupNodeGroup(self.op.group)
5301 self.new_node = objects.Node(name=node,
5302 primary_ip=primary_ip,
5303 secondary_ip=secondary_ip,
5304 master_candidate=self.master_candidate,
5305 offline=False, drained=False,
5308 if self.op.ndparams:
5309 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5311 def Exec(self, feedback_fn):
5312 """Adds the new node to the cluster.
5315 new_node = self.new_node
5316 node = new_node.name
5318 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5322 # We are adding a new node, so we assume it's powered
5322 new_node.powered = True
5324 # for re-adds, reset the offline/drained/master-candidate flags;
5325 # we need to reset here, otherwise offline would prevent RPC calls
5326 # later in the procedure; this also means that if the re-add
5327 # fails, we are left with a non-offlined, broken node
5329 new_node.drained = new_node.offline = False # pylint: disable=W0201
5330 self.LogInfo("Readding a node, the offline/drained flags were reset")
5331 # if we demote the node, we do cleanup later in the procedure
5332 new_node.master_candidate = self.master_candidate
5333 if self.changed_primary_ip:
5334 new_node.primary_ip = self.op.primary_ip
5336 # copy the master/vm_capable flags
5337 for attr in self._NFLAGS:
5338 setattr(new_node, attr, getattr(self.op, attr))
5340 # notify the user about any possible mc promotion
5341 if new_node.master_candidate:
5342 self.LogInfo("Node will be a master candidate")
5344 if self.op.ndparams:
5345 new_node.ndparams = self.op.ndparams
5347 new_node.ndparams = {}
5349 # check connectivity
5350 result = self.rpc.call_version([node])[node]
5351 result.Raise("Can't get version information from node %s" % node)
5352 if constants.PROTOCOL_VERSION == result.payload:
5353 logging.info("Communication to node %s fine, sw version %s match",
5354 node, result.payload)
5356 raise errors.OpExecError("Version mismatch master version %s,"
5357 " node version %s" %
5358 (constants.PROTOCOL_VERSION, result.payload))
5360 # Add node to our /etc/hosts, and add key to known_hosts
5361 if self.cfg.GetClusterInfo().modify_etc_hosts:
5362 master_node = self.cfg.GetMasterNode()
5363 result = self.rpc.call_etc_hosts_modify(master_node,
5364 constants.ETC_HOSTS_ADD,
5367 result.Raise("Can't update hosts file with new host data")
5369 if new_node.secondary_ip != new_node.primary_ip:
5370 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5373 node_verify_list = [self.cfg.GetMasterNode()]
5374 node_verify_param = {
5375 constants.NV_NODELIST: ([node], {}),
5376 # TODO: do a node-net-test as well?
5379 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5380 self.cfg.GetClusterName())
5381 for verifier in node_verify_list:
5382 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5383 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5385 for failed in nl_payload:
5386 feedback_fn("ssh/hostname verification failed"
5387 " (checking from %s): %s" %
5388 (verifier, nl_payload[failed]))
5389 raise errors.OpExecError("ssh/hostname verification failed")
5392 _RedistributeAncillaryFiles(self)
5393 self.context.ReaddNode(new_node)
5394 # make sure we redistribute the config
5395 self.cfg.Update(new_node, feedback_fn)
5396 # and make sure the new node will not have old files around
5397 if not new_node.master_candidate:
5398 result = self.rpc.call_node_demote_from_mc(new_node.name)
5399 msg = result.fail_msg
5401 self.LogWarning("Node failed to demote itself from master"
5402 " candidate status: %s" % msg)
5404 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5405 additional_vm=self.op.vm_capable)
5406 self.context.AddNode(new_node, self.proc.GetECId())
5409 class LUNodeSetParams(LogicalUnit):
5410 """Modifies the parameters of a node.
5412 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5413 to the node role (as _ROLE_*)
5414 @cvar _R2F: a dictionary from node role to tuples of flags
5415 @cvar _FLAGS: a list of attribute names corresponding to the flags
5418 HPATH = "node-modify"
5419 HTYPE = constants.HTYPE_NODE
5421 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5423 (True, False, False): _ROLE_CANDIDATE,
5424 (False, True, False): _ROLE_DRAINED,
5425 (False, False, True): _ROLE_OFFLINE,
5426 (False, False, False): _ROLE_REGULAR,
5428 _R2F = dict((v, k) for k, v in _F2R.items())
5429 _FLAGS = ["master_candidate", "drained", "offline"]
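# Example of the role mapping defined above: the flag tuple
# (mc, drained, offline) == (True, False, False) maps to _ROLE_CANDIDATE,
# and _R2F[_ROLE_OFFLINE] gives back (False, False, True); at most one of
# the three flags may be set, and the all-False tuple is _ROLE_REGULAR.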
5431 def CheckArguments(self):
5432 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5433 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5434 self.op.master_capable, self.op.vm_capable,
5435 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5437 if all_mods.count(None) == len(all_mods):
5438 raise errors.OpPrereqError("Please pass at least one modification",
5440 if all_mods.count(True) > 1:
5441 raise errors.OpPrereqError("Can't set the node into more than one"
5442 " state at the same time",
5445 # Boolean value that tells us whether we might be demoting from MC
5446 self.might_demote = (self.op.master_candidate == False or
5447 self.op.offline == True or
5448 self.op.drained == True or
5449 self.op.master_capable == False)
5451 if self.op.secondary_ip:
5452 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5453 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5454 " address" % self.op.secondary_ip,
5457 self.lock_all = self.op.auto_promote and self.might_demote
5458 self.lock_instances = self.op.secondary_ip is not None
5460 def _InstanceFilter(self, instance):
5461 """Filter for getting affected instances.
5464 return (instance.disk_template in constants.DTS_INT_MIRROR and
5465 self.op.node_name in instance.all_nodes)
5467 def ExpandNames(self):
5469 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5471 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5473 # Since modifying a node can have severe effects on currently running
5474 # operations the resource lock is at least acquired in shared mode
5475 self.needed_locks[locking.LEVEL_NODE_RES] = \
5476 self.needed_locks[locking.LEVEL_NODE]
5478 # Get node resource and instance locks in shared mode; they are not used
5479 # for anything but read-only access
5480 self.share_locks[locking.LEVEL_NODE_RES] = 1
5481 self.share_locks[locking.LEVEL_INSTANCE] = 1
5483 if self.lock_instances:
5484 self.needed_locks[locking.LEVEL_INSTANCE] = \
5485 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5487 def BuildHooksEnv(self):
5490 This runs on the master node.
5494 "OP_TARGET": self.op.node_name,
5495 "MASTER_CANDIDATE": str(self.op.master_candidate),
5496 "OFFLINE": str(self.op.offline),
5497 "DRAINED": str(self.op.drained),
5498 "MASTER_CAPABLE": str(self.op.master_capable),
5499 "VM_CAPABLE": str(self.op.vm_capable),
5502 def BuildHooksNodes(self):
5503 """Build hooks nodes.
5506 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5509 def CheckPrereq(self):
5510 """Check prerequisites.
5512 This only checks the instance list against the existing names.
5515 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5517 if self.lock_instances:
5518 affected_instances = \
5519 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5521 # Verify instance locks
5522 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5523 wanted_instances = frozenset(affected_instances.keys())
5524 if wanted_instances - owned_instances:
5525 raise errors.OpPrereqError("Instances affected by changing node %s's"
5526 " secondary IP address have changed since"
5527 " locks were acquired, wanted '%s', have"
5528 " '%s'; retry the operation" %
5530 utils.CommaJoin(wanted_instances),
5531 utils.CommaJoin(owned_instances)),
5534 affected_instances = None
5536 if (self.op.master_candidate is not None or
5537 self.op.drained is not None or
5538 self.op.offline is not None):
5539 # we can't change the master's node flags
5540 if self.op.node_name == self.cfg.GetMasterNode():
5541 raise errors.OpPrereqError("The master role can be changed"
5542 " only via master-failover",
5545 if self.op.master_candidate and not node.master_capable:
5546 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5547 " it a master candidate" % node.name,
5550 if self.op.vm_capable == False:
5551 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5553 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5554 " the vm_capable flag" % node.name,
5557 if node.master_candidate and self.might_demote and not self.lock_all:
5558 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5559 # check if after removing the current node, we're missing master
5561 (mc_remaining, mc_should, _) = \
5562 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5563 if mc_remaining < mc_should:
5564 raise errors.OpPrereqError("Not enough master candidates, please"
5565 " pass auto promote option to allow"
5566 " promotion", errors.ECODE_STATE)
5568 self.old_flags = old_flags = (node.master_candidate,
5569 node.drained, node.offline)
5570 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5571 self.old_role = old_role = self._F2R[old_flags]
5573 # Check for ineffective changes
5574 for attr in self._FLAGS:
5575 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5576 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5577 setattr(self.op, attr, None)
5579 # Past this point, any flag change to False means a transition
5580 # away from the respective state, as only real changes are kept
5582 # TODO: We might query the real power state if it supports OOB
5583 if _SupportsOob(self.cfg, node):
5584 if self.op.offline is False and not (node.powered or
5585 self.op.powered == True):
5586 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5587 " offline status can be reset") %
5589 elif self.op.powered is not None:
5590 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5591 " as it does not support out-of-band"
5592 " handling") % self.op.node_name)
5594 # If we're being deofflined/drained, we'll MC ourself if needed
5595 if (self.op.drained == False or self.op.offline == False or
5596 (self.op.master_capable and not node.master_capable)):
5597 if _DecideSelfPromotion(self):
5598 self.op.master_candidate = True
5599 self.LogInfo("Auto-promoting node to master candidate")
5601 # If we're no longer master capable, we'll demote ourselves from MC
5602 if self.op.master_capable == False and node.master_candidate:
5603 self.LogInfo("Demoting from master candidate")
5604 self.op.master_candidate = False
5607 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5608 if self.op.master_candidate:
5609 new_role = self._ROLE_CANDIDATE
5610 elif self.op.drained:
5611 new_role = self._ROLE_DRAINED
5612 elif self.op.offline:
5613 new_role = self._ROLE_OFFLINE
5614 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5615 # False is still in new flags, which means we're un-setting (the
5616 # offline/drained/master-candidate etc. flag)
5617 new_role = self._ROLE_REGULAR
5618 else: # no new flags, nothing, keep old role
5621 self.new_role = new_role
5623 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5624 # Trying to transition out of offline status
5625 # TODO: Use standard RPC runner, but make sure it works when the node is
5626 # still marked offline
5627 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5629 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5630 " to report its version: %s" %
5631 (node.name, result.fail_msg),
5634 self.LogWarning("Transitioning node from offline to online state"
5635 " without using re-add. Please make sure the node"
5638 if self.op.secondary_ip:
5639 # Ok even without locking, because this can't be changed by any LU
5640 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5641 master_singlehomed = master.secondary_ip == master.primary_ip
5642 if master_singlehomed and self.op.secondary_ip:
5643 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5644 " homed cluster", errors.ECODE_INVAL)
5646 assert not (frozenset(affected_instances) -
5647 self.owned_locks(locking.LEVEL_INSTANCE))
5649 if node.offline:
5650 if affected_instances:
5651 raise errors.OpPrereqError("Cannot change secondary IP address:"
5652 " offline node has instances (%s)"
5653 " configured to use it" %
5654 utils.CommaJoin(affected_instances.keys()))
5655 else:
5656 # On online nodes, check that no instances are running, and that
5657 # the node has the new ip and we can reach it.
5658 for instance in affected_instances.values():
5659 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5660 msg="cannot change secondary ip")
5662 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5663 if master.name != node.name:
5664 # check reachability from master secondary ip to new secondary ip
5665 if not netutils.TcpPing(self.op.secondary_ip,
5666 constants.DEFAULT_NODED_PORT,
5667 source=master.secondary_ip):
5668 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5669 " based ping to node daemon port",
5670 errors.ECODE_ENVIRON)
5672 if self.op.ndparams:
5673 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5674 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5675 self.new_ndparams = new_ndparams
5677 if self.op.hv_state:
5678 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5679 self.node.hv_state_static)
5681 if self.op.disk_state:
5682 self.new_disk_state = \
5683 _MergeAndVerifyDiskState(self.op.disk_state,
5684 self.node.disk_state_static)
5686 def Exec(self, feedback_fn):
5687 """Modifies a node.
5689 """
5690 node = self.node
5691 old_role = self.old_role
5692 new_role = self.new_role
5694 result = []
5696 if self.op.ndparams:
5697 node.ndparams = self.new_ndparams
5699 if self.op.powered is not None:
5700 node.powered = self.op.powered
5702 if self.op.hv_state:
5703 node.hv_state_static = self.new_hv_state
5705 if self.op.disk_state:
5706 node.disk_state_static = self.new_disk_state
5708 for attr in ["master_capable", "vm_capable"]:
5709 val = getattr(self.op, attr)
5710 if val is not None:
5711 setattr(node, attr, val)
5712 result.append((attr, str(val)))
5714 if new_role != old_role:
5715 # Tell the node to demote itself, if no longer MC and not offline
5716 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5717 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5718 if msg:
5719 self.LogWarning("Node failed to demote itself: %s", msg)
5721 new_flags = self._R2F[new_role]
5722 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5723 if of != nf:
5724 result.append((desc, str(nf)))
5725 (node.master_candidate, node.drained, node.offline) = new_flags
5727 # we locked all nodes, we adjust the CP before updating this node
5728 if self.lock_all:
5729 _AdjustCandidatePool(self, [node.name])
5731 if self.op.secondary_ip:
5732 node.secondary_ip = self.op.secondary_ip
5733 result.append(("secondary_ip", self.op.secondary_ip))
5735 # this will trigger configuration file update, if needed
5736 self.cfg.Update(node, feedback_fn)
5738 # this will trigger job queue propagation or cleanup if the mc
5739 # flag changed
5740 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5741 self.context.ReaddNode(node)
5743 return result
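# "result" collects the (parameter, new value) pairs that were actually
# changed, e.g. [("master_candidate", "True"), ("secondary_ip", "192.0.2.10")]
# (illustrative values); this list is what the opcode reports back.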
5746 class LUNodePowercycle(NoHooksLU):
5747 """Powercycles a node.
5752 def CheckArguments(self):
5753 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5754 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5755 raise errors.OpPrereqError("The node is the master and the force"
5756 " parameter was not set",
5757 errors.ECODE_INVAL)
5759 def ExpandNames(self):
5760 """Locking for PowercycleNode.
5762 This is a last-resort option and shouldn't block on other
5763 jobs. Therefore, we grab no locks.
5766 self.needed_locks = {}
5768 def Exec(self, feedback_fn):
5772 result = self.rpc.call_node_powercycle(self.op.node_name,
5773 self.cfg.GetHypervisorType())
5774 result.Raise("Failed to schedule the reboot")
5775 return result.payload
5778 class LUClusterQuery(NoHooksLU):
5779 """Query cluster configuration.
5784 def ExpandNames(self):
5785 self.needed_locks = {}
5787 def Exec(self, feedback_fn):
5788 """Return cluster config.
5791 cluster = self.cfg.GetClusterInfo()
5792 os_hvp = {}
5794 # Filter just for enabled hypervisors
5795 for os_name, hv_dict in cluster.os_hvp.items():
5796 os_hvp[os_name] = {}
5797 for hv_name, hv_params in hv_dict.items():
5798 if hv_name in cluster.enabled_hypervisors:
5799 os_hvp[os_name][hv_name] = hv_params
5801 # Convert ip_family to ip_version
5802 primary_ip_version = constants.IP4_VERSION
5803 if cluster.primary_ip_family == netutils.IP6Address.family:
5804 primary_ip_version = constants.IP6_VERSION
5806 result = {
5807 "software_version": constants.RELEASE_VERSION,
5808 "protocol_version": constants.PROTOCOL_VERSION,
5809 "config_version": constants.CONFIG_VERSION,
5810 "os_api_version": max(constants.OS_API_VERSIONS),
5811 "export_version": constants.EXPORT_VERSION,
5812 "architecture": (platform.architecture()[0], platform.machine()),
5813 "name": cluster.cluster_name,
5814 "master": cluster.master_node,
5815 "default_hypervisor": cluster.primary_hypervisor,
5816 "enabled_hypervisors": cluster.enabled_hypervisors,
5817 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5818 for hypervisor_name in cluster.enabled_hypervisors]),
5819 "os_hvp": os_hvp,
5820 "beparams": cluster.beparams,
5821 "osparams": cluster.osparams,
5822 "nicparams": cluster.nicparams,
5823 "ndparams": cluster.ndparams,
5824 "candidate_pool_size": cluster.candidate_pool_size,
5825 "master_netdev": cluster.master_netdev,
5826 "master_netmask": cluster.master_netmask,
5827 "use_external_mip_script": cluster.use_external_mip_script,
5828 "volume_group_name": cluster.volume_group_name,
5829 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5830 "file_storage_dir": cluster.file_storage_dir,
5831 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5832 "maintain_node_health": cluster.maintain_node_health,
5833 "ctime": cluster.ctime,
5834 "mtime": cluster.mtime,
5835 "uuid": cluster.uuid,
5836 "tags": list(cluster.GetTags()),
5837 "uid_pool": cluster.uid_pool,
5838 "default_iallocator": cluster.default_iallocator,
5839 "reserved_lvs": cluster.reserved_lvs,
5840 "primary_ip_version": primary_ip_version,
5841 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5842 "hidden_os": cluster.hidden_os,
5843 "blacklisted_os": cluster.blacklisted_os,
5844 }
5846 return result
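# This dictionary is the OpClusterQuery result; typical consumers include
# "gnt-cluster info" and the RAPI cluster-info resource (illustrative of
# common callers, not an exhaustive list).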
5849 class LUClusterConfigQuery(NoHooksLU):
5850 """Return configuration values.
5854 _FIELDS_DYNAMIC = utils.FieldSet()
5855 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5856 "watcher_pause", "volume_group_name")
5858 def CheckArguments(self):
5859 _CheckOutputFields(static=self._FIELDS_STATIC,
5860 dynamic=self._FIELDS_DYNAMIC,
5861 selected=self.op.output_fields)
5863 def ExpandNames(self):
5864 self.needed_locks = {}
5866 def Exec(self, feedback_fn):
5867 """Dump a representation of the cluster config to the standard output.
5869 """
5870 values = []
5871 for field in self.op.output_fields:
5872 if field == "cluster_name":
5873 entry = self.cfg.GetClusterName()
5874 elif field == "master_node":
5875 entry = self.cfg.GetMasterNode()
5876 elif field == "drain_flag":
5877 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5878 elif field == "watcher_pause":
5879 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5880 elif field == "volume_group_name":
5881 entry = self.cfg.GetVGName()
5882 else:
5883 raise errors.ParameterError(field)
5884 values.append(entry)
5886 return values
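# Illustrative example: output_fields=["cluster_name", "volume_group_name"]
# could yield ["cluster.example.com", "xenvg"]; values are appended in the
# same order as the requested fields.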
5888 class LUInstanceActivateDisks(NoHooksLU):
5889 """Bring up an instance's disks.
5894 def ExpandNames(self):
5895 self._ExpandAndLockInstance()
5896 self.needed_locks[locking.LEVEL_NODE] = []
5897 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5899 def DeclareLocks(self, level):
5900 if level == locking.LEVEL_NODE:
5901 self._LockInstancesNodes()
5903 def CheckPrereq(self):
5904 """Check prerequisites.
5906 This checks that the instance is in the cluster.
5909 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5910 assert self.instance is not None, \
5911 "Cannot retrieve locked instance %s" % self.op.instance_name
5912 _CheckNodeOnline(self, self.instance.primary_node)
5914 def Exec(self, feedback_fn):
5915 """Activate the disks.
5918 disks_ok, disks_info = \
5919 _AssembleInstanceDisks(self, self.instance,
5920 ignore_size=self.op.ignore_size)
5921 if not disks_ok:
5922 raise errors.OpExecError("Cannot activate block devices")
5924 return disks_info
5927 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5928 ignore_size=False):
5929 """Prepare the block devices for an instance.
5931 This sets up the block devices on all nodes.
5933 @type lu: L{LogicalUnit}
5934 @param lu: the logical unit on whose behalf we execute
5935 @type instance: L{objects.Instance}
5936 @param instance: the instance for whose disks we assemble
5937 @type disks: list of L{objects.Disk} or None
5938 @param disks: which disks to assemble (or all, if None)
5939 @type ignore_secondaries: boolean
5940 @param ignore_secondaries: if true, errors on secondary nodes
5941 won't result in an error return from the function
5942 @type ignore_size: boolean
5943 @param ignore_size: if true, the current known size of the disk
5944 will not be used during the disk activation, useful for cases
5945 when the size is wrong
5946 @return: False if the operation failed, otherwise a list of
5947 (host, instance_visible_name, node_visible_name)
5948 with the mapping from node devices to instance devices
5950 """
5951 device_info = []
5952 disks_ok = True
5953 iname = instance.name
5954 disks = _ExpandCheckDisks(instance, disks)
5956 # With the two passes mechanism we try to reduce the window of
5957 # opportunity for the race condition of switching DRBD to primary
5958 # before handshaking occured, but we do not eliminate it
5960 # The proper fix would be to wait (with some limits) until the
5961 # connection has been made and drbd transitions from WFConnection
5962 # into any other network-connected state (Connected, SyncTarget,
5963 # SyncSource, etc.)
5965 # 1st pass, assemble on all nodes in secondary mode
5966 for idx, inst_disk in enumerate(disks):
5967 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5968 if ignore_size:
5969 node_disk = node_disk.Copy()
5970 node_disk.UnsetSize()
5971 lu.cfg.SetDiskID(node_disk, node)
5972 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5973 msg = result.fail_msg
5974 if msg:
5975 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5976 " (is_primary=False, pass=1): %s",
5977 inst_disk.iv_name, node, msg)
5978 if not ignore_secondaries:
5979 disks_ok = False
5981 # FIXME: race condition on drbd migration to primary
5983 # 2nd pass, do only the primary node
5984 for idx, inst_disk in enumerate(disks):
5985 dev_path = None
5987 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5988 if node != instance.primary_node:
5989 continue
5990 if ignore_size:
5991 node_disk = node_disk.Copy()
5992 node_disk.UnsetSize()
5993 lu.cfg.SetDiskID(node_disk, node)
5994 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5995 msg = result.fail_msg
5996 if msg:
5997 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5998 " (is_primary=True, pass=2): %s",
5999 inst_disk.iv_name, node, msg)
6000 disks_ok = False
6001 else:
6002 dev_path = result.payload
6004 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6006 # leave the disks configured for the primary node
6007 # this is a workaround that would be fixed better by
6008 # improving the logical/physical id handling
6009 for disk in disks:
6010 lu.cfg.SetDiskID(disk, instance.primary_node)
6012 return disks_ok, device_info
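# Each device_info entry is a (node, iv_name, dev_path) tuple, e.g.
# ("node1.example.com", "disk/0", "/dev/drbd0") (illustrative); dev_path is
# the payload returned by the pass-2 blockdev_assemble call on the primary.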
6015 def _StartInstanceDisks(lu, instance, force):
6016 """Start the disks of an instance.
6019 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6020 ignore_secondaries=force)
6021 if not disks_ok:
6022 _ShutdownInstanceDisks(lu, instance)
6023 if force is not None and not force:
6024 lu.proc.LogWarning("", hint="If the message above refers to a"
6025 " secondary node,"
6026 " you can retry the operation using '--force'.")
6027 raise errors.OpExecError("Disk consistency error")
6030 class LUInstanceDeactivateDisks(NoHooksLU):
6031 """Shutdown an instance's disks.
6036 def ExpandNames(self):
6037 self._ExpandAndLockInstance()
6038 self.needed_locks[locking.LEVEL_NODE] = []
6039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6041 def DeclareLocks(self, level):
6042 if level == locking.LEVEL_NODE:
6043 self._LockInstancesNodes()
6045 def CheckPrereq(self):
6046 """Check prerequisites.
6048 This checks that the instance is in the cluster.
6051 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6052 assert self.instance is not None, \
6053 "Cannot retrieve locked instance %s" % self.op.instance_name
6055 def Exec(self, feedback_fn):
6056 """Deactivate the disks
6059 instance = self.instance
6060 if self.op.force:
6061 _ShutdownInstanceDisks(self, instance)
6062 else:
6063 _SafeShutdownInstanceDisks(self, instance)
6066 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6067 """Shutdown block devices of an instance.
6069 This function checks if an instance is running, before calling
6070 _ShutdownInstanceDisks.
6073 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6074 _ShutdownInstanceDisks(lu, instance, disks=disks)
6077 def _ExpandCheckDisks(instance, disks):
6078 """Return the instance disks selected by the disks list
6080 @type disks: list of L{objects.Disk} or None
6081 @param disks: selected disks
6082 @rtype: list of L{objects.Disk}
6083 @return: selected instance disks to act on
6085 """
6086 if disks is None:
6087 return instance.disks
6088 else:
6089 if not set(disks).issubset(instance.disks):
6090 raise errors.ProgrammerError("Can only act on disks belonging to the"
6091 " target instance")
6092 return disks
6095 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6096 """Shutdown block devices of an instance.
6098 This does the shutdown on all nodes of the instance.
6100 If the ignore_primary is false, errors on the primary node are
6101 ignored.
6103 """
6104 all_result = True
6105 disks = _ExpandCheckDisks(instance, disks)
6107 for disk in disks:
6108 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6109 lu.cfg.SetDiskID(top_disk, node)
6110 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6111 msg = result.fail_msg
6112 if msg:
6113 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6114 disk.iv_name, node, msg)
6115 if ((node == instance.primary_node and not ignore_primary) or
6116 (node != instance.primary_node and not result.offline)):
6117 all_result = False
6119 return all_result
6121 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6122 """Checks if a node has enough free memory.
6124 This function checks if a given node has the needed amount of free
6125 memory. In case the node has less memory or we cannot get the
6126 information from the node, this function raises an OpPrereqError
6127 exception.
6129 @type lu: C{LogicalUnit}
6130 @param lu: a logical unit from which we get configuration data
6131 @type node: C{str}
6132 @param node: the node to check
6133 @type reason: C{str}
6134 @param reason: string to use in the error message
6135 @type requested: C{int}
6136 @param requested: the amount of memory in MiB to check for
6137 @type hypervisor_name: C{str}
6138 @param hypervisor_name: the hypervisor to ask for memory stats
6139 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6140 we cannot check the node
6143 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6144 nodeinfo[node].Raise("Can't get data from node %s" % node,
6145 prereq=True, ecode=errors.ECODE_ENVIRON)
6146 (_, _, (hv_info, )) = nodeinfo[node].payload
6148 free_mem = hv_info.get("memory_free", None)
6149 if not isinstance(free_mem, int):
6150 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6151 " was '%s'" % (node, free_mem),
6152 errors.ECODE_ENVIRON)
6153 if requested > free_mem:
6154 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6155 " needed %s MiB, available %s MiB" %
6156 (node, reason, requested, free_mem),
6157 errors.ECODE_NORES)
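# Typical call, as used by the instance start/failover paths below
# (illustrative):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)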
6160 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6161 """Checks if nodes have enough free disk space in the all VGs.
6163 This function checks if all given nodes have the needed amount of
6164 free disk. In case any node has less disk or we cannot get the
6165 information from the node, this function raises an OpPrereqError
6166 exception.
6168 @type lu: C{LogicalUnit}
6169 @param lu: a logical unit from which we get configuration data
6170 @type nodenames: C{list}
6171 @param nodenames: the list of node names to check
6172 @type req_sizes: C{dict}
6173 @param req_sizes: the hash of vg and corresponding amount of disk in
6174 MiB to check for
6175 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6176 or we cannot check the node
6179 for vg, req_size in req_sizes.items():
6180 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
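# req_sizes maps a volume group name to the required space in MiB, e.g.
# {"xenvg": 10240} to require 10 GiB in the "xenvg" VG (illustrative values).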
6183 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6184 """Checks if nodes have enough free disk space in the specified VG.
6186 This function checks if all given nodes have the needed amount of
6187 free disk. In case any node has less disk or we cannot get the
6188 information from the node, this function raises an OpPrereqError
6189 exception.
6191 @type lu: C{LogicalUnit}
6192 @param lu: a logical unit from which we get configuration data
6193 @type nodenames: C{list}
6194 @param nodenames: the list of node names to check
6195 @type vg: C{str}
6196 @param vg: the volume group to check
6197 @type requested: C{int}
6198 @param requested: the amount of disk in MiB to check for
6199 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6200 or we cannot check the node
6203 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6204 for node in nodenames:
6205 info = nodeinfo[node]
6206 info.Raise("Cannot get current information from node %s" % node,
6207 prereq=True, ecode=errors.ECODE_ENVIRON)
6208 (_, (vg_info, ), _) = info.payload
6209 vg_free = vg_info.get("vg_free", None)
6210 if not isinstance(vg_free, int):
6211 raise errors.OpPrereqError("Can't compute free disk space on node"
6212 " %s for vg %s, result was '%s'" %
6213 (node, vg, vg_free), errors.ECODE_ENVIRON)
6214 if requested > vg_free:
6215 raise errors.OpPrereqError("Not enough disk space on target node %s"
6216 " vg %s: required %d MiB, available %d MiB" %
6217 (node, vg, requested, vg_free),
6218 errors.ECODE_NORES)
6221 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6222 """Checks if nodes have enough physical CPUs
6224 This function checks if all given nodes have the needed number of
6225 physical CPUs. In case any node has less CPUs or we cannot get the
6226 information from the node, this function raises an OpPrereqError
6229 @type lu: C{LogicalUnit}
6230 @param lu: a logical unit from which we get configuration data
6231 @type nodenames: C{list}
6232 @param nodenames: the list of node names to check
6233 @type requested: C{int}
6234 @param requested: the minimum acceptable number of physical CPUs
6235 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6236 or we cannot check the node
6239 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6240 for node in nodenames:
6241 info = nodeinfo[node]
6242 info.Raise("Cannot get current information from node %s" % node,
6243 prereq=True, ecode=errors.ECODE_ENVIRON)
6244 (_, _, (hv_info, )) = info.payload
6245 num_cpus = hv_info.get("cpu_total", None)
6246 if not isinstance(num_cpus, int):
6247 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6248 " on node %s, result was '%s'" %
6249 (node, num_cpus), errors.ECODE_ENVIRON)
6250 if requested > num_cpus:
6251 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6252 "required" % (node, num_cpus, requested),
6253 errors.ECODE_NORES)
6256 class LUInstanceStartup(LogicalUnit):
6257 """Starts an instance.
6260 HPATH = "instance-start"
6261 HTYPE = constants.HTYPE_INSTANCE
6264 def CheckArguments(self):
6266 if self.op.beparams:
6267 # fill the beparams dict
6268 objects.UpgradeBeParams(self.op.beparams)
6269 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6271 def ExpandNames(self):
6272 self._ExpandAndLockInstance()
6274 def BuildHooksEnv(self):
6277 This runs on master, primary and secondary nodes of the instance.
6280 env = {
6281 "FORCE": self.op.force,
6282 }
6284 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6286 return env
6288 def BuildHooksNodes(self):
6289 """Build hooks nodes.
6292 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6293 return (nl, nl)
6295 def CheckPrereq(self):
6296 """Check prerequisites.
6298 This checks that the instance is in the cluster.
6301 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6302 assert self.instance is not None, \
6303 "Cannot retrieve locked instance %s" % self.op.instance_name
6306 if self.op.hvparams:
6307 # check hypervisor parameter syntax (locally)
6308 cluster = self.cfg.GetClusterInfo()
6309 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6310 filled_hvp = cluster.FillHV(instance)
6311 filled_hvp.update(self.op.hvparams)
6312 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6313 hv_type.CheckParameterSyntax(filled_hvp)
6314 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6316 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6318 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6320 if self.primary_offline and self.op.ignore_offline_nodes:
6321 self.proc.LogWarning("Ignoring offline primary node")
6323 if self.op.hvparams or self.op.beparams:
6324 self.proc.LogWarning("Overridden parameters are ignored")
6325 else:
6326 _CheckNodeOnline(self, instance.primary_node)
6328 bep = self.cfg.GetClusterInfo().FillBE(instance)
6330 # check bridges existence
6331 _CheckInstanceBridgesExist(self, instance)
6333 remote_info = self.rpc.call_instance_info(instance.primary_node,
6334 instance.name,
6335 instance.hypervisor)
6336 remote_info.Raise("Error checking node %s" % instance.primary_node,
6337 prereq=True, ecode=errors.ECODE_ENVIRON)
6338 if not remote_info.payload: # not running already
6339 _CheckNodeFreeMemory(self, instance.primary_node,
6340 "starting instance %s" % instance.name,
6341 bep[constants.BE_MAXMEM], instance.hypervisor)
6343 def Exec(self, feedback_fn):
6344 """Start the instance.
6347 instance = self.instance
6348 force = self.op.force
6350 if not self.op.no_remember:
6351 self.cfg.MarkInstanceUp(instance.name)
6353 if self.primary_offline:
6354 assert self.op.ignore_offline_nodes
6355 self.proc.LogInfo("Primary node offline, marked instance as started")
6356 else:
6357 node_current = instance.primary_node
6359 _StartInstanceDisks(self, instance, force)
6361 result = \
6362 self.rpc.call_instance_start(node_current,
6363 (instance, self.op.hvparams,
6364 self.op.beparams),
6365 self.op.startup_paused)
6366 msg = result.fail_msg
6367 if msg:
6368 _ShutdownInstanceDisks(self, instance)
6369 raise errors.OpExecError("Could not start instance: %s" % msg)
6372 class LUInstanceReboot(LogicalUnit):
6373 """Reboot an instance.
6376 HPATH = "instance-reboot"
6377 HTYPE = constants.HTYPE_INSTANCE
6380 def ExpandNames(self):
6381 self._ExpandAndLockInstance()
6383 def BuildHooksEnv(self):
6386 This runs on master, primary and secondary nodes of the instance.
6389 env = {
6390 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6391 "REBOOT_TYPE": self.op.reboot_type,
6392 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6393 }
6395 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6397 return env
6399 def BuildHooksNodes(self):
6400 """Build hooks nodes.
6403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6404 return (nl, nl)
6406 def CheckPrereq(self):
6407 """Check prerequisites.
6409 This checks that the instance is in the cluster.
6412 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6413 assert self.instance is not None, \
6414 "Cannot retrieve locked instance %s" % self.op.instance_name
6415 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6416 _CheckNodeOnline(self, instance.primary_node)
6418 # check bridges existence
6419 _CheckInstanceBridgesExist(self, instance)
6421 def Exec(self, feedback_fn):
6422 """Reboot the instance.
6425 instance = self.instance
6426 ignore_secondaries = self.op.ignore_secondaries
6427 reboot_type = self.op.reboot_type
6429 remote_info = self.rpc.call_instance_info(instance.primary_node,
6430 instance.name,
6431 instance.hypervisor)
6432 remote_info.Raise("Error checking node %s" % instance.primary_node)
6433 instance_running = bool(remote_info.payload)
6435 node_current = instance.primary_node
6437 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6438 constants.INSTANCE_REBOOT_HARD]:
6439 for disk in instance.disks:
6440 self.cfg.SetDiskID(disk, node_current)
6441 result = self.rpc.call_instance_reboot(node_current, instance,
6442 reboot_type,
6443 self.op.shutdown_timeout)
6444 result.Raise("Could not reboot instance")
6445 else:
6446 if instance_running:
6447 result = self.rpc.call_instance_shutdown(node_current, instance,
6448 self.op.shutdown_timeout)
6449 result.Raise("Could not shutdown instance for full reboot")
6450 _ShutdownInstanceDisks(self, instance)
6451 else:
6452 self.LogInfo("Instance %s was already stopped, starting now",
6453 instance.name)
6454 _StartInstanceDisks(self, instance, ignore_secondaries)
6455 result = self.rpc.call_instance_start(node_current,
6456 (instance, None, None), False)
6457 msg = result.fail_msg
6458 if msg:
6459 _ShutdownInstanceDisks(self, instance)
6460 raise errors.OpExecError("Could not start instance for"
6461 " full reboot: %s" % msg)
6463 self.cfg.MarkInstanceUp(instance.name)
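# Soft and hard reboots of a running instance are delegated to the hypervisor
# via call_instance_reboot; every other case (full reboot, or instance not
# running) is handled above as an explicit shutdown (if needed) plus a start.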
6466 class LUInstanceShutdown(LogicalUnit):
6467 """Shutdown an instance.
6470 HPATH = "instance-stop"
6471 HTYPE = constants.HTYPE_INSTANCE
6474 def ExpandNames(self):
6475 self._ExpandAndLockInstance()
6477 def BuildHooksEnv(self):
6480 This runs on master, primary and secondary nodes of the instance.
6483 env = _BuildInstanceHookEnvByObject(self, self.instance)
6484 env["TIMEOUT"] = self.op.timeout
6485 return env
6487 def BuildHooksNodes(self):
6488 """Build hooks nodes.
6491 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6492 return (nl, nl)
6494 def CheckPrereq(self):
6495 """Check prerequisites.
6497 This checks that the instance is in the cluster.
6500 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6501 assert self.instance is not None, \
6502 "Cannot retrieve locked instance %s" % self.op.instance_name
6504 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6506 self.primary_offline = \
6507 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6509 if self.primary_offline and self.op.ignore_offline_nodes:
6510 self.proc.LogWarning("Ignoring offline primary node")
6511 else:
6512 _CheckNodeOnline(self, self.instance.primary_node)
6514 def Exec(self, feedback_fn):
6515 """Shutdown the instance.
6518 instance = self.instance
6519 node_current = instance.primary_node
6520 timeout = self.op.timeout
6522 if not self.op.no_remember:
6523 self.cfg.MarkInstanceDown(instance.name)
6525 if self.primary_offline:
6526 assert self.op.ignore_offline_nodes
6527 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6528 else:
6529 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6530 msg = result.fail_msg
6531 if msg:
6532 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6534 _ShutdownInstanceDisks(self, instance)
6537 class LUInstanceReinstall(LogicalUnit):
6538 """Reinstall an instance.
6541 HPATH = "instance-reinstall"
6542 HTYPE = constants.HTYPE_INSTANCE
6545 def ExpandNames(self):
6546 self._ExpandAndLockInstance()
6548 def BuildHooksEnv(self):
6551 This runs on master, primary and secondary nodes of the instance.
6554 return _BuildInstanceHookEnvByObject(self, self.instance)
6556 def BuildHooksNodes(self):
6557 """Build hooks nodes.
6560 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6561 return (nl, nl)
6563 def CheckPrereq(self):
6564 """Check prerequisites.
6566 This checks that the instance is in the cluster and is not running.
6569 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6570 assert instance is not None, \
6571 "Cannot retrieve locked instance %s" % self.op.instance_name
6572 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6573 " offline, cannot reinstall")
6574 for node in instance.secondary_nodes:
6575 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6576 " cannot reinstall")
6578 if instance.disk_template == constants.DT_DISKLESS:
6579 raise errors.OpPrereqError("Instance '%s' has no disks" %
6580 self.op.instance_name,
6581 errors.ECODE_INVAL)
6582 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6584 if self.op.os_type is not None:
6586 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6587 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6588 instance_os = self.op.os_type
6589 else:
6590 instance_os = instance.os
6592 nodelist = list(instance.all_nodes)
6594 if self.op.osparams:
6595 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6596 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6597 self.os_inst = i_osdict # the new dict (without defaults)
6598 else:
6599 self.os_inst = {}
6601 self.instance = instance
6603 def Exec(self, feedback_fn):
6604 """Reinstall the instance.
6607 inst = self.instance
6609 if self.op.os_type is not None:
6610 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6611 inst.os = self.op.os_type
6612 # Write to configuration
6613 self.cfg.Update(inst, feedback_fn)
6615 _StartInstanceDisks(self, inst, None)
6616 try:
6617 feedback_fn("Running the instance OS create scripts...")
6618 # FIXME: pass debug option from opcode to backend
6619 result = self.rpc.call_instance_os_add(inst.primary_node,
6620 (inst, self.os_inst), True,
6621 self.op.debug_level)
6622 result.Raise("Could not install OS for instance %s on node %s" %
6623 (inst.name, inst.primary_node))
6624 finally:
6625 _ShutdownInstanceDisks(self, inst)
6628 class LUInstanceRecreateDisks(LogicalUnit):
6629 """Recreate an instance's missing disks.
6632 HPATH = "instance-recreate-disks"
6633 HTYPE = constants.HTYPE_INSTANCE
6636 def CheckArguments(self):
6637 # normalise the disk list
6638 self.op.disks = sorted(frozenset(self.op.disks))
6640 def ExpandNames(self):
6641 self._ExpandAndLockInstance()
6642 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6643 if self.op.nodes:
6644 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6645 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6646 else:
6647 self.needed_locks[locking.LEVEL_NODE] = []
6649 def DeclareLocks(self, level):
6650 if level == locking.LEVEL_NODE:
6651 # if we replace the nodes, we only need to lock the old primary,
6652 # otherwise we need to lock all nodes for disk re-creation
6653 primary_only = bool(self.op.nodes)
6654 self._LockInstancesNodes(primary_only=primary_only)
6655 elif level == locking.LEVEL_NODE_RES:
6657 self.needed_locks[locking.LEVEL_NODE_RES] = \
6658 self.needed_locks[locking.LEVEL_NODE][:]
6660 def BuildHooksEnv(self):
6663 This runs on master, primary and secondary nodes of the instance.
6666 return _BuildInstanceHookEnvByObject(self, self.instance)
6668 def BuildHooksNodes(self):
6669 """Build hooks nodes.
6672 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6673 return (nl, nl)
6675 def CheckPrereq(self):
6676 """Check prerequisites.
6678 This checks that the instance is in the cluster and is not running.
6681 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6682 assert instance is not None, \
6683 "Cannot retrieve locked instance %s" % self.op.instance_name
6684 if self.op.nodes:
6685 if len(self.op.nodes) != len(instance.all_nodes):
6686 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6687 " %d replacement nodes were specified" %
6688 (instance.name, len(instance.all_nodes),
6689 len(self.op.nodes)),
6690 errors.ECODE_INVAL)
6691 assert instance.disk_template != constants.DT_DRBD8 or \
6692 len(self.op.nodes) == 2
6693 assert instance.disk_template != constants.DT_PLAIN or \
6694 len(self.op.nodes) == 1
6695 primary_node = self.op.nodes[0]
6696 else:
6697 primary_node = instance.primary_node
6698 _CheckNodeOnline(self, primary_node)
6700 if instance.disk_template == constants.DT_DISKLESS:
6701 raise errors.OpPrereqError("Instance '%s' has no disks" %
6702 self.op.instance_name, errors.ECODE_INVAL)
6703 # if we replace nodes *and* the old primary is offline, we don't
6705 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6706 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6707 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6708 if not (self.op.nodes and old_pnode.offline):
6709 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6710 msg="cannot recreate disks")
6712 if not self.op.disks:
6713 self.op.disks = range(len(instance.disks))
6715 for idx in self.op.disks:
6716 if idx >= len(instance.disks):
6717 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6718 errors.ECODE_INVAL)
6719 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6720 raise errors.OpPrereqError("Can't recreate disks partially and"
6721 " change the nodes at the same time",
6722 errors.ECODE_INVAL)
6723 self.instance = instance
6725 def Exec(self, feedback_fn):
6726 """Recreate the disks.
6729 instance = self.instance
6731 assert (self.owned_locks(locking.LEVEL_NODE) ==
6732 self.owned_locks(locking.LEVEL_NODE_RES))
6734 to_skip = []
6735 mods = [] # keeps track of needed logical_id changes
6737 for idx, disk in enumerate(instance.disks):
6738 if idx not in self.op.disks: # disk idx has not been passed in
6739 to_skip.append(idx)
6740 continue
6741 # update secondaries for disks, if needed
6742 if self.op.nodes:
6743 if disk.dev_type == constants.LD_DRBD8:
6744 # need to update the nodes and minors
6745 assert len(self.op.nodes) == 2
6746 assert len(disk.logical_id) == 6 # otherwise disk internals
6748 (_, _, old_port, _, _, old_secret) = disk.logical_id
6749 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6750 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6751 new_minors[0], new_minors[1], old_secret)
6752 assert len(disk.logical_id) == len(new_id)
6753 mods.append((idx, new_id))
6755 # now that we have passed all asserts above, we can apply the mods
6756 # in a single run (to avoid partial changes)
6757 for idx, new_id in mods:
6758 instance.disks[idx].logical_id = new_id
6760 # change primary node, if needed
6761 if self.op.nodes:
6762 instance.primary_node = self.op.nodes[0]
6763 self.LogWarning("Changing the instance's nodes, you will have to"
6764 " remove any disks left on the older nodes manually")
6767 self.cfg.Update(instance, feedback_fn)
6769 _CreateDisks(self, instance, to_skip=to_skip)
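# to_skip holds the indices of disks that were not selected for recreation,
# while mods collects (index, new logical_id) pairs for DRBD disks whose
# nodes and minors had to be reallocated before the disks are recreated.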
6772 class LUInstanceRename(LogicalUnit):
6773 """Rename an instance.
6776 HPATH = "instance-rename"
6777 HTYPE = constants.HTYPE_INSTANCE
6779 def CheckArguments(self):
6783 if self.op.ip_check and not self.op.name_check:
6784 # TODO: make the ip check more flexible and not depend on the name check
6785 raise errors.OpPrereqError("IP address check requires a name check",
6786 errors.ECODE_INVAL)
6788 def BuildHooksEnv(self):
6791 This runs on master, primary and secondary nodes of the instance.
6794 env = _BuildInstanceHookEnvByObject(self, self.instance)
6795 env["INSTANCE_NEW_NAME"] = self.op.new_name
6796 return env
6798 def BuildHooksNodes(self):
6799 """Build hooks nodes.
6802 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6803 return (nl, nl)
6805 def CheckPrereq(self):
6806 """Check prerequisites.
6808 This checks that the instance is in the cluster and is not running.
6811 self.op.instance_name = _ExpandInstanceName(self.cfg,
6812 self.op.instance_name)
6813 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6814 assert instance is not None
6815 _CheckNodeOnline(self, instance.primary_node)
6816 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6817 msg="cannot rename")
6818 self.instance = instance
6820 new_name = self.op.new_name
6821 if self.op.name_check:
6822 hostname = netutils.GetHostname(name=new_name)
6823 if hostname.name != new_name:
6824 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6825 hostname.name)
6826 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6827 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6828 " same as given hostname '%s'") %
6829 (hostname.name, self.op.new_name),
6830 errors.ECODE_INVAL)
6831 new_name = self.op.new_name = hostname.name
6832 if (self.op.ip_check and
6833 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6834 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6835 (hostname.ip, new_name),
6836 errors.ECODE_NOTUNIQUE)
6838 instance_list = self.cfg.GetInstanceList()
6839 if new_name in instance_list and new_name != instance.name:
6840 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6841 new_name, errors.ECODE_EXISTS)
6843 def Exec(self, feedback_fn):
6844 """Rename the instance.
6847 inst = self.instance
6848 old_name = inst.name
6850 rename_file_storage = False
6851 if (inst.disk_template in constants.DTS_FILEBASED and
6852 self.op.new_name != inst.name):
6853 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6854 rename_file_storage = True
6856 self.cfg.RenameInstance(inst.name, self.op.new_name)
6857 # Change the instance lock. This is definitely safe while we hold the BGL.
6858 # Otherwise the new lock would have to be added in acquired mode.
6860 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6861 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6863 # re-read the instance from the configuration after rename
6864 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6866 if rename_file_storage:
6867 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6868 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6869 old_file_storage_dir,
6870 new_file_storage_dir)
6871 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6872 " (but the instance has been renamed in Ganeti)" %
6873 (inst.primary_node, old_file_storage_dir,
6874 new_file_storage_dir))
6876 _StartInstanceDisks(self, inst, None)
6877 try:
6878 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6879 old_name, self.op.debug_level)
6880 msg = result.fail_msg
6881 if msg:
6882 msg = ("Could not run OS rename script for instance %s on node %s"
6883 " (but the instance has been renamed in Ganeti): %s" %
6884 (inst.name, inst.primary_node, msg))
6885 self.proc.LogWarning(msg)
6886 finally:
6887 _ShutdownInstanceDisks(self, inst)
6889 return inst.name
6892 class LUInstanceRemove(LogicalUnit):
6893 """Remove an instance.
6896 HPATH = "instance-remove"
6897 HTYPE = constants.HTYPE_INSTANCE
6900 def ExpandNames(self):
6901 self._ExpandAndLockInstance()
6902 self.needed_locks[locking.LEVEL_NODE] = []
6903 self.needed_locks[locking.LEVEL_NODE_RES] = []
6904 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6906 def DeclareLocks(self, level):
6907 if level == locking.LEVEL_NODE:
6908 self._LockInstancesNodes()
6909 elif level == locking.LEVEL_NODE_RES:
6911 self.needed_locks[locking.LEVEL_NODE_RES] = \
6912 self.needed_locks[locking.LEVEL_NODE][:]
6914 def BuildHooksEnv(self):
6917 This runs on master, primary and secondary nodes of the instance.
6920 env = _BuildInstanceHookEnvByObject(self, self.instance)
6921 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6922 return env
6924 def BuildHooksNodes(self):
6925 """Build hooks nodes.
6928 nl = [self.cfg.GetMasterNode()]
6929 nl_post = list(self.instance.all_nodes) + nl
6930 return (nl, nl_post)
6932 def CheckPrereq(self):
6933 """Check prerequisites.
6935 This checks that the instance is in the cluster.
6938 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6939 assert self.instance is not None, \
6940 "Cannot retrieve locked instance %s" % self.op.instance_name
6942 def Exec(self, feedback_fn):
6943 """Remove the instance.
6946 instance = self.instance
6947 logging.info("Shutting down instance %s on node %s",
6948 instance.name, instance.primary_node)
6950 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6951 self.op.shutdown_timeout)
6952 msg = result.fail_msg
6953 if msg:
6954 if self.op.ignore_failures:
6955 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6956 else:
6957 raise errors.OpExecError("Could not shutdown instance %s on"
6958 " node %s: %s" %
6959 (instance.name, instance.primary_node, msg))
6961 assert (self.owned_locks(locking.LEVEL_NODE) ==
6962 self.owned_locks(locking.LEVEL_NODE_RES))
6963 assert not (set(instance.all_nodes) -
6964 self.owned_locks(locking.LEVEL_NODE)), \
6965 "Not owning correct locks"
6967 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6970 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6971 """Utility function to remove an instance.
6974 logging.info("Removing block devices for instance %s", instance.name)
6976 if not _RemoveDisks(lu, instance):
6977 if not ignore_failures:
6978 raise errors.OpExecError("Can't remove instance's disks")
6979 feedback_fn("Warning: can't remove instance's disks")
6981 logging.info("Removing instance %s out of cluster config", instance.name)
6983 lu.cfg.RemoveInstance(instance.name)
6985 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6986 "Instance lock removal conflict"
6988 # Remove lock for the instance
6989 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6992 class LUInstanceQuery(NoHooksLU):
6993 """Logical unit for querying instances.
6996 # pylint: disable=W0142
6999 def CheckArguments(self):
7000 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7001 self.op.output_fields, self.op.use_locking)
7003 def ExpandNames(self):
7004 self.iq.ExpandNames(self)
7006 def DeclareLocks(self, level):
7007 self.iq.DeclareLocks(self, level)
7009 def Exec(self, feedback_fn):
7010 return self.iq.OldStyleQuery(self)
7013 class LUInstanceFailover(LogicalUnit):
7014 """Failover an instance.
7017 HPATH = "instance-failover"
7018 HTYPE = constants.HTYPE_INSTANCE
7021 def CheckArguments(self):
7022 """Check the arguments.
7025 self.iallocator = getattr(self.op, "iallocator", None)
7026 self.target_node = getattr(self.op, "target_node", None)
7028 def ExpandNames(self):
7029 self._ExpandAndLockInstance()
7031 if self.op.target_node is not None:
7032 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7034 self.needed_locks[locking.LEVEL_NODE] = []
7035 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7037 ignore_consistency = self.op.ignore_consistency
7038 shutdown_timeout = self.op.shutdown_timeout
7039 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7040 cleanup=False,
7041 failover=True,
7042 ignore_consistency=ignore_consistency,
7043 shutdown_timeout=shutdown_timeout)
7044 self.tasklets = [self._migrater]
7046 def DeclareLocks(self, level):
7047 if level == locking.LEVEL_NODE:
7048 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7049 if instance.disk_template in constants.DTS_EXT_MIRROR:
7050 if self.op.target_node is None:
7051 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7052 else:
7053 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7054 self.op.target_node]
7055 del self.recalculate_locks[locking.LEVEL_NODE]
7056 else:
7057 self._LockInstancesNodes()
7059 def BuildHooksEnv(self):
7062 This runs on master, primary and secondary nodes of the instance.
7065 instance = self._migrater.instance
7066 source_node = instance.primary_node
7067 target_node = self.op.target_node
7068 env = {
7069 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7070 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7071 "OLD_PRIMARY": source_node,
7072 "NEW_PRIMARY": target_node,
7073 }
7075 if instance.disk_template in constants.DTS_INT_MIRROR:
7076 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7077 env["NEW_SECONDARY"] = source_node
7078 else:
7079 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7081 env.update(_BuildInstanceHookEnvByObject(self, instance))
7083 return env
7085 def BuildHooksNodes(self):
7086 """Build hooks nodes.
7089 instance = self._migrater.instance
7090 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7091 return (nl, nl + [instance.primary_node])
7094 class LUInstanceMigrate(LogicalUnit):
7095 """Migrate an instance.
7097 This is migration without shutting down, compared to the failover,
7098 which is done with shutdown.
7101 HPATH = "instance-migrate"
7102 HTYPE = constants.HTYPE_INSTANCE
7105 def ExpandNames(self):
7106 self._ExpandAndLockInstance()
7108 if self.op.target_node is not None:
7109 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7111 self.needed_locks[locking.LEVEL_NODE] = []
7112 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7114 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7115 cleanup=self.op.cleanup,
7116 failover=False,
7117 fallback=self.op.allow_failover)
7118 self.tasklets = [self._migrater]
7120 def DeclareLocks(self, level):
7121 if level == locking.LEVEL_NODE:
7122 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7123 if instance.disk_template in constants.DTS_EXT_MIRROR:
7124 if self.op.target_node is None:
7125 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7126 else:
7127 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7128 self.op.target_node]
7129 del self.recalculate_locks[locking.LEVEL_NODE]
7130 else:
7131 self._LockInstancesNodes()
7133 def BuildHooksEnv(self):
7136 This runs on master, primary and secondary nodes of the instance.
7139 instance = self._migrater.instance
7140 source_node = instance.primary_node
7141 target_node = self.op.target_node
7142 env = _BuildInstanceHookEnvByObject(self, instance)
7143 env.update({
7144 "MIGRATE_LIVE": self._migrater.live,
7145 "MIGRATE_CLEANUP": self.op.cleanup,
7146 "OLD_PRIMARY": source_node,
7147 "NEW_PRIMARY": target_node,
7148 })
7150 if instance.disk_template in constants.DTS_INT_MIRROR:
7151 env["OLD_SECONDARY"] = target_node
7152 env["NEW_SECONDARY"] = source_node
7153 else:
7154 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7156 return env
7158 def BuildHooksNodes(self):
7159 """Build hooks nodes.
7162 instance = self._migrater.instance
7163 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7164 return (nl, nl + [instance.primary_node])
7167 class LUInstanceMove(LogicalUnit):
7168 """Move an instance by data-copying.
7171 HPATH = "instance-move"
7172 HTYPE = constants.HTYPE_INSTANCE
7175 def ExpandNames(self):
7176 self._ExpandAndLockInstance()
7177 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7178 self.op.target_node = target_node
7179 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7180 self.needed_locks[locking.LEVEL_NODE_RES] = []
7181 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7183 def DeclareLocks(self, level):
7184 if level == locking.LEVEL_NODE:
7185 self._LockInstancesNodes(primary_only=True)
7186 elif level == locking.LEVEL_NODE_RES:
7188 self.needed_locks[locking.LEVEL_NODE_RES] = \
7189 self.needed_locks[locking.LEVEL_NODE][:]
7191 def BuildHooksEnv(self):
7194 This runs on master, primary and secondary nodes of the instance.
7197 env = {
7198 "TARGET_NODE": self.op.target_node,
7199 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7200 }
7201 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7203 return env
7204 def BuildHooksNodes(self):
7205 """Build hooks nodes.
7208 nl = [
7209 self.cfg.GetMasterNode(),
7210 self.instance.primary_node,
7211 self.op.target_node,
7212 ]
7214 return (nl, nl)
7215 def CheckPrereq(self):
7216 """Check prerequisites.
7218 This checks that the instance is in the cluster.
7221 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7222 assert self.instance is not None, \
7223 "Cannot retrieve locked instance %s" % self.op.instance_name
7225 node = self.cfg.GetNodeInfo(self.op.target_node)
7226 assert node is not None, \
7227 "Cannot retrieve locked node %s" % self.op.target_node
7229 self.target_node = target_node = node.name
7231 if target_node == instance.primary_node:
7232 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7233 (instance.name, target_node),
7234 errors.ECODE_STATE)
7236 bep = self.cfg.GetClusterInfo().FillBE(instance)
7238 for idx, dsk in enumerate(instance.disks):
7239 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7240 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7241 " cannot copy" % idx, errors.ECODE_STATE)
7243 _CheckNodeOnline(self, target_node)
7244 _CheckNodeNotDrained(self, target_node)
7245 _CheckNodeVmCapable(self, target_node)
7247 if instance.admin_state == constants.ADMINST_UP:
7248 # check memory requirements on the secondary node
7249 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7250 instance.name, bep[constants.BE_MAXMEM],
7251 instance.hypervisor)
7252 else:
7253 self.LogInfo("Not checking memory on the secondary node as"
7254 " instance will not be started")
7256 # check bridge existance
7257 _CheckInstanceBridgesExist(self, instance, node=target_node)
7259 def Exec(self, feedback_fn):
7260 """Move an instance.
7262 The move is done by shutting it down on its present node, copying
7263 the data over (slow) and starting it on the new node.
7266 instance = self.instance
7268 source_node = instance.primary_node
7269 target_node = self.target_node
7271 self.LogInfo("Shutting down instance %s on source node %s",
7272 instance.name, source_node)
7274 assert (self.owned_locks(locking.LEVEL_NODE) ==
7275 self.owned_locks(locking.LEVEL_NODE_RES))
7277 result = self.rpc.call_instance_shutdown(source_node, instance,
7278 self.op.shutdown_timeout)
7279 msg = result.fail_msg
7280 if msg:
7281 if self.op.ignore_consistency:
7282 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7283 " Proceeding anyway. Please make sure node"
7284 " %s is down. Error details: %s",
7285 instance.name, source_node, source_node, msg)
7286 else:
7287 raise errors.OpExecError("Could not shutdown instance %s on"
7288 " node %s: %s" %
7289 (instance.name, source_node, msg))
7291 # create the target disks
7292 try:
7293 _CreateDisks(self, instance, target_node=target_node)
7294 except errors.OpExecError:
7295 self.LogWarning("Device creation failed, reverting...")
7296 try:
7297 _RemoveDisks(self, instance, target_node=target_node)
7298 finally:
7299 self.cfg.ReleaseDRBDMinors(instance.name)
7300 raise
7302 cluster_name = self.cfg.GetClusterInfo().cluster_name
7304 errs = []
7305 # activate, get path, copy the data over
7306 for idx, disk in enumerate(instance.disks):
7307 self.LogInfo("Copying data for disk %d", idx)
7308 result = self.rpc.call_blockdev_assemble(target_node, disk,
7309 instance.name, True, idx)
7310 if result.fail_msg:
7311 self.LogWarning("Can't assemble newly created disk %d: %s",
7312 idx, result.fail_msg)
7313 errs.append(result.fail_msg)
7314 break
7315 dev_path = result.payload
7316 result = self.rpc.call_blockdev_export(source_node, disk,
7317 target_node, dev_path,
7318 cluster_name)
7319 if result.fail_msg:
7320 self.LogWarning("Can't copy data over for disk %d: %s",
7321 idx, result.fail_msg)
7322 errs.append(result.fail_msg)
7323 break
7325 if errs:
7326 self.LogWarning("Some disks failed to copy, aborting")
7327 try:
7328 _RemoveDisks(self, instance, target_node=target_node)
7329 finally:
7330 self.cfg.ReleaseDRBDMinors(instance.name)
7331 raise errors.OpExecError("Errors during disk copy: %s" %
7332 (",".join(errs),))
7334 instance.primary_node = target_node
7335 self.cfg.Update(instance, feedback_fn)
7337 self.LogInfo("Removing the disks on the original node")
7338 _RemoveDisks(self, instance, target_node=source_node)
7340 # Only start the instance if it's marked as up
7341 if instance.admin_state == constants.ADMINST_UP:
7342 self.LogInfo("Starting instance %s on node %s",
7343 instance.name, target_node)
7345 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7346 ignore_secondaries=True)
7347 if not disks_ok:
7348 _ShutdownInstanceDisks(self, instance)
7349 raise errors.OpExecError("Can't activate the instance's disks")
7351 result = self.rpc.call_instance_start(target_node,
7352 (instance, None, None), False)
7353 msg = result.fail_msg
7354 if msg:
7355 _ShutdownInstanceDisks(self, instance)
7356 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7357 (instance.name, target_node, msg))
7360 class LUNodeMigrate(LogicalUnit):
7361 """Migrate all instances from a node.
7364 HPATH = "node-migrate"
7365 HTYPE = constants.HTYPE_NODE
7368 def CheckArguments(self):
7369 pass
7371 def ExpandNames(self):
7372 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7374 self.share_locks = _ShareAll()
7375 self.needed_locks = {
7376 locking.LEVEL_NODE: [self.op.node_name],
7377 }
7379 def BuildHooksEnv(self):
7382 This runs on the master, the primary and all the secondaries.
7385 return {
7386 "NODE_NAME": self.op.node_name,
7387 }
7389 def BuildHooksNodes(self):
7390 """Build hooks nodes.
7393 nl = [self.cfg.GetMasterNode()]
7394 return (nl, nl)
7396 def CheckPrereq(self):
7397 pass
7399 def Exec(self, feedback_fn):
7400 # Prepare jobs for migration instances
7401 jobs = [
7402 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7403 mode=self.op.mode,
7404 live=self.op.live,
7405 iallocator=self.op.iallocator,
7406 target_node=self.op.target_node)]
7407 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7408 ]
7410 # TODO: Run iallocator in this opcode and pass correct placement options to
7411 # OpInstanceMigrate. Since other jobs can modify the cluster between
7412 # running the iallocator and the actual migration, a good consistency model
7413 # will have to be found.
7415 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7416 frozenset([self.op.node_name]))
7418 return ResultWithJobs(jobs)
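# "jobs" is a list of single-opcode job definitions, one OpInstanceMigrate per
# primary instance on the node, so every instance is migrated in its own job.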
7421 class TLMigrateInstance(Tasklet):
7422 """Tasklet class for instance migration.
7425 @ivar live: whether the migration will be done live or non-live;
7426 this variable is initalized only after CheckPrereq has run
7427 @type cleanup: boolean
7428 @ivar cleanup: Whether we cleanup from a failed migration
7429 @type iallocator: string
7430 @ivar iallocator: The iallocator used to determine target_node
7431 @type target_node: string
7432 @ivar target_node: If given, the target_node to reallocate the instance to
7433 @type failover: boolean
7434 @ivar failover: Whether operation results in failover or migration
7435 @type fallback: boolean
7436 @ivar fallback: Whether fallback to failover is allowed if migration not
7437 possible
7438 @type ignore_consistency: boolean
7439 @ivar ignore_consistency: Whether we should ignore consistency between source
7440 and target node
7441 @type shutdown_timeout: int
7442 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7447 _MIGRATION_POLL_INTERVAL = 1 # seconds
7448 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7450 def __init__(self, lu, instance_name, cleanup=False,
7451 failover=False, fallback=False,
7452 ignore_consistency=False,
7453 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7454 """Initializes this class.
7457 Tasklet.__init__(self, lu)
7460 self.instance_name = instance_name
7461 self.cleanup = cleanup
7462 self.live = False # will be overridden later
7463 self.failover = failover
7464 self.fallback = fallback
7465 self.ignore_consistency = ignore_consistency
7466 self.shutdown_timeout = shutdown_timeout
7468 def CheckPrereq(self):
7469 """Check prerequisites.
7471 This checks that the instance is in the cluster.
7474 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7475 instance = self.cfg.GetInstanceInfo(instance_name)
7476 assert instance is not None
7477 self.instance = instance
7479 if (not self.cleanup and
7480 not instance.admin_state == constants.ADMINST_UP and
7481 not self.failover and self.fallback):
7482 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7483 " switching to failover")
7484 self.failover = True
7486 if instance.disk_template not in constants.DTS_MIRRORED:
7487 if self.failover:
7488 text = "failovers"
7489 else:
7490 text = "migrations"
7491 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7492 " %s" % (instance.disk_template, text),
7493 errors.ECODE_STATE)
7495 if instance.disk_template in constants.DTS_EXT_MIRROR:
7496 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7498 if self.lu.op.iallocator:
7499 self._RunAllocator()
7500 else:
7501 # We set self.target_node as it is required by
7502 # BuildHooksEnv
7503 self.target_node = self.lu.op.target_node
7505 # self.target_node is already populated, either directly or by the
7506 # iallocator run
7507 target_node = self.target_node
7508 if self.target_node == instance.primary_node:
7509 raise errors.OpPrereqError("Cannot migrate instance %s"
7510 " to its primary (%s)" %
7511 (instance.name, instance.primary_node))
7513 if len(self.lu.tasklets) == 1:
7514 # It is safe to release locks only when we're the only tasklet
7515 # in the LU
7516 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7517 keep=[instance.primary_node, self.target_node])
7519 else:
7520 secondary_nodes = instance.secondary_nodes
7521 if not secondary_nodes:
7522 raise errors.ConfigurationError("No secondary node but using"
7523 " %s disk template" %
7524 instance.disk_template)
7525 target_node = secondary_nodes[0]
7526 if self.lu.op.iallocator or (self.lu.op.target_node and
7527 self.lu.op.target_node != target_node):
7528 if self.failover:
7529 text = "failed over"
7530 else:
7531 text = "migrated"
7532 raise errors.OpPrereqError("Instances with disk template %s cannot"
7533 " be %s to arbitrary nodes"
7534 " (neither an iallocator nor a target"
7535 " node can be passed)" %
7536 (instance.disk_template, text),
7537 errors.ECODE_INVAL)
7539 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7541 # check memory requirements on the secondary node
7542 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7543 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7544 instance.name, i_be[constants.BE_MAXMEM],
7545 instance.hypervisor)
7546 else:
7547 self.lu.LogInfo("Not checking memory on the secondary node as"
7548 " instance will not be started")
7550 # check if failover must be forced instead of migration
7551 if (not self.cleanup and not self.failover and
7552 i_be[constants.BE_ALWAYS_FAILOVER]):
7553 if self.fallback:
7554 self.lu.LogInfo("Instance configured to always failover; fallback"
7555 " to failover")
7556 self.failover = True
7557 else:
7558 raise errors.OpPrereqError("This instance has been configured to"
7559 " always failover, please allow failover",
7560 errors.ECODE_STATE)
7562     # check bridge existence
7563 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7565 if not self.cleanup:
7566 _CheckNodeNotDrained(self.lu, target_node)
7567 if not self.failover:
7568 result = self.rpc.call_instance_migratable(instance.primary_node,
7570 if result.fail_msg and self.fallback:
7571 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7573 self.failover = True
7575 result.Raise("Can't migrate, please use failover",
7576 prereq=True, ecode=errors.ECODE_STATE)
7578 assert not (self.failover and self.cleanup)
7580 if not self.failover:
7581 if self.lu.op.live is not None and self.lu.op.mode is not None:
7582 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7583                                    " parameters is accepted",
7585 if self.lu.op.live is not None:
7587 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7589 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7590 # reset the 'live' parameter to None so that repeated
7591 # invocations of CheckPrereq do not raise an exception
7592 self.lu.op.live = None
7593 elif self.lu.op.mode is None:
7594 # read the default value from the hypervisor
7595 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7597 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7599 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7601 # Failover is never live
7604 def _RunAllocator(self):
7605 """Run the allocator based on input opcode.
7608 ial = IAllocator(self.cfg, self.rpc,
7609 mode=constants.IALLOCATOR_MODE_RELOC,
7610 name=self.instance_name,
7611 # TODO See why hail breaks with a single node below
7612 relocate_from=[self.instance.primary_node,
7613 self.instance.primary_node],
7616 ial.Run(self.lu.op.iallocator)
7619 raise errors.OpPrereqError("Can't compute nodes using"
7620 " iallocator '%s': %s" %
7621 (self.lu.op.iallocator, ial.info),
7623 if len(ial.result) != ial.required_nodes:
7624 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7625 " of nodes (%s), required %s" %
7626 (self.lu.op.iallocator, len(ial.result),
7627 ial.required_nodes), errors.ECODE_FAULT)
7628 self.target_node = ial.result[0]
7629 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7630 self.instance_name, self.lu.op.iallocator,
7631 utils.CommaJoin(ial.result))
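  # A rough sketch of the relocation request built above (instance and node
  # names are made up); on success the allocator returns a node list whose
  # first entry becomes self.target_node:
  #   ial = IAllocator(cfg, rpc, mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node1.example.com", "node1.example.com"])
  #   ial.Run("hail")
  #   ial.result  # e.g. ["node2.example.com"]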
7633 def _WaitUntilSync(self):
7634 """Poll with custom rpc for disk sync.
7636 This uses our own step-based rpc call.
7639 self.feedback_fn("* wait until resync is done")
7643 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7645 self.instance.disks)
7647 for node, nres in result.items():
7648 nres.Raise("Cannot resync disks on node %s" % node)
7649 node_done, node_percent = nres.payload
7650 all_done = all_done and node_done
7651 if node_percent is not None:
7652 min_percent = min(min_percent, node_percent)
7654 if min_percent < 100:
7655 self.feedback_fn(" - progress: %.1f%%" % min_percent)
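  # Illustrative aggregation in the loop above: if one node reports
  # (node_done=False, node_percent=80.0) and the other (True, 100.0), then
  # all_done stays False, min_percent becomes 80.0 and the step reports
  # " - progress: 80.0%" before polling again.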
7658 def _EnsureSecondary(self, node):
7659 """Demote a node to secondary.
7662 self.feedback_fn("* switching node %s to secondary mode" % node)
7664 for dev in self.instance.disks:
7665 self.cfg.SetDiskID(dev, node)
7667 result = self.rpc.call_blockdev_close(node, self.instance.name,
7668 self.instance.disks)
7669 result.Raise("Cannot change disk to secondary on node %s" % node)
7671 def _GoStandalone(self):
7672 """Disconnect from the network.
7675 self.feedback_fn("* changing into standalone mode")
7676 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7677 self.instance.disks)
7678 for node, nres in result.items():
7679       nres.Raise("Cannot disconnect disks on node %s" % node)
7681 def _GoReconnect(self, multimaster):
7682 """Reconnect to the network.
7688 msg = "single-master"
7689 self.feedback_fn("* changing disks into %s mode" % msg)
7690 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7691 self.instance.disks,
7692 self.instance.name, multimaster)
7693 for node, nres in result.items():
7694 nres.Raise("Cannot change disks config on node %s" % node)
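  # _GoStandalone and _GoReconnect together implement the DRBD reconfiguration
  # cycle used by the cleanup and migration paths below, roughly:
  #   self._GoStandalone()     # drop the current network configuration
  #   self._GoReconnect(True)  # re-attach in multimaster (dual-primary) mode
  #   self._WaitUntilSync()    # wait for the disks to resync
  # Passing False to _GoReconnect re-attaches in single-master mode instead.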
7696 def _ExecCleanup(self):
7697     """Try to clean up after a failed migration.
7699 The cleanup is done by:
7700 - check that the instance is running only on one node
7701 (and update the config if needed)
7702 - change disks on its secondary node to secondary
7703 - wait until disks are fully synchronized
7704 - disconnect from the network
7705 - change disks into single-master mode
7706 - wait again until disks are fully synchronized
7709 instance = self.instance
7710 target_node = self.target_node
7711 source_node = self.source_node
7713 # check running on only one node
7714 self.feedback_fn("* checking where the instance actually runs"
7715 " (if this hangs, the hypervisor might be in"
7717 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7718 for node, result in ins_l.items():
7719 result.Raise("Can't contact node %s" % node)
7721 runningon_source = instance.name in ins_l[source_node].payload
7722 runningon_target = instance.name in ins_l[target_node].payload
7724 if runningon_source and runningon_target:
7725 raise errors.OpExecError("Instance seems to be running on two nodes,"
7726 " or the hypervisor is confused; you will have"
7727 " to ensure manually that it runs only on one"
7728 " and restart this operation")
7730 if not (runningon_source or runningon_target):
7731 raise errors.OpExecError("Instance does not seem to be running at all;"
7732 " in this case it's safer to repair by"
7733 " running 'gnt-instance stop' to ensure disk"
7734 " shutdown, and then restarting it")
7736 if runningon_target:
7737 # the migration has actually succeeded, we need to update the config
7738 self.feedback_fn("* instance running on secondary node (%s),"
7739 " updating config" % target_node)
7740 instance.primary_node = target_node
7741 self.cfg.Update(instance, self.feedback_fn)
7742 demoted_node = source_node
7744 self.feedback_fn("* instance confirmed to be running on its"
7745 " primary node (%s)" % source_node)
7746 demoted_node = target_node
7748 if instance.disk_template in constants.DTS_INT_MIRROR:
7749 self._EnsureSecondary(demoted_node)
7751 self._WaitUntilSync()
7752 except errors.OpExecError:
7753         # we ignore errors here, since if the device is standalone, it
7754 # won't be able to sync
7756 self._GoStandalone()
7757 self._GoReconnect(False)
7758 self._WaitUntilSync()
7760 self.feedback_fn("* done")
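  # Summary of the decision taken above, based on where the instance was
  # actually found running:
  #   source only  -> keep the primary node, demote the target node
  #   target only  -> make the target node the new primary, demote the source
  #   both         -> abort; manual intervention is required
  #   neither      -> abort; the instance has to be restarted manually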
7762 def _RevertDiskStatus(self):
7763 """Try to revert the disk status after a failed migration.
7766 target_node = self.target_node
7767 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7771 self._EnsureSecondary(target_node)
7772 self._GoStandalone()
7773 self._GoReconnect(False)
7774 self._WaitUntilSync()
7775 except errors.OpExecError, err:
7776 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7777 " please try to recover the instance manually;"
7778 " error '%s'" % str(err))
7780 def _AbortMigration(self):
7781 """Call the hypervisor code to abort a started migration.
7784 instance = self.instance
7785 target_node = self.target_node
7786 source_node = self.source_node
7787 migration_info = self.migration_info
7789 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7793 abort_msg = abort_result.fail_msg
7795 logging.error("Aborting migration failed on target node %s: %s",
7796 target_node, abort_msg)
7797       # Don't raise an exception here, as we still have to try to revert the
7798 # disk status, even if this step failed.
7800 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7801 instance, False, self.live)
7802 abort_msg = abort_result.fail_msg
7804 logging.error("Aborting migration failed on source node %s: %s",
7805 source_node, abort_msg)
7807 def _ExecMigration(self):
7808 """Migrate an instance.
7810 The migrate is done by:
7811 - change the disks into dual-master mode
7812 - wait until disks are fully synchronized again
7813 - migrate the instance
7814 - change disks on the new secondary node (the old primary) to secondary
7815 - wait until disks are fully synchronized
7816 - change disks into single-master mode
7819 instance = self.instance
7820 target_node = self.target_node
7821 source_node = self.source_node
7823 # Check for hypervisor version mismatch and warn the user.
7824 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7825 None, [self.instance.hypervisor])
7826 for ninfo in nodeinfo.values():
7827 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7829 (_, _, (src_info, )) = nodeinfo[source_node].payload
7830 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7832 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7833 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7834 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7835 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7836 if src_version != dst_version:
7837 self.feedback_fn("* warning: hypervisor version mismatch between"
7838 " source (%s) and target (%s) node" %
7839 (src_version, dst_version))
7841 self.feedback_fn("* checking disk consistency between source and target")
7842 for dev in instance.disks:
7843 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7844 raise errors.OpExecError("Disk %s is degraded or not fully"
7845 " synchronized on target node,"
7846 " aborting migration" % dev.iv_name)
7848 # First get the migration information from the remote node
7849 result = self.rpc.call_migration_info(source_node, instance)
7850 msg = result.fail_msg
7852 log_err = ("Failed fetching source migration information from %s: %s" %
7854 logging.error(log_err)
7855 raise errors.OpExecError(log_err)
7857 self.migration_info = migration_info = result.payload
7859 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7860 # Then switch the disks to master/master mode
7861 self._EnsureSecondary(target_node)
7862 self._GoStandalone()
7863 self._GoReconnect(True)
7864 self._WaitUntilSync()
7866 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7867 result = self.rpc.call_accept_instance(target_node,
7870 self.nodes_ip[target_node])
7872 msg = result.fail_msg
7874 logging.error("Instance pre-migration failed, trying to revert"
7875 " disk status: %s", msg)
7876 self.feedback_fn("Pre-migration failed, aborting")
7877 self._AbortMigration()
7878 self._RevertDiskStatus()
7879 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7880 (instance.name, msg))
7882 self.feedback_fn("* migrating instance to %s" % target_node)
7883 result = self.rpc.call_instance_migrate(source_node, instance,
7884 self.nodes_ip[target_node],
7886 msg = result.fail_msg
7888 logging.error("Instance migration failed, trying to revert"
7889 " disk status: %s", msg)
7890 self.feedback_fn("Migration failed, aborting")
7891 self._AbortMigration()
7892 self._RevertDiskStatus()
7893 raise errors.OpExecError("Could not migrate instance %s: %s" %
7894 (instance.name, msg))
7896 self.feedback_fn("* starting memory transfer")
7897 last_feedback = time.time()
7899 result = self.rpc.call_instance_get_migration_status(source_node,
7901 msg = result.fail_msg
7902 ms = result.payload # MigrationStatus instance
7903 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7904 logging.error("Instance migration failed, trying to revert"
7905 " disk status: %s", msg)
7906 self.feedback_fn("Migration failed, aborting")
7907 self._AbortMigration()
7908 self._RevertDiskStatus()
7909 raise errors.OpExecError("Could not migrate instance %s: %s" %
7910 (instance.name, msg))
7912 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7913 self.feedback_fn("* memory transfer complete")
7916 if (utils.TimeoutExpired(last_feedback,
7917 self._MIGRATION_FEEDBACK_INTERVAL) and
7918 ms.transferred_ram is not None):
7919 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7920 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7921 last_feedback = time.time()
7923 time.sleep(self._MIGRATION_POLL_INTERVAL)
7925 result = self.rpc.call_instance_finalize_migration_src(source_node,
7929 msg = result.fail_msg
7931 logging.error("Instance migration succeeded, but finalization failed"
7932 " on the source node: %s", msg)
7933 raise errors.OpExecError("Could not finalize instance migration: %s" %
7936 instance.primary_node = target_node
7938 # distribute new instance config to the other nodes
7939 self.cfg.Update(instance, self.feedback_fn)
7941 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7945 msg = result.fail_msg
7947 logging.error("Instance migration succeeded, but finalization failed"
7948 " on the target node: %s", msg)
7949 raise errors.OpExecError("Could not finalize instance migration: %s" %
7952 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7953 self._EnsureSecondary(source_node)
7954 self._WaitUntilSync()
7955 self._GoStandalone()
7956 self._GoReconnect(False)
7957 self._WaitUntilSync()
7959 self.feedback_fn("* done")
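  # Example of the progress figure computed in the polling loop above (numbers
  # are illustrative): with ms.transferred_ram == 1536 and ms.total_ram == 2048
  # the feedback line reads "* memory transfer progress: 75.00 %".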
7961 def _ExecFailover(self):
7962 """Failover an instance.
7964 The failover is done by shutting it down on its present node and
7965 starting it on the secondary.
7968 instance = self.instance
7969 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7971 source_node = instance.primary_node
7972 target_node = self.target_node
7974 if instance.admin_state == constants.ADMINST_UP:
7975 self.feedback_fn("* checking disk consistency between source and target")
7976 for dev in instance.disks:
7977 # for drbd, these are drbd over lvm
7978 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7979 if primary_node.offline:
7980 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7982 (primary_node.name, dev.iv_name, target_node))
7983 elif not self.ignore_consistency:
7984 raise errors.OpExecError("Disk %s is degraded on target node,"
7985 " aborting failover" % dev.iv_name)
7987 self.feedback_fn("* not checking disk consistency as instance is not"
7990 self.feedback_fn("* shutting down instance on source node")
7991 logging.info("Shutting down instance %s on node %s",
7992 instance.name, source_node)
7994 result = self.rpc.call_instance_shutdown(source_node, instance,
7995 self.shutdown_timeout)
7996 msg = result.fail_msg
7998 if self.ignore_consistency or primary_node.offline:
7999 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8000 " proceeding anyway; please make sure node"
8001 " %s is down; error details: %s",
8002 instance.name, source_node, source_node, msg)
8004 raise errors.OpExecError("Could not shutdown instance %s on"
8006 (instance.name, source_node, msg))
8008 self.feedback_fn("* deactivating the instance's disks on source node")
8009 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8010 raise errors.OpExecError("Can't shut down the instance's disks")
8012 instance.primary_node = target_node
8013 # distribute new instance config to the other nodes
8014 self.cfg.Update(instance, self.feedback_fn)
8016 # Only start the instance if it's marked as up
8017 if instance.admin_state == constants.ADMINST_UP:
8018 self.feedback_fn("* activating the instance's disks on target node %s" %
8020 logging.info("Starting instance %s on node %s",
8021 instance.name, target_node)
8023 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8024 ignore_secondaries=True)
8026 _ShutdownInstanceDisks(self.lu, instance)
8027 raise errors.OpExecError("Can't activate the instance's disks")
8029 self.feedback_fn("* starting the instance on the target node %s" %
8031 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8033 msg = result.fail_msg
8035 _ShutdownInstanceDisks(self.lu, instance)
8036 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8037 (instance.name, target_node, msg))
8039 def Exec(self, feedback_fn):
8040 """Perform the migration.
8043 self.feedback_fn = feedback_fn
8044 self.source_node = self.instance.primary_node
8046 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8047 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8048 self.target_node = self.instance.secondary_nodes[0]
8049 # Otherwise self.target_node has been populated either
8050 # directly, or through an iallocator.
8052 self.all_nodes = [self.source_node, self.target_node]
8053 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8054 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8057 feedback_fn("Failover instance %s" % self.instance.name)
8058 self._ExecFailover()
8060 feedback_fn("Migrating instance %s" % self.instance.name)
8063 return self._ExecCleanup()
8065 return self._ExecMigration()
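  # Sketch of the data prepared in Exec above (names and addresses are made up):
  #   self.all_nodes = ["node1.example.com", "node2.example.com"]
  #   self.nodes_ip  = {"node1.example.com": "192.0.2.1",
  #                     "node2.example.com": "192.0.2.2"}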
8068 def _CreateBlockDev(lu, node, instance, device, force_create,
8070 """Create a tree of block devices on a given node.
8072 If this device type has to be created on secondaries, create it and
8075 If not, just recurse to children keeping the same 'force' value.
8077 @param lu: the lu on whose behalf we execute
8078 @param node: the node on which to create the device
8079 @type instance: L{objects.Instance}
8080 @param instance: the instance which owns the device
8081 @type device: L{objects.Disk}
8082 @param device: the device to create
8083 @type force_create: boolean
8084 @param force_create: whether to force creation of this device; this
8085       will be changed to True whenever we find a device which has
8086 CreateOnSecondary() attribute
8087 @param info: the extra 'metadata' we should attach to the device
8088 (this will be represented as a LVM tag)
8089 @type force_open: boolean
8090   @param force_open: this parameter will be passed to the
8091 L{backend.BlockdevCreate} function where it specifies
8092 whether we run on primary or not, and it affects both
8093       the child assembly and the device's own Open() execution
8096 if device.CreateOnSecondary():
8100 for child in device.children:
8101 _CreateBlockDev(lu, node, instance, child, force_create,
8104 if not force_create:
8107 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8110 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8111 """Create a single block device on a given node.
8113 This will not recurse over children of the device, so they must be
8116 @param lu: the lu on whose behalf we execute
8117 @param node: the node on which to create the device
8118 @type instance: L{objects.Instance}
8119 @param instance: the instance which owns the device
8120 @type device: L{objects.Disk}
8121 @param device: the device to create
8122 @param info: the extra 'metadata' we should attach to the device
8123 (this will be represented as a LVM tag)
8124 @type force_open: boolean
8125   @param force_open: this parameter will be passed to the
8126 L{backend.BlockdevCreate} function where it specifies
8127 whether we run on primary or not, and it affects both
8128       the child assembly and the device's own Open() execution
8131 lu.cfg.SetDiskID(device, node)
8132 result = lu.rpc.call_blockdev_create(node, device, device.size,
8133 instance.name, force_open, info)
8134 result.Raise("Can't create block device %s on"
8135 " node %s for instance %s" % (device, node, instance.name))
8136 if device.physical_id is None:
8137 device.physical_id = result.payload
8140 def _GenerateUniqueNames(lu, exts):
8141 """Generate a suitable LV name.
8143 This will generate a logical volume name for the given instance.
8148 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8149 results.append("%s%s" % (new_id, val))
8153 def _ComputeLDParams(disk_template, disk_params):
8154 """Computes Logical Disk parameters from Disk Template parameters.
8156 @type disk_template: string
8157 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8158 @type disk_params: dict
8159   @param disk_params: disk template parameters; dict(template_name -> parameters)
8161 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8162 contains the LD parameters of the node. The tree is flattened in-order.
8165 if disk_template not in constants.DISK_TEMPLATES:
8166 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8169 dt_params = disk_params[disk_template]
8170 if disk_template == constants.DT_DRBD8:
8172 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8173 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8174 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8175 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8176 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8177 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8181 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8184 result.append(drbd_params)
8188 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8191 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8193 result.append(data_params)
8197 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8200 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8202 result.append(meta_params)
8204 elif (disk_template == constants.DT_FILE or
8205 disk_template == constants.DT_SHARED_FILE):
8206 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8208 elif disk_template == constants.DT_PLAIN:
8210 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8213 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8215 result.append(params)
8217 elif disk_template == constants.DT_BLOCK:
8218 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
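# For example, for disk_template == constants.DT_DRBD8 the result is a
# three-element list [drbd_params, data_params, meta_params], matching the
# in-order flattening of the disk tree (a DRBD8 device over a data LV and a
# meta LV), while constants.DT_PLAIN yields a single-element list with the
# LV parameters.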
8223 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8224 iv_name, p_minor, s_minor, drbd_params, data_params,
8226 """Generate a drbd8 device complete with its children.
8229 assert len(vgnames) == len(names) == 2
8230 port = lu.cfg.AllocatePort()
8231 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8233 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8234 logical_id=(vgnames[0], names[0]),
8236 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8237 logical_id=(vgnames[1], names[1]),
8239 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8240 logical_id=(primary, secondary, port,
8243 children=[dev_data, dev_meta],
8244 iv_name=iv_name, params=drbd_params)
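# The object tree built above looks roughly like this (names, port and minors
# are made up, sizes in MiB):
#   Disk(LD_DRBD8, size=10240,
#        logical_id=("node1", "node2", 11000, 0, 1, "<secret>"),
#        children=[Disk(LD_LV, 10240, logical_id=("xenvg", "<uuid>.disk0_data")),
#                  Disk(LD_LV, DRBD_META_SIZE,
#                       logical_id=("xenvg", "<uuid>.disk0_meta"))])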
8248 def _GenerateDiskTemplate(lu, template_name,
8249 instance_name, primary_node,
8250 secondary_nodes, disk_info,
8251 file_storage_dir, file_driver,
8252 base_index, feedback_fn, disk_params):
8253 """Generate the entire disk layout for a given template type.
8256   # TODO: compute space requirements
8258 vgname = lu.cfg.GetVGName()
8259 disk_count = len(disk_info)
8261 ld_params = _ComputeLDParams(template_name, disk_params)
8262 if template_name == constants.DT_DISKLESS:
8264 elif template_name == constants.DT_PLAIN:
8265 if len(secondary_nodes) != 0:
8266 raise errors.ProgrammerError("Wrong template configuration")
8268 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8269 for i in range(disk_count)])
8270 for idx, disk in enumerate(disk_info):
8271 disk_index = idx + base_index
8272 vg = disk.get(constants.IDISK_VG, vgname)
8273 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8274 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8275 size=disk[constants.IDISK_SIZE],
8276 logical_id=(vg, names[idx]),
8277 iv_name="disk/%d" % disk_index,
8278 mode=disk[constants.IDISK_MODE],
8279 params=ld_params[0])
8280 disks.append(disk_dev)
8281 elif template_name == constants.DT_DRBD8:
8282 drbd_params, data_params, meta_params = ld_params
8283 if len(secondary_nodes) != 1:
8284 raise errors.ProgrammerError("Wrong template configuration")
8285 remote_node = secondary_nodes[0]
8286 minors = lu.cfg.AllocateDRBDMinor(
8287 [primary_node, remote_node] * len(disk_info), instance_name)
8290 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8291 for i in range(disk_count)]):
8292 names.append(lv_prefix + "_data")
8293 names.append(lv_prefix + "_meta")
8294 for idx, disk in enumerate(disk_info):
8295 disk_index = idx + base_index
8296 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8297 data_vg = disk.get(constants.IDISK_VG, vgname)
8298 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8299 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8300 disk[constants.IDISK_SIZE],
8302 names[idx * 2:idx * 2 + 2],
8303 "disk/%d" % disk_index,
8304 minors[idx * 2], minors[idx * 2 + 1],
8305 drbd_params, data_params, meta_params)
8306 disk_dev.mode = disk[constants.IDISK_MODE]
8307 disks.append(disk_dev)
8308 elif template_name == constants.DT_FILE:
8309 if len(secondary_nodes) != 0:
8310 raise errors.ProgrammerError("Wrong template configuration")
8312 opcodes.RequireFileStorage()
8314 for idx, disk in enumerate(disk_info):
8315 disk_index = idx + base_index
8316 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8317 size=disk[constants.IDISK_SIZE],
8318 iv_name="disk/%d" % disk_index,
8319 logical_id=(file_driver,
8320 "%s/disk%d" % (file_storage_dir,
8322 mode=disk[constants.IDISK_MODE],
8323 params=ld_params[0])
8324 disks.append(disk_dev)
8325 elif template_name == constants.DT_SHARED_FILE:
8326 if len(secondary_nodes) != 0:
8327 raise errors.ProgrammerError("Wrong template configuration")
8329 opcodes.RequireSharedFileStorage()
8331 for idx, disk in enumerate(disk_info):
8332 disk_index = idx + base_index
8333 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8334 size=disk[constants.IDISK_SIZE],
8335 iv_name="disk/%d" % disk_index,
8336 logical_id=(file_driver,
8337 "%s/disk%d" % (file_storage_dir,
8339 mode=disk[constants.IDISK_MODE],
8340 params=ld_params[0])
8341 disks.append(disk_dev)
8342 elif template_name == constants.DT_BLOCK:
8343 if len(secondary_nodes) != 0:
8344 raise errors.ProgrammerError("Wrong template configuration")
8346 for idx, disk in enumerate(disk_info):
8347 disk_index = idx + base_index
8348 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8349 size=disk[constants.IDISK_SIZE],
8350 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8351 disk[constants.IDISK_ADOPT]),
8352 iv_name="disk/%d" % disk_index,
8353 mode=disk[constants.IDISK_MODE],
8354 params=ld_params[0])
8355 disks.append(disk_dev)
8358 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8362 def _GetInstanceInfoText(instance):
8363   """Compute the text that should be added to the disk's metadata.
8366 return "originstname+%s" % instance.name
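# For an instance named "inst1.example.com" this returns
# "originstname+inst1.example.com", which is attached to the created devices
# (represented as an LVM tag for LVM-based disks).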
8369 def _CalcEta(time_taken, written, total_size):
8370 """Calculates the ETA based on size written and total size.
8372 @param time_taken: The time taken so far
8373 @param written: amount written so far
8374 @param total_size: The total size of data to be written
8375 @return: The remaining time in seconds
8378 avg_time = time_taken / float(written)
8379 return (total_size - written) * avg_time
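# Worked example: if 512 MiB out of 2048 MiB were written in 60 seconds,
# avg_time is 60 / 512 seconds per MiB and the ETA is
# (2048 - 512) * 60 / 512 = 180 seconds.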
8382 def _WipeDisks(lu, instance):
8383 """Wipes instance disks.
8385 @type lu: L{LogicalUnit}
8386 @param lu: the logical unit on whose behalf we execute
8387 @type instance: L{objects.Instance}
8388 @param instance: the instance whose disks we should create
8389 @return: the success of the wipe
8392 node = instance.primary_node
8394 for device in instance.disks:
8395 lu.cfg.SetDiskID(device, node)
8397 logging.info("Pause sync of instance %s disks", instance.name)
8398 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8400 for idx, success in enumerate(result.payload):
8402       logging.warn("pause-sync of instance %s for disk %d failed",
8406 for idx, device in enumerate(instance.disks):
8407     # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8408     # at most MAX_WIPE_CHUNK
8409 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8410 constants.MIN_WIPE_CHUNK_PERCENT)
8411 # we _must_ make this an int, otherwise rounding errors will
8413 wipe_chunk_size = int(wipe_chunk_size)
8415 lu.LogInfo("* Wiping disk %d", idx)
8416 logging.info("Wiping disk %d for instance %s, node %s using"
8417 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8422 start_time = time.time()
8424 while offset < size:
8425 wipe_size = min(wipe_chunk_size, size - offset)
8426 logging.debug("Wiping disk %d, offset %s, chunk %s",
8427 idx, offset, wipe_size)
8428 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8429 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8430 (idx, offset, wipe_size))
8433 if now - last_output >= 60:
8434 eta = _CalcEta(now - start_time, offset, size)
8435 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8436 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8439 logging.info("Resume sync of instance %s disks", instance.name)
8441 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8443 for idx, success in enumerate(result.payload):
8445 lu.LogWarning("Resume sync of disk %d failed, please have a"
8446 " look at the status and troubleshoot the issue", idx)
8447       logging.warn("resume-sync of instance %s for disk %d failed",
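# Sketch of the chunk size computation above, assuming (illustratively)
# MIN_WIPE_CHUNK_PERCENT == 10 and MAX_WIPE_CHUNK == 1024 MiB:
#   a 2000 MiB disk is wiped in 200 MiB chunks (10% of its size), while a
#   102400 MiB disk is wiped in 1024 MiB chunks (capped at MAX_WIPE_CHUNK).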
8451 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8452 """Create all disks for an instance.
8454 This abstracts away some work from AddInstance.
8456 @type lu: L{LogicalUnit}
8457 @param lu: the logical unit on whose behalf we execute
8458 @type instance: L{objects.Instance}
8459 @param instance: the instance whose disks we should create
8461 @param to_skip: list of indices to skip
8462 @type target_node: string
8463 @param target_node: if passed, overrides the target node for creation
8465 @return: the success of the creation
8468 info = _GetInstanceInfoText(instance)
8469 if target_node is None:
8470 pnode = instance.primary_node
8471 all_nodes = instance.all_nodes
8476 if instance.disk_template in constants.DTS_FILEBASED:
8477 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8478 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8480 result.Raise("Failed to create directory '%s' on"
8481 " node %s" % (file_storage_dir, pnode))
8483 # Note: this needs to be kept in sync with adding of disks in
8484 # LUInstanceSetParams
8485 for idx, device in enumerate(instance.disks):
8486 if to_skip and idx in to_skip:
8488 logging.info("Creating volume %s for instance %s",
8489 device.iv_name, instance.name)
8491 for node in all_nodes:
8492 f_create = node == pnode
8493 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
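# For a DRBD instance with primary "node1" and secondary "node2" (names are
# illustrative), the loop above therefore issues, per disk:
#   _CreateBlockDev(lu, "node1", instance, device, True, info, True)
#   _CreateBlockDev(lu, "node2", instance, device, False, info, False)
# i.e. only the primary node forces creation and opens the device.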
8496 def _RemoveDisks(lu, instance, target_node=None):
8497 """Remove all disks for an instance.
8499 This abstracts away some work from `AddInstance()` and
8500 `RemoveInstance()`. Note that in case some of the devices couldn't
8501 be removed, the removal will continue with the other ones (compare
8502 with `_CreateDisks()`).
8504 @type lu: L{LogicalUnit}
8505 @param lu: the logical unit on whose behalf we execute
8506 @type instance: L{objects.Instance}
8507 @param instance: the instance whose disks we should remove
8508 @type target_node: string
8509 @param target_node: used to override the node on which to remove the disks
8511 @return: the success of the removal
8514 logging.info("Removing block devices for instance %s", instance.name)
8517 for device in instance.disks:
8519 edata = [(target_node, device)]
8521 edata = device.ComputeNodeTree(instance.primary_node)
8522 for node, disk in edata:
8523 lu.cfg.SetDiskID(disk, node)
8524 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8526 lu.LogWarning("Could not remove block device %s on node %s,"
8527 " continuing anyway: %s", device.iv_name, node, msg)
8530 # if this is a DRBD disk, return its port to the pool
8531 if device.dev_type in constants.LDS_DRBD:
8532 tcp_port = device.logical_id[2]
8533 lu.cfg.AddTcpUdpPort(tcp_port)
8535 if instance.disk_template == constants.DT_FILE:
8536 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8540 tgt = instance.primary_node
8541 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8543 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8544 file_storage_dir, instance.primary_node, result.fail_msg)
8550 def _ComputeDiskSizePerVG(disk_template, disks):
8551 """Compute disk size requirements in the volume group
8554 def _compute(disks, payload):
8555 """Universal algorithm.
8560 vgs[disk[constants.IDISK_VG]] = \
8561               vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8565 # Required free disk space as a function of disk and swap space
8567 constants.DT_DISKLESS: {},
8568 constants.DT_PLAIN: _compute(disks, 0),
8569 # 128 MB are added for drbd metadata for each disk
8570 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8571 constants.DT_FILE: {},
8572 constants.DT_SHARED_FILE: {},
8575 if disk_template not in req_size_dict:
8576 raise errors.ProgrammerError("Disk template '%s' size requirement"
8577 " is unknown" % disk_template)
8579 return req_size_dict[disk_template]
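# Worked example: for a DRBD instance with a 1024 MiB disk in VG "xenvg" and a
# 512 MiB disk in VG "othervg" (names are illustrative), and 128 MiB of DRBD
# metadata per disk, this returns
#   {"xenvg": 1024 + 128, "othervg": 512 + 128}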
8582 def _ComputeDiskSize(disk_template, disks):
8583   """Compute the total disk size requirement for the given disk template
8586 # Required free disk space as a function of disk and swap space
8588 constants.DT_DISKLESS: None,
8589 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8590 # 128 MB are added for drbd metadata for each disk
8592 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8593 constants.DT_FILE: None,
8594 constants.DT_SHARED_FILE: 0,
8595 constants.DT_BLOCK: 0,
8598 if disk_template not in req_size_dict:
8599 raise errors.ProgrammerError("Disk template '%s' size requirement"
8600 " is unknown" % disk_template)
8602 return req_size_dict[disk_template]
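# Worked example: with two disks of 1024 MiB and 2048 MiB, DT_PLAIN requires
# 3072 MiB while DT_DRBD8 requires 3072 + 2 * 128 = 3328 MiB (128 MiB of DRBD
# metadata per disk).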
8605 def _FilterVmNodes(lu, nodenames):
8606 """Filters out non-vm_capable nodes from a list.
8608 @type lu: L{LogicalUnit}
8609 @param lu: the logical unit for which we check
8610 @type nodenames: list
8611 @param nodenames: the list of nodes on which we should check
8613 @return: the list of vm-capable nodes
8616 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8617 return [name for name in nodenames if name not in vm_nodes]
8620 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8621 """Hypervisor parameter validation.
8623   This function abstracts the hypervisor parameter validation to be
8624 used in both instance create and instance modify.
8626 @type lu: L{LogicalUnit}
8627 @param lu: the logical unit for which we check
8628 @type nodenames: list
8629 @param nodenames: the list of nodes on which we should check
8630 @type hvname: string
8631 @param hvname: the name of the hypervisor we should use
8632 @type hvparams: dict
8633 @param hvparams: the parameters which we need to check
8634 @raise errors.OpPrereqError: if the parameters are not valid
8637 nodenames = _FilterVmNodes(lu, nodenames)
8639 cluster = lu.cfg.GetClusterInfo()
8640 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8642 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8643 for node in nodenames:
8647 info.Raise("Hypervisor parameter validation failed on node %s" % node)
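# FillDict overlays the given parameters on the cluster-level defaults for the
# hypervisor, e.g. (parameter names and values are illustrative):
#   objects.FillDict({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                    {"acpi": False})
#   -> {"kernel_path": "/boot/vmlinuz", "acpi": False}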
8650 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8651 """OS parameters validation.
8653 @type lu: L{LogicalUnit}
8654 @param lu: the logical unit for which we check
8655 @type required: boolean
8656 @param required: whether the validation should fail if the OS is not
8658 @type nodenames: list
8659 @param nodenames: the list of nodes on which we should check
8660 @type osname: string
8661   @param osname: the name of the OS we should use
8662 @type osparams: dict
8663 @param osparams: the parameters which we need to check
8664 @raise errors.OpPrereqError: if the parameters are not valid
8667 nodenames = _FilterVmNodes(lu, nodenames)
8668 result = lu.rpc.call_os_validate(nodenames, required, osname,
8669 [constants.OS_VALIDATE_PARAMETERS],
8671 for node, nres in result.items():
8672 # we don't check for offline cases since this should be run only
8673 # against the master node and/or an instance's nodes
8674 nres.Raise("OS Parameters validation failed on node %s" % node)
8675 if not nres.payload:
8676 lu.LogInfo("OS %s not found on node %s, validation skipped",
8680 class LUInstanceCreate(LogicalUnit):
8681 """Create an instance.
8684 HPATH = "instance-add"
8685 HTYPE = constants.HTYPE_INSTANCE
8688 def CheckArguments(self):
8692 # do not require name_check to ease forward/backward compatibility
8694 if self.op.no_install and self.op.start:
8695 self.LogInfo("No-installation mode selected, disabling startup")
8696 self.op.start = False
8697 # validate/normalize the instance name
8698 self.op.instance_name = \
8699 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8701 if self.op.ip_check and not self.op.name_check:
8702 # TODO: make the ip check more flexible and not depend on the name check
8703 raise errors.OpPrereqError("Cannot do IP address check without a name"
8704 " check", errors.ECODE_INVAL)
8706 # check nics' parameter names
8707 for nic in self.op.nics:
8708 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8710 # check disks. parameter names and consistent adopt/no-adopt strategy
8711 has_adopt = has_no_adopt = False
8712 for disk in self.op.disks:
8713 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8714 if constants.IDISK_ADOPT in disk:
8718 if has_adopt and has_no_adopt:
8719 raise errors.OpPrereqError("Either all disks are adopted or none is",
8722 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8723 raise errors.OpPrereqError("Disk adoption is not supported for the"
8724 " '%s' disk template" %
8725 self.op.disk_template,
8727 if self.op.iallocator is not None:
8728 raise errors.OpPrereqError("Disk adoption not allowed with an"
8729 " iallocator script", errors.ECODE_INVAL)
8730 if self.op.mode == constants.INSTANCE_IMPORT:
8731 raise errors.OpPrereqError("Disk adoption not allowed for"
8732 " instance import", errors.ECODE_INVAL)
8734 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8735 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8736 " but no 'adopt' parameter given" %
8737 self.op.disk_template,
8740 self.adopt_disks = has_adopt
8742 # instance name verification
8743 if self.op.name_check:
8744 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8745 self.op.instance_name = self.hostname1.name
8746 # used in CheckPrereq for ip ping check
8747 self.check_ip = self.hostname1.ip
8749 self.check_ip = None
8751 # file storage checks
8752 if (self.op.file_driver and
8753 not self.op.file_driver in constants.FILE_DRIVER):
8754 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8755 self.op.file_driver, errors.ECODE_INVAL)
8757 if self.op.disk_template == constants.DT_FILE:
8758 opcodes.RequireFileStorage()
8759 elif self.op.disk_template == constants.DT_SHARED_FILE:
8760 opcodes.RequireSharedFileStorage()
8762 ### Node/iallocator related checks
8763 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8765 if self.op.pnode is not None:
8766 if self.op.disk_template in constants.DTS_INT_MIRROR:
8767 if self.op.snode is None:
8768 raise errors.OpPrereqError("The networked disk templates need"
8769 " a mirror node", errors.ECODE_INVAL)
8771 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8773 self.op.snode = None
8775 self._cds = _GetClusterDomainSecret()
8777 if self.op.mode == constants.INSTANCE_IMPORT:
8778 # On import force_variant must be True, because if we forced it at
8779 # initial install, our only chance when importing it back is that it
8781 self.op.force_variant = True
8783 if self.op.no_install:
8784 self.LogInfo("No-installation mode has no effect during import")
8786 elif self.op.mode == constants.INSTANCE_CREATE:
8787 if self.op.os_type is None:
8788 raise errors.OpPrereqError("No guest OS specified",
8790 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8791 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8792 " installation" % self.op.os_type,
8794 if self.op.disk_template is None:
8795 raise errors.OpPrereqError("No disk template specified",
8798 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8799 # Check handshake to ensure both clusters have the same domain secret
8800 src_handshake = self.op.source_handshake
8801 if not src_handshake:
8802 raise errors.OpPrereqError("Missing source handshake",
8805 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8808 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8811 # Load and check source CA
8812 self.source_x509_ca_pem = self.op.source_x509_ca
8813 if not self.source_x509_ca_pem:
8814 raise errors.OpPrereqError("Missing source X509 CA",
8818 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8820 except OpenSSL.crypto.Error, err:
8821 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8822 (err, ), errors.ECODE_INVAL)
8824 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8825 if errcode is not None:
8826 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8829 self.source_x509_ca = cert
8831 src_instance_name = self.op.source_instance_name
8832 if not src_instance_name:
8833 raise errors.OpPrereqError("Missing source instance name",
8836 self.source_instance_name = \
8837 netutils.GetHostname(name=src_instance_name).name
8840 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8841 self.op.mode, errors.ECODE_INVAL)
8843 def ExpandNames(self):
8844 """ExpandNames for CreateInstance.
8846 Figure out the right locks for instance creation.
8849 self.needed_locks = {}
8851 instance_name = self.op.instance_name
8852 # this is just a preventive check, but someone might still add this
8853 # instance in the meantime, and creation will fail at lock-add time
8854 if instance_name in self.cfg.GetInstanceList():
8855 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8856 instance_name, errors.ECODE_EXISTS)
8858 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8860 if self.op.iallocator:
8861 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8862 # specifying a group on instance creation and then selecting nodes from
8864 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8865 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8867 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8868 nodelist = [self.op.pnode]
8869 if self.op.snode is not None:
8870 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8871 nodelist.append(self.op.snode)
8872 self.needed_locks[locking.LEVEL_NODE] = nodelist
8873 # Lock resources of instance's primary and secondary nodes (copy to
8874     # prevent accidental modification)
8875 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8877 # in case of import lock the source node too
8878 if self.op.mode == constants.INSTANCE_IMPORT:
8879 src_node = self.op.src_node
8880 src_path = self.op.src_path
8882 if src_path is None:
8883 self.op.src_path = src_path = self.op.instance_name
8885 if src_node is None:
8886 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8887 self.op.src_node = None
8888 if os.path.isabs(src_path):
8889 raise errors.OpPrereqError("Importing an instance from a path"
8890 " requires a source node option",
8893 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8894 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8895 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8896 if not os.path.isabs(src_path):
8897 self.op.src_path = src_path = \
8898 utils.PathJoin(constants.EXPORT_DIR, src_path)
8900 def _RunAllocator(self):
8901 """Run the allocator based on input opcode.
8904 nics = [n.ToDict() for n in self.nics]
8905 ial = IAllocator(self.cfg, self.rpc,
8906 mode=constants.IALLOCATOR_MODE_ALLOC,
8907 name=self.op.instance_name,
8908 disk_template=self.op.disk_template,
8911 vcpus=self.be_full[constants.BE_VCPUS],
8912 memory=self.be_full[constants.BE_MAXMEM],
8915 hypervisor=self.op.hypervisor,
8918 ial.Run(self.op.iallocator)
8921 raise errors.OpPrereqError("Can't compute nodes using"
8922 " iallocator '%s': %s" %
8923 (self.op.iallocator, ial.info),
8925 if len(ial.result) != ial.required_nodes:
8926 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8927 " of nodes (%s), required %s" %
8928 (self.op.iallocator, len(ial.result),
8929 ial.required_nodes), errors.ECODE_FAULT)
8930 self.op.pnode = ial.result[0]
8931 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8932 self.op.instance_name, self.op.iallocator,
8933 utils.CommaJoin(ial.result))
8934 if ial.required_nodes == 2:
8935 self.op.snode = ial.result[1]
8937 def BuildHooksEnv(self):
8940 This runs on master, primary and secondary nodes of the instance.
8944 "ADD_MODE": self.op.mode,
8946 if self.op.mode == constants.INSTANCE_IMPORT:
8947 env["SRC_NODE"] = self.op.src_node
8948 env["SRC_PATH"] = self.op.src_path
8949 env["SRC_IMAGES"] = self.src_images
8951 env.update(_BuildInstanceHookEnv(
8952 name=self.op.instance_name,
8953 primary_node=self.op.pnode,
8954 secondary_nodes=self.secondaries,
8955 status=self.op.start,
8956 os_type=self.op.os_type,
8957 minmem=self.be_full[constants.BE_MINMEM],
8958 maxmem=self.be_full[constants.BE_MAXMEM],
8959 vcpus=self.be_full[constants.BE_VCPUS],
8960 nics=_NICListToTuple(self, self.nics),
8961 disk_template=self.op.disk_template,
8962 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8963 for d in self.disks],
8966 hypervisor_name=self.op.hypervisor,
8972 def BuildHooksNodes(self):
8973 """Build hooks nodes.
8976 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8979 def _ReadExportInfo(self):
8980 """Reads the export information from disk.
8982 It will override the opcode source node and path with the actual
8983 information, if these two were not specified before.
8985 @return: the export information
8988 assert self.op.mode == constants.INSTANCE_IMPORT
8990 src_node = self.op.src_node
8991 src_path = self.op.src_path
8993 if src_node is None:
8994 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8995 exp_list = self.rpc.call_export_list(locked_nodes)
8997 for node in exp_list:
8998 if exp_list[node].fail_msg:
9000 if src_path in exp_list[node].payload:
9002 self.op.src_node = src_node = node
9003 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9007 raise errors.OpPrereqError("No export found for relative path %s" %
9008 src_path, errors.ECODE_INVAL)
9010 _CheckNodeOnline(self, src_node)
9011 result = self.rpc.call_export_info(src_node, src_path)
9012 result.Raise("No export or invalid export found in dir %s" % src_path)
9014 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9015 if not export_info.has_section(constants.INISECT_EXP):
9016 raise errors.ProgrammerError("Corrupted export config",
9017 errors.ECODE_ENVIRON)
9019 ei_version = export_info.get(constants.INISECT_EXP, "version")
9020 if (int(ei_version) != constants.EXPORT_VERSION):
9021 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9022 (ei_version, constants.EXPORT_VERSION),
9023 errors.ECODE_ENVIRON)
9026 def _ReadExportParams(self, einfo):
9027 """Use export parameters as defaults.
9029 In case the opcode doesn't specify (as in override) some instance
9030 parameters, then try to use them from the export information, if
9034 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9036 if self.op.disk_template is None:
9037 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9038 self.op.disk_template = einfo.get(constants.INISECT_INS,
9040 if self.op.disk_template not in constants.DISK_TEMPLATES:
9041 raise errors.OpPrereqError("Disk template specified in configuration"
9042 " file is not one of the allowed values:"
9043 " %s" % " ".join(constants.DISK_TEMPLATES))
9045 raise errors.OpPrereqError("No disk template specified and the export"
9046 " is missing the disk_template information",
9049 if not self.op.disks:
9051 # TODO: import the disk iv_name too
9052 for idx in range(constants.MAX_DISKS):
9053 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9054 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9055 disks.append({constants.IDISK_SIZE: disk_sz})
9056 self.op.disks = disks
9057 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9058 raise errors.OpPrereqError("No disk info specified and the export"
9059 " is missing the disk information",
9062 if not self.op.nics:
9064 for idx in range(constants.MAX_NICS):
9065 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9067 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9068 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9075 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9076 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9078 if (self.op.hypervisor is None and
9079 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9080 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9082 if einfo.has_section(constants.INISECT_HYP):
9083 # use the export parameters but do not override the ones
9084 # specified by the user
9085 for name, value in einfo.items(constants.INISECT_HYP):
9086 if name not in self.op.hvparams:
9087 self.op.hvparams[name] = value
9089 if einfo.has_section(constants.INISECT_BEP):
9090 # use the parameters, without overriding
9091 for name, value in einfo.items(constants.INISECT_BEP):
9092 if name not in self.op.beparams:
9093 self.op.beparams[name] = value
9094 # Compatibility for the old "memory" be param
9095 if name == constants.BE_MEMORY:
9096 if constants.BE_MAXMEM not in self.op.beparams:
9097 self.op.beparams[constants.BE_MAXMEM] = value
9098 if constants.BE_MINMEM not in self.op.beparams:
9099 self.op.beparams[constants.BE_MINMEM] = value
9101 # try to read the parameters old style, from the main section
9102 for name in constants.BES_PARAMETERS:
9103 if (name not in self.op.beparams and
9104 einfo.has_option(constants.INISECT_INS, name)):
9105 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9107 if einfo.has_section(constants.INISECT_OSP):
9108 # use the parameters, without overriding
9109 for name, value in einfo.items(constants.INISECT_OSP):
9110 if name not in self.op.osparams:
9111 self.op.osparams[name] = value
9113 def _RevertToDefaults(self, cluster):
9114 """Revert the instance parameters to the default values.
9118 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9119 for name in self.op.hvparams.keys():
9120 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9121 del self.op.hvparams[name]
9123 be_defs = cluster.SimpleFillBE({})
9124 for name in self.op.beparams.keys():
9125 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9126 del self.op.beparams[name]
9128 nic_defs = cluster.SimpleFillNIC({})
9129 for nic in self.op.nics:
9130 for name in constants.NICS_PARAMETERS:
9131 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9134 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9135 for name in self.op.osparams.keys():
9136 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9137 del self.op.osparams[name]
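  # For example (values are illustrative): if the cluster default for
  # BE_MAXMEM is 128 and the opcode specifies beparams == {"maxmem": 128,
  # "vcpus": 4}, the "maxmem" entry is dropped so the instance keeps tracking
  # the cluster default, while "vcpus" remains an instance-level override.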
9139 def _CalculateFileStorageDir(self):
9140 """Calculate final instance file storage dir.
9143 # file storage dir calculation/check
9144 self.instance_file_storage_dir = None
9145 if self.op.disk_template in constants.DTS_FILEBASED:
9146 # build the full file storage dir path
9149 if self.op.disk_template == constants.DT_SHARED_FILE:
9150 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9152 get_fsd_fn = self.cfg.GetFileStorageDir
9154 cfg_storagedir = get_fsd_fn()
9155 if not cfg_storagedir:
9156 raise errors.OpPrereqError("Cluster file storage dir not defined")
9157 joinargs.append(cfg_storagedir)
9159 if self.op.file_storage_dir is not None:
9160 joinargs.append(self.op.file_storage_dir)
9162 joinargs.append(self.op.instance_name)
9164 # pylint: disable=W0142
9165 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
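  # Illustrative result: with a cluster file storage dir of
  # "/srv/ganeti/file-storage", op.file_storage_dir == "mydir" and an instance
  # named "inst1.example.com", the computed directory is
  #   /srv/ganeti/file-storage/mydir/inst1.example.com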
9167 def CheckPrereq(self):
9168 """Check prerequisites.
9171 self._CalculateFileStorageDir()
9173 if self.op.mode == constants.INSTANCE_IMPORT:
9174 export_info = self._ReadExportInfo()
9175 self._ReadExportParams(export_info)
9177 if (not self.cfg.GetVGName() and
9178 self.op.disk_template not in constants.DTS_NOT_LVM):
9179 raise errors.OpPrereqError("Cluster does not support lvm-based"
9180 " instances", errors.ECODE_STATE)
9182 if (self.op.hypervisor is None or
9183 self.op.hypervisor == constants.VALUE_AUTO):
9184 self.op.hypervisor = self.cfg.GetHypervisorType()
9186 cluster = self.cfg.GetClusterInfo()
9187 enabled_hvs = cluster.enabled_hypervisors
9188 if self.op.hypervisor not in enabled_hvs:
9189 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9190 " cluster (%s)" % (self.op.hypervisor,
9191 ",".join(enabled_hvs)),
9194 # Check tag validity
9195 for tag in self.op.tags:
9196 objects.TaggableObject.ValidateTag(tag)
9198 # check hypervisor parameter syntax (locally)
9199 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9200 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9202 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9203 hv_type.CheckParameterSyntax(filled_hvp)
9204 self.hv_full = filled_hvp
9205 # check that we don't specify global parameters on an instance
9206 _CheckGlobalHvParams(self.op.hvparams)
9208 # fill and remember the beparams dict
9209 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9210 for param, value in self.op.beparams.iteritems():
9211 if value == constants.VALUE_AUTO:
9212 self.op.beparams[param] = default_beparams[param]
9213 objects.UpgradeBeParams(self.op.beparams)
9214 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9215 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9217 # build os parameters
9218 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9220 # now that hvp/bep are in final format, let's reset to defaults,
9222 if self.op.identify_defaults:
9223 self._RevertToDefaults(cluster)
9227 for idx, nic in enumerate(self.op.nics):
9228 nic_mode_req = nic.get(constants.INIC_MODE, None)
9229 nic_mode = nic_mode_req
9230 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9231 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9233 # in routed mode, for the first nic, the default ip is 'auto'
9234 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9235 default_ip_mode = constants.VALUE_AUTO
9237 default_ip_mode = constants.VALUE_NONE
9239 # ip validity checks
9240 ip = nic.get(constants.INIC_IP, default_ip_mode)
9241 if ip is None or ip.lower() == constants.VALUE_NONE:
9243 elif ip.lower() == constants.VALUE_AUTO:
9244 if not self.op.name_check:
9245 raise errors.OpPrereqError("IP address set to auto but name checks"
9246 " have been skipped",
9248 nic_ip = self.hostname1.ip
9250 if not netutils.IPAddress.IsValid(ip):
9251 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9255 # TODO: check the ip address for uniqueness
9256 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9257 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9260 # MAC address verification
9261 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9262 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9263 mac = utils.NormalizeAndValidateMac(mac)
9266 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9267 except errors.ReservationError:
9268 raise errors.OpPrereqError("MAC address %s already in use"
9269 " in cluster" % mac,
9270 errors.ECODE_NOTUNIQUE)
9272 # Build nic parameters
9273 link = nic.get(constants.INIC_LINK, None)
9274 if link == constants.VALUE_AUTO:
9275 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9278 nicparams[constants.NIC_MODE] = nic_mode
9280 nicparams[constants.NIC_LINK] = link
9282 check_params = cluster.SimpleFillNIC(nicparams)
9283 objects.NIC.CheckParameterSyntax(check_params)
9284 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9286 # disk checks/pre-build
9287 default_vg = self.cfg.GetVGName()
9289 for disk in self.op.disks:
9290 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9291 if mode not in constants.DISK_ACCESS_SET:
9292 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9293 mode, errors.ECODE_INVAL)
9294 size = disk.get(constants.IDISK_SIZE, None)
9296 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9299 except (TypeError, ValueError):
9300 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9303 data_vg = disk.get(constants.IDISK_VG, default_vg)
9304 new_disk = {
9305 constants.IDISK_SIZE: size,
9306 constants.IDISK_MODE: mode,
9307 constants.IDISK_VG: data_vg,
9308 }
9309 if constants.IDISK_METAVG in disk:
9310 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9311 if constants.IDISK_ADOPT in disk:
9312 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9313 self.disks.append(new_disk)
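# Illustrative shape of one entry in self.disks (example values only):
# {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#  constants.IDISK_VG: "xenvg"}; the METAVG/ADOPT keys are only present when
# the caller supplied them.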
9315 if self.op.mode == constants.INSTANCE_IMPORT:
9316 disk_images = []
9317 for idx in range(len(self.disks)):
9318 option = "disk%d_dump" % idx
9319 if export_info.has_option(constants.INISECT_INS, option):
9320 # FIXME: are the old os-es, disk sizes, etc. useful?
9321 export_name = export_info.get(constants.INISECT_INS, option)
9322 image = utils.PathJoin(self.op.src_path, export_name)
9323 disk_images.append(image)
9324 else:
9325 disk_images.append(False)
9327 self.src_images = disk_images
9329 old_name = export_info.get(constants.INISECT_INS, "name")
9330 if self.op.instance_name == old_name:
9331 for idx, nic in enumerate(self.nics):
9332 if nic.mac == constants.VALUE_AUTO:
9333 nic_mac_ini = "nic%d_mac" % idx
9334 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9336 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9338 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9339 if self.op.ip_check:
9340 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9341 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9342 (self.check_ip, self.op.instance_name),
9343 errors.ECODE_NOTUNIQUE)
9345 #### mac address generation
9346 # By generating here the mac address both the allocator and the hooks get
9347 # the real final mac address rather than the 'auto' or 'generate' value.
9348 # There is a race condition between the generation and the instance object
9349 # creation, which means that we know the mac is valid now, but we're not
9350 # sure it will be when we actually add the instance. If things go bad
9351 # adding the instance will abort because of a duplicate mac, and the
9352 # creation job will fail.
9353 for nic in self.nics:
9354 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9355 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
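# The generated address uses the cluster-wide MAC prefix; illustratively, a
# prefix of "aa:00:00" would yield something like "aa:00:00:7f:01:02". As the
# comment above explains, uniqueness is only re-checked when the instance is
# actually added to the configuration.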
9359 if self.op.iallocator is not None:
9360 self._RunAllocator()
9362 # Release all unneeded node locks
9363 _ReleaseLocks(self, locking.LEVEL_NODE,
9364 keep=filter(None, [self.op.pnode, self.op.snode,
9365 self.op.src_node]))
9367 #### node related checks
9369 # check primary node
9370 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9371 assert self.pnode is not None, \
9372 "Cannot retrieve locked node %s" % self.op.pnode
9373 if pnode.offline:
9374 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9375 pnode.name, errors.ECODE_STATE)
9376 if pnode.drained:
9377 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9378 pnode.name, errors.ECODE_STATE)
9379 if not pnode.vm_capable:
9380 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9381 " '%s'" % pnode.name, errors.ECODE_STATE)
9383 self.secondaries = []
9385 # mirror node verification
9386 if self.op.disk_template in constants.DTS_INT_MIRROR:
9387 if self.op.snode == pnode.name:
9388 raise errors.OpPrereqError("The secondary node cannot be the"
9389 " primary node", errors.ECODE_INVAL)
9390 _CheckNodeOnline(self, self.op.snode)
9391 _CheckNodeNotDrained(self, self.op.snode)
9392 _CheckNodeVmCapable(self, self.op.snode)
9393 self.secondaries.append(self.op.snode)
9395 snode = self.cfg.GetNodeInfo(self.op.snode)
9396 if pnode.group != snode.group:
9397 self.LogWarning("The primary and secondary nodes are in two"
9398 " different node groups; the disk parameters"
9399 " from the first disk's node group will be"
9402 nodenames = [pnode.name] + self.secondaries
9404 # disk parameters (not customizable at instance or node level)
9405 # just use the primary node parameters, ignoring the secondary.
9406 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9408 if not self.adopt_disks:
9409 # Check lv size requirements, if not adopting
9410 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9411 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9413 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9414 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9415 disk[constants.IDISK_ADOPT])
9416 for disk in self.disks])
9417 if len(all_lvs) != len(self.disks):
9418 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9420 for lv_name in all_lvs:
9421 try:
9422 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9423 # to ReserveLV uses the same syntax
9424 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9425 except errors.ReservationError:
9426 raise errors.OpPrereqError("LV named %s used by another instance" %
9427 lv_name, errors.ECODE_NOTUNIQUE)
9429 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9430 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9432 node_lvs = self.rpc.call_lv_list([pnode.name],
9433 vg_names.payload.keys())[pnode.name]
9434 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9435 node_lvs = node_lvs.payload
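# Assumption based on the usage below: the lv_list payload maps "vg/lv" names
# to tuples in which index 0 holds the LV size (in MiB) and index 2 a boolean
# "online/in use" flag, e.g. {"xenvg/disk0": (10240.0, ..., False)}.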
9437 delta = all_lvs.difference(node_lvs.keys())
9438 if delta:
9439 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9440 utils.CommaJoin(delta),
9442 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9443 if online_lvs:
9444 raise errors.OpPrereqError("Online logical volumes found, cannot"
9445 " adopt: %s" % utils.CommaJoin(online_lvs),
9447 # update the size of disk based on what is found
9448 for dsk in self.disks:
9449 dsk[constants.IDISK_SIZE] = \
9450 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9451 dsk[constants.IDISK_ADOPT])][0]))
9453 elif self.op.disk_template == constants.DT_BLOCK:
9454 # Normalize and de-duplicate device paths
9455 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9456 for disk in self.disks])
9457 if len(all_disks) != len(self.disks):
9458 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9460 baddisks = [d for d in all_disks
9461 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9462 if baddisks:
9463 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9464 " cannot be adopted" %
9465 (", ".join(baddisks),
9466 constants.ADOPTABLE_BLOCKDEV_ROOT),
9469 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9470 list(all_disks))[pnode.name]
9471 node_disks.Raise("Cannot get block device information from node %s" %
9473 node_disks = node_disks.payload
9474 delta = all_disks.difference(node_disks.keys())
9475 if delta:
9476 raise errors.OpPrereqError("Missing block device(s): %s" %
9477 utils.CommaJoin(delta),
9479 for dsk in self.disks:
9480 dsk[constants.IDISK_SIZE] = \
9481 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9483 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9485 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9486 # check OS parameters (remotely)
9487 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9489 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9491 # memory check on primary node
9492 #TODO(dynmem): use MINMEM for checking
9494 _CheckNodeFreeMemory(self, self.pnode.name,
9495 "creating instance %s" % self.op.instance_name,
9496 self.be_full[constants.BE_MAXMEM],
9499 self.dry_run_result = list(nodenames)
9501 def Exec(self, feedback_fn):
9502 """Create and add the instance to the cluster.
9505 instance = self.op.instance_name
9506 pnode_name = self.pnode.name
9508 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9509 self.owned_locks(locking.LEVEL_NODE)), \
9510 "Node locks differ from node resource locks"
9512 ht_kind = self.op.hypervisor
9513 if ht_kind in constants.HTS_REQ_PORT:
9514 network_port = self.cfg.AllocatePort()
9515 else:
9516 network_port = None
9518 disks = _GenerateDiskTemplate(self,
9519 self.op.disk_template,
9520 instance, pnode_name,
9523 self.instance_file_storage_dir,
9524 self.op.file_driver,
9529 iobj = objects.Instance(name=instance, os=self.op.os_type,
9530 primary_node=pnode_name,
9531 nics=self.nics, disks=disks,
9532 disk_template=self.op.disk_template,
9533 admin_state=constants.ADMINST_DOWN,
9534 network_port=network_port,
9535 beparams=self.op.beparams,
9536 hvparams=self.op.hvparams,
9537 hypervisor=self.op.hypervisor,
9538 osparams=self.op.osparams,
9542 for tag in self.op.tags:
9543 iobj.AddTag(tag)
9545 if self.adopt_disks:
9546 if self.op.disk_template == constants.DT_PLAIN:
9547 # rename LVs to the newly-generated names; we need to construct
9548 # 'fake' LV disks with the old data, plus the new unique_id
9549 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9550 rename_to = []
9551 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9552 rename_to.append(t_dsk.logical_id)
9553 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9554 self.cfg.SetDiskID(t_dsk, pnode_name)
9555 result = self.rpc.call_blockdev_rename(pnode_name,
9556 zip(tmp_disks, rename_to))
9557 result.Raise("Failed to rename adoped LVs")
9559 feedback_fn("* creating instance disks...")
9560 try:
9561 _CreateDisks(self, iobj)
9562 except errors.OpExecError:
9563 self.LogWarning("Device creation failed, reverting...")
9564 try:
9565 _RemoveDisks(self, iobj)
9566 finally:
9567 self.cfg.ReleaseDRBDMinors(instance)
9568 raise
9570 feedback_fn("adding instance %s to cluster config" % instance)
9572 self.cfg.AddInstance(iobj, self.proc.GetECId())
9574 # Declare that we don't want to remove the instance lock anymore, as we've
9575 # added the instance to the config
9576 del self.remove_locks[locking.LEVEL_INSTANCE]
9578 if self.op.mode == constants.INSTANCE_IMPORT:
9579 # Release unused nodes
9580 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9581 else:
9582 # Release all nodes
9583 _ReleaseLocks(self, locking.LEVEL_NODE)
9585 disk_abort = False
9586 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9587 feedback_fn("* wiping instance disks...")
9588 try:
9589 _WipeDisks(self, iobj)
9590 except errors.OpExecError, err:
9591 logging.exception("Wiping disks failed")
9592 self.LogWarning("Wiping instance disks failed (%s)", err)
9593 disk_abort = True
9595 if disk_abort:
9596 # Something is already wrong with the disks, don't do anything else
9597 pass
9598 elif self.op.wait_for_sync:
9599 disk_abort = not _WaitForSync(self, iobj)
9600 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9601 # make sure the disks are not degraded (still sync-ing is ok)
9602 feedback_fn("* checking mirrors status")
9603 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9604 else:
9605 disk_abort = False
9607 if disk_abort:
9608 _RemoveDisks(self, iobj)
9609 self.cfg.RemoveInstance(iobj.name)
9610 # Make sure the instance lock gets removed
9611 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9612 raise errors.OpExecError("There are some degraded disks for"
9615 # Release all node resource locks
9616 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9618 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9619 if self.op.mode == constants.INSTANCE_CREATE:
9620 if not self.op.no_install:
9621 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9622 not self.op.wait_for_sync)
9624 feedback_fn("* pausing disk sync to install instance OS")
9625 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9627 for idx, success in enumerate(result.payload):
9629 logging.warn("pause-sync of instance %s for disk %d failed",
9632 feedback_fn("* running the instance OS create scripts...")
9633 # FIXME: pass debug option from opcode to backend
9634 os_add_result = \
9635 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9636 self.op.debug_level)
9638 feedback_fn("* resuming disk sync")
9639 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9641 for idx, success in enumerate(result.payload):
9643 logging.warn("resume-sync of instance %s for disk %d failed",
9646 os_add_result.Raise("Could not add os for instance %s"
9647 " on node %s" % (instance, pnode_name))
9649 elif self.op.mode == constants.INSTANCE_IMPORT:
9650 feedback_fn("* running the instance OS import scripts...")
9652 transfers = []
9654 for idx, image in enumerate(self.src_images):
9655 if not image:
9656 continue
9658 # FIXME: pass debug option from opcode to backend
9659 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9660 constants.IEIO_FILE, (image, ),
9661 constants.IEIO_SCRIPT,
9662 (iobj.disks[idx], idx),
9664 transfers.append(dt)
9666 import_result = \
9667 masterd.instance.TransferInstanceData(self, feedback_fn,
9668 self.op.src_node, pnode_name,
9669 self.pnode.secondary_ip,
9671 if not compat.all(import_result):
9672 self.LogWarning("Some disks for instance %s on node %s were not"
9673 " imported successfully" % (instance, pnode_name))
9675 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9676 feedback_fn("* preparing remote import...")
9677 # The source cluster will stop the instance before attempting to make a
9678 # connection. In some cases stopping an instance can take a long time,
9679 # hence the shutdown timeout is added to the connection timeout.
9680 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9681 self.op.source_shutdown_timeout)
9682 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
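# Worked example (illustrative numbers only): with a 60s RIE_CONNECT_TIMEOUT
# and a 120s source_shutdown_timeout, the remote import tolerates up to 180s
# before giving up on the incoming connection.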
9684 assert iobj.primary_node == self.pnode.name
9685 disk_results = \
9686 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9687 self.source_x509_ca,
9688 self._cds, timeouts)
9689 if not compat.all(disk_results):
9690 # TODO: Should the instance still be started, even if some disks
9691 # failed to import (valid for local imports, too)?
9692 self.LogWarning("Some disks for instance %s on node %s were not"
9693 " imported successfully" % (instance, pnode_name))
9695 # Run rename script on newly imported instance
9696 assert iobj.name == instance
9697 feedback_fn("Running rename script for %s" % instance)
9698 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9699 self.source_instance_name,
9700 self.op.debug_level)
9702 self.LogWarning("Failed to run rename script for %s on node"
9703 " %s: %s" % (instance, pnode_name, result.fail_msg))
9705 else:
9706 # also checked in the prereq part
9707 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9710 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9712 if self.op.start:
9713 iobj.admin_state = constants.ADMINST_UP
9714 self.cfg.Update(iobj, feedback_fn)
9715 logging.info("Starting instance %s on node %s", instance, pnode_name)
9716 feedback_fn("* starting instance...")
9717 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9719 result.Raise("Could not start instance")
9721 return list(iobj.all_nodes)
9724 class LUInstanceConsole(NoHooksLU):
9725 """Connect to an instance's console.
9727 This is somewhat special in that it returns the command line that
9728 you need to run on the master node in order to connect to the
9729 console.
9734 def ExpandNames(self):
9735 self.share_locks = _ShareAll()
9736 self._ExpandAndLockInstance()
9738 def CheckPrereq(self):
9739 """Check prerequisites.
9741 This checks that the instance is in the cluster.
9744 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9745 assert self.instance is not None, \
9746 "Cannot retrieve locked instance %s" % self.op.instance_name
9747 _CheckNodeOnline(self, self.instance.primary_node)
9749 def Exec(self, feedback_fn):
9750 """Connect to the console of an instance
9753 instance = self.instance
9754 node = instance.primary_node
9756 node_insts = self.rpc.call_instance_list([node],
9757 [instance.hypervisor])[node]
9758 node_insts.Raise("Can't get node information from %s" % node)
9760 if instance.name not in node_insts.payload:
9761 if instance.admin_state == constants.ADMINST_UP:
9762 state = constants.INSTST_ERRORDOWN
9763 elif instance.admin_state == constants.ADMINST_DOWN:
9764 state = constants.INSTST_ADMINDOWN
9765 else:
9766 state = constants.INSTST_ADMINOFFLINE
9767 raise errors.OpExecError("Instance %s is not running (state %s)" %
9768 (instance.name, state))
9770 logging.debug("Connecting to console of %s on %s", instance.name, node)
9772 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9775 def _GetInstanceConsole(cluster, instance):
9776 """Returns console information for an instance.
9778 @type cluster: L{objects.Cluster}
9779 @type instance: L{objects.Instance}
9783 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9784 # beparams and hvparams are passed separately, to avoid editing the
9785 # instance and then saving the defaults in the instance itself.
9786 hvparams = cluster.FillHV(instance)
9787 beparams = cluster.FillBE(instance)
9788 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9790 assert console.instance == instance.name
9791 assert console.Validate()
9793 return console.ToDict()
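# Note (illustrative, not exhaustive): the resulting dictionary describes how
# to reach the console, typically the console kind plus connection details
# such as the host to contact and the command to execute there, depending on
# the hypervisor.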
9796 class LUInstanceReplaceDisks(LogicalUnit):
9797 """Replace the disks of an instance.
9800 HPATH = "mirrors-replace"
9801 HTYPE = constants.HTYPE_INSTANCE
9804 def CheckArguments(self):
9805 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9808 def ExpandNames(self):
9809 self._ExpandAndLockInstance()
9811 assert locking.LEVEL_NODE not in self.needed_locks
9812 assert locking.LEVEL_NODE_RES not in self.needed_locks
9813 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9815 assert self.op.iallocator is None or self.op.remote_node is None, \
9816 "Conflicting options"
9818 if self.op.remote_node is not None:
9819 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9821 # Warning: do not remove the locking of the new secondary here
9822 # unless DRBD8.AddChildren is changed to work in parallel;
9823 # currently it doesn't since parallel invocations of
9824 # FindUnusedMinor will conflict
9825 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9826 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9828 self.needed_locks[locking.LEVEL_NODE] = []
9829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9831 if self.op.iallocator is not None:
9832 # iallocator will select a new node in the same group
9833 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9835 self.needed_locks[locking.LEVEL_NODE_RES] = []
9837 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9838 self.op.iallocator, self.op.remote_node,
9839 self.op.disks, False, self.op.early_release)
9841 self.tasklets = [self.replacer]
9843 def DeclareLocks(self, level):
9844 if level == locking.LEVEL_NODEGROUP:
9845 assert self.op.remote_node is None
9846 assert self.op.iallocator is not None
9847 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9849 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9850 # Lock all groups used by instance optimistically; this requires going
9851 # via the node before it's locked, requiring verification later on
9852 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9853 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9855 elif level == locking.LEVEL_NODE:
9856 if self.op.iallocator is not None:
9857 assert self.op.remote_node is None
9858 assert not self.needed_locks[locking.LEVEL_NODE]
9860 # Lock member nodes of all locked groups
9861 self.needed_locks[locking.LEVEL_NODE] = [node_name
9862 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9863 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9865 self._LockInstancesNodes()
9866 elif level == locking.LEVEL_NODE_RES:
9868 self.needed_locks[locking.LEVEL_NODE_RES] = \
9869 self.needed_locks[locking.LEVEL_NODE]
9871 def BuildHooksEnv(self):
9874 This runs on the master, the primary and all the secondaries.
9877 instance = self.replacer.instance
9879 "MODE": self.op.mode,
9880 "NEW_SECONDARY": self.op.remote_node,
9881 "OLD_SECONDARY": instance.secondary_nodes[0],
9883 env.update(_BuildInstanceHookEnvByObject(self, instance))
9886 def BuildHooksNodes(self):
9887 """Build hooks nodes.
9890 instance = self.replacer.instance
9892 self.cfg.GetMasterNode(),
9893 instance.primary_node,
9895 if self.op.remote_node is not None:
9896 nl.append(self.op.remote_node)
9899 def CheckPrereq(self):
9900 """Check prerequisites.
9903 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9904 self.op.iallocator is None)
9906 # Verify if node group locks are still correct
9907 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9909 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9911 return LogicalUnit.CheckPrereq(self)
9914 class TLReplaceDisks(Tasklet):
9915 """Replaces disks for an instance.
9917 Note: Locking is not within the scope of this class.
9920 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9921 disks, delay_iallocator, early_release):
9922 """Initializes this class.
9925 Tasklet.__init__(self, lu)
9928 self.instance_name = instance_name
9930 self.iallocator_name = iallocator_name
9931 self.remote_node = remote_node
9933 self.delay_iallocator = delay_iallocator
9934 self.early_release = early_release
9937 self.instance = None
9938 self.new_node = None
9939 self.target_node = None
9940 self.other_node = None
9941 self.remote_node_info = None
9942 self.node_secondary_ip = None
9945 def CheckArguments(mode, remote_node, iallocator):
9946 """Helper function for users of this class.
9949 # check for valid parameter combination
9950 if mode == constants.REPLACE_DISK_CHG:
9951 if remote_node is None and iallocator is None:
9952 raise errors.OpPrereqError("When changing the secondary either an"
9953 " iallocator script must be used or the"
9954 " new node given", errors.ECODE_INVAL)
9956 if remote_node is not None and iallocator is not None:
9957 raise errors.OpPrereqError("Give either the iallocator or the new"
9958 " secondary, not both", errors.ECODE_INVAL)
9960 elif remote_node is not None or iallocator is not None:
9961 # Not replacing the secondary
9962 raise errors.OpPrereqError("The iallocator and new node options can"
9963 " only be used when changing the"
9964 " secondary node", errors.ECODE_INVAL)
9967 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9968 """Compute a new secondary node using an IAllocator.
9971 ial = IAllocator(lu.cfg, lu.rpc,
9972 mode=constants.IALLOCATOR_MODE_RELOC,
9973 name=instance_name,
9974 relocate_from=list(relocate_from))
9976 ial.Run(iallocator_name)
9979 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9980 " %s" % (iallocator_name, ial.info),
9983 if len(ial.result) != ial.required_nodes:
9984 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9985 " of nodes (%s), required %s" %
9986 (iallocator_name,
9987 len(ial.result), ial.required_nodes),
9990 remote_node_name = ial.result[0]
9992 lu.LogInfo("Selected new secondary for instance '%s': %s",
9993 instance_name, remote_node_name)
9995 return remote_node_name
9997 def _FindFaultyDisks(self, node_name):
9998 """Wrapper for L{_FindFaultyInstanceDisks}.
10001 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10004 def _CheckDisksActivated(self, instance):
10005 """Checks if the instance disks are activated.
10007 @param instance: The instance to check disks
10008 @return: True if they are activated, False otherwise
10011 nodes = instance.all_nodes
10013 for idx, dev in enumerate(instance.disks):
10015 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10016 self.cfg.SetDiskID(dev, node)
10018 result = self.rpc.call_blockdev_find(node, dev)
10020 if result.offline:
10021 continue
10022 elif result.fail_msg or not result.payload:
10023 return False
10025 return True
10027 def CheckPrereq(self):
10028 """Check prerequisites.
10030 This checks that the instance is in the cluster.
10033 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10034 assert instance is not None, \
10035 "Cannot retrieve locked instance %s" % self.instance_name
10037 if instance.disk_template != constants.DT_DRBD8:
10038 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10039 " instances", errors.ECODE_INVAL)
10041 if len(instance.secondary_nodes) != 1:
10042 raise errors.OpPrereqError("The instance has a strange layout,"
10043 " expected one secondary but found %d" %
10044 len(instance.secondary_nodes),
10045 errors.ECODE_FAULT)
10047 if not self.delay_iallocator:
10048 self._CheckPrereq2()
10050 def _CheckPrereq2(self):
10051 """Check prerequisites, second part.
10053 This function should always be part of CheckPrereq. It was separated and is
10054 now called from Exec because during node evacuation iallocator was only
10055 called with an unmodified cluster model, not taking planned changes into
10056 account.
10059 instance = self.instance
10060 secondary_node = instance.secondary_nodes[0]
10062 if self.iallocator_name is None:
10063 remote_node = self.remote_node
10065 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10066 instance.name, instance.secondary_nodes)
10068 if remote_node is None:
10069 self.remote_node_info = None
10071 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10072 "Remote node '%s' is not locked" % remote_node
10074 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10075 assert self.remote_node_info is not None, \
10076 "Cannot retrieve locked node %s" % remote_node
10078 if remote_node == self.instance.primary_node:
10079 raise errors.OpPrereqError("The specified node is the primary node of"
10080 " the instance", errors.ECODE_INVAL)
10082 if remote_node == secondary_node:
10083 raise errors.OpPrereqError("The specified node is already the"
10084 " secondary node of the instance",
10085 errors.ECODE_INVAL)
10087 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10088 constants.REPLACE_DISK_CHG):
10089 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10090 errors.ECODE_INVAL)
10092 if self.mode == constants.REPLACE_DISK_AUTO:
10093 if not self._CheckDisksActivated(instance):
10094 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10095 " first" % self.instance_name,
10096 errors.ECODE_STATE)
10097 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10098 faulty_secondary = self._FindFaultyDisks(secondary_node)
10100 if faulty_primary and faulty_secondary:
10101 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10102 " one node and can not be repaired"
10103 " automatically" % self.instance_name,
10104 errors.ECODE_STATE)
10106 if faulty_primary:
10107 self.disks = faulty_primary
10108 self.target_node = instance.primary_node
10109 self.other_node = secondary_node
10110 check_nodes = [self.target_node, self.other_node]
10111 elif faulty_secondary:
10112 self.disks = faulty_secondary
10113 self.target_node = secondary_node
10114 self.other_node = instance.primary_node
10115 check_nodes = [self.target_node, self.other_node]
10121 # Non-automatic modes
10122 if self.mode == constants.REPLACE_DISK_PRI:
10123 self.target_node = instance.primary_node
10124 self.other_node = secondary_node
10125 check_nodes = [self.target_node, self.other_node]
10127 elif self.mode == constants.REPLACE_DISK_SEC:
10128 self.target_node = secondary_node
10129 self.other_node = instance.primary_node
10130 check_nodes = [self.target_node, self.other_node]
10132 elif self.mode == constants.REPLACE_DISK_CHG:
10133 self.new_node = remote_node
10134 self.other_node = instance.primary_node
10135 self.target_node = secondary_node
10136 check_nodes = [self.new_node, self.other_node]
10138 _CheckNodeNotDrained(self.lu, remote_node)
10139 _CheckNodeVmCapable(self.lu, remote_node)
10141 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10142 assert old_node_info is not None
10143 if old_node_info.offline and not self.early_release:
10144 # doesn't make sense to delay the release
10145 self.early_release = True
10146 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10147 " early-release mode", secondary_node)
10150 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10153 # If not specified all disks should be replaced
10154 if not self.disks:
10155 self.disks = range(len(self.instance.disks))
10157 # TODO: compute disk parameters
10158 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10159 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10160 if primary_node_info.group != secondary_node_info.group:
10161 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10162 " different node groups; the disk parameters of the"
10163 " primary node's group will be applied.")
10165 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10167 for node in check_nodes:
10168 _CheckNodeOnline(self.lu, node)
10170 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10173 if node_name is not None)
10175 # Release unneeded node and node resource locks
10176 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10177 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10179 # Release any owned node group
10180 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10181 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10183 # Check whether disks are valid
10184 for disk_idx in self.disks:
10185 instance.FindDisk(disk_idx)
10187 # Get secondary node IP addresses
10188 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10189 in self.cfg.GetMultiNodeInfo(touched_nodes))
10191 def Exec(self, feedback_fn):
10192 """Execute disk replacement.
10194 This dispatches the disk replacement to the appropriate handler.
10197 if self.delay_iallocator:
10198 self._CheckPrereq2()
10201 # Verify owned locks before starting operation
10202 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10203 assert set(owned_nodes) == set(self.node_secondary_ip), \
10204 ("Incorrect node locks, owning %s, expected %s" %
10205 (owned_nodes, self.node_secondary_ip.keys()))
10206 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10207 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10209 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10210 assert list(owned_instances) == [self.instance_name], \
10211 "Instance '%s' not locked" % self.instance_name
10213 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10214 "Should not own any node group lock at this point"
10217 feedback_fn("No disks need replacement")
10220 feedback_fn("Replacing disk(s) %s for %s" %
10221 (utils.CommaJoin(self.disks), self.instance.name))
10223 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10225 # Activate the instance disks if we're replacing them on a down instance
10226 if activate_disks:
10227 _StartInstanceDisks(self.lu, self.instance, True)
10229 try:
10230 # Should we replace the secondary node?
10231 if self.new_node is not None:
10232 fn = self._ExecDrbd8Secondary
10234 fn = self._ExecDrbd8DiskOnly
10236 result = fn(feedback_fn)
10237 finally:
10238 # Deactivate the instance disks if we're replacing them on a
10239 # down instance
10240 if activate_disks:
10241 _SafeShutdownInstanceDisks(self.lu, self.instance)
10243 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10246 # Verify owned locks
10247 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10248 nodes = frozenset(self.node_secondary_ip)
10249 assert ((self.early_release and not owned_nodes) or
10250 (not self.early_release and not (set(owned_nodes) - nodes))), \
10251 ("Not owning the correct locks, early_release=%s, owned=%r,"
10252 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10256 def _CheckVolumeGroup(self, nodes):
10257 self.lu.LogInfo("Checking volume groups")
10259 vgname = self.cfg.GetVGName()
10261 # Make sure volume group exists on all involved nodes
10262 results = self.rpc.call_vg_list(nodes)
10264 raise errors.OpExecError("Can't list volume groups on the nodes")
10267 res = results[node]
10268 res.Raise("Error checking node %s" % node)
10269 if vgname not in res.payload:
10270 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10273 def _CheckDisksExistence(self, nodes):
10274 # Check disk existence
10275 for idx, dev in enumerate(self.instance.disks):
10276 if idx not in self.disks:
10280 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10281 self.cfg.SetDiskID(dev, node)
10283 result = self.rpc.call_blockdev_find(node, dev)
10285 msg = result.fail_msg
10286 if msg or not result.payload:
10288 msg = "disk not found"
10289 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10292 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10293 for idx, dev in enumerate(self.instance.disks):
10294 if idx not in self.disks:
10297 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10300 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10302 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10303 " replace disks for instance %s" %
10304 (node_name, self.instance.name))
10306 def _CreateNewStorage(self, node_name):
10307 """Create new storage on the primary or secondary node.
10309 This is only used for same-node replaces, not for changing the
10310 secondary node, hence we don't want to modify the existing disk.
10315 for idx, dev in enumerate(self.instance.disks):
10316 if idx not in self.disks:
10319 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10321 self.cfg.SetDiskID(dev, node_name)
10323 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10324 names = _GenerateUniqueNames(self.lu, lv_names)
10326 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10328 vg_data = dev.children[0].logical_id[0]
10329 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10330 logical_id=(vg_data, names[0]), params=data_p)
10331 vg_meta = dev.children[1].logical_id[0]
10332 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10333 logical_id=(vg_meta, names[1]), params=meta_p)
10335 new_lvs = [lv_data, lv_meta]
10336 old_lvs = [child.Copy() for child in dev.children]
10337 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
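# iv_names thus maps each DRBD device name to its old and new LV children,
# illustratively "disk/0" -> (drbd_dev, [old_data_lv, old_meta_lv],
# [new_data_lv, new_meta_lv]); the detach/rename/attach steps below rely on
# this layout.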
10339 # we pass force_create=True to force the LVM creation
10340 for new_lv in new_lvs:
10341 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10342 _GetInstanceInfoText(self.instance), False)
10346 def _CheckDevices(self, node_name, iv_names):
10347 for name, (dev, _, _) in iv_names.iteritems():
10348 self.cfg.SetDiskID(dev, node_name)
10350 result = self.rpc.call_blockdev_find(node_name, dev)
10352 msg = result.fail_msg
10353 if msg or not result.payload:
10355 msg = "disk not found"
10356 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10359 if result.payload.is_degraded:
10360 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10362 def _RemoveOldStorage(self, node_name, iv_names):
10363 for name, (_, old_lvs, _) in iv_names.iteritems():
10364 self.lu.LogInfo("Remove logical volumes for %s" % name)
10366 for lv in old_lvs:
10367 self.cfg.SetDiskID(lv, node_name)
10369 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10371 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10372 hint="remove unused LVs manually")
10374 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10375 """Replace a disk on the primary or secondary for DRBD 8.
10377 The algorithm for replace is quite complicated:
10379 1. for each disk to be replaced:
10381 1. create new LVs on the target node with unique names
10382 1. detach old LVs from the drbd device
10383 1. rename old LVs to name_replaced.<time_t>
10384 1. rename new LVs to old LVs
10385 1. attach the new LVs (with the old names now) to the drbd device
10387 1. wait for sync across all devices
10389 1. for each modified disk:
10391 1. remove old LVs (which have the name name_replaced.<time_t>)
10393 Failures are not very well handled.
10395 """
10396 steps_total = 6
10398 # Step: check device activation
10399 self.lu.LogStep(1, steps_total, "Check device existence")
10400 self._CheckDisksExistence([self.other_node, self.target_node])
10401 self._CheckVolumeGroup([self.target_node, self.other_node])
10403 # Step: check other node consistency
10404 self.lu.LogStep(2, steps_total, "Check peer consistency")
10405 self._CheckDisksConsistency(self.other_node,
10406 self.other_node == self.instance.primary_node,
10409 # Step: create new storage
10410 self.lu.LogStep(3, steps_total, "Allocate new storage")
10411 iv_names = self._CreateNewStorage(self.target_node)
10413 # Step: for each lv, detach+rename*2+attach
10414 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10415 for dev, old_lvs, new_lvs in iv_names.itervalues():
10416 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10418 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10420 result.Raise("Can't detach drbd from local storage on node"
10421 " %s for device %s" % (self.target_node, dev.iv_name))
10423 #cfg.Update(instance)
10425 # ok, we created the new LVs, so now we know we have the needed
10426 # storage; as such, we proceed on the target node to rename
10427 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10428 # using the assumption that logical_id == physical_id (which in
10429 # turn is the unique_id on that node)
10431 # FIXME(iustin): use a better name for the replaced LVs
10432 temp_suffix = int(time.time())
10433 ren_fn = lambda d, suff: (d.physical_id[0],
10434 d.physical_id[1] + "_replaced-%s" % suff)
10436 # Build the rename list based on what LVs exist on the node
10437 rename_old_to_new = []
10438 for to_ren in old_lvs:
10439 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10440 if not result.fail_msg and result.payload:
10442 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10444 self.lu.LogInfo("Renaming the old LVs on the target node")
10445 result = self.rpc.call_blockdev_rename(self.target_node,
10447 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10449 # Now we rename the new LVs to the old LVs
10450 self.lu.LogInfo("Renaming the new LVs on the target node")
10451 rename_new_to_old = [(new, old.physical_id)
10452 for old, new in zip(old_lvs, new_lvs)]
10453 result = self.rpc.call_blockdev_rename(self.target_node,
10455 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10457 # Intermediate steps of in memory modifications
10458 for old, new in zip(old_lvs, new_lvs):
10459 new.logical_id = old.logical_id
10460 self.cfg.SetDiskID(new, self.target_node)
10462 # We need to modify old_lvs so that removal later removes the
10463 # right LVs, not the newly added ones; note that old_lvs is a
10465 for disk in old_lvs:
10466 disk.logical_id = ren_fn(disk, temp_suffix)
10467 self.cfg.SetDiskID(disk, self.target_node)
10469 # Now that the new lvs have the old name, we can add them to the device
10470 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10471 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10473 msg = result.fail_msg
10475 for new_lv in new_lvs:
10476 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10479 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10480 hint=("cleanup manually the unused logical"
10482 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10484 cstep = itertools.count(5)
10486 if self.early_release:
10487 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10488 self._RemoveOldStorage(self.target_node, iv_names)
10489 # TODO: Check if releasing locks early still makes sense
10490 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10492 # Release all resource locks except those used by the instance
10493 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10494 keep=self.node_secondary_ip.keys())
10496 # Release all node locks while waiting for sync
10497 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10499 # TODO: Can the instance lock be downgraded here? Take the optional disk
10500 # shutdown in the caller into consideration.
10503 # This can fail as the old devices are degraded and _WaitForSync
10504 # does a combined result over all disks, so we don't check its return value
10505 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10506 _WaitForSync(self.lu, self.instance)
10508 # Check all devices manually
10509 self._CheckDevices(self.instance.primary_node, iv_names)
10511 # Step: remove old storage
10512 if not self.early_release:
10513 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10514 self._RemoveOldStorage(self.target_node, iv_names)
10516 def _ExecDrbd8Secondary(self, feedback_fn):
10517 """Replace the secondary node for DRBD 8.
10519 The algorithm for replace is quite complicated:
10520 - for all disks of the instance:
10521 - create new LVs on the new node with same names
10522 - shutdown the drbd device on the old secondary
10523 - disconnect the drbd network on the primary
10524 - create the drbd device on the new secondary
10525 - network attach the drbd on the primary, using an artifice:
10526 the drbd code for Attach() will connect to the network if it
10527 finds a device which is connected to the good local disks but
10528 not network enabled
10529 - wait for sync across all devices
10530 - remove all disks from the old secondary
10532 Failures are not very well handled.
10534 """
10535 steps_total = 6
10537 pnode = self.instance.primary_node
10539 # Step: check device activation
10540 self.lu.LogStep(1, steps_total, "Check device existence")
10541 self._CheckDisksExistence([self.instance.primary_node])
10542 self._CheckVolumeGroup([self.instance.primary_node])
10544 # Step: check other node consistency
10545 self.lu.LogStep(2, steps_total, "Check peer consistency")
10546 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10548 # Step: create new storage
10549 self.lu.LogStep(3, steps_total, "Allocate new storage")
10550 for idx, dev in enumerate(self.instance.disks):
10551 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10552 (self.new_node, idx))
10553 # we pass force_create=True to force LVM creation
10554 for new_lv in dev.children:
10555 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10556 _GetInstanceInfoText(self.instance), False)
10558 # Step 4: drbd minors and drbd setup changes
10559 # after this, we must manually remove the drbd minors on both the
10560 # error and the success paths
10561 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10562 minors = self.cfg.AllocateDRBDMinor([self.new_node
10563 for dev in self.instance.disks],
10564 self.instance.name)
10565 logging.debug("Allocated minors %r", minors)
10568 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10569 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10570 (self.new_node, idx))
10571 # create new devices on new_node; note that we create two IDs:
10572 # one without port, so the drbd will be activated without
10573 # networking information on the new node at this stage, and one
10574 # with network, for the latter activation in step 4
10575 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10576 if self.instance.primary_node == o_node1:
10577 p_minor = o_minor1
10578 else:
10579 assert self.instance.primary_node == o_node2, "Three-node instance?"
10580 p_minor = o_minor2
10582 new_alone_id = (self.instance.primary_node, self.new_node, None,
10583 p_minor, new_minor, o_secret)
10584 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10585 p_minor, new_minor, o_secret)
10587 iv_names[idx] = (dev, dev.children, new_net_id)
10588 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10590 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10591 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10592 logical_id=new_alone_id,
10593 children=dev.children,
10595 params=drbd_params)
10597 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10598 _GetInstanceInfoText(self.instance), False)
10599 except errors.GenericError:
10600 self.cfg.ReleaseDRBDMinors(self.instance.name)
10603 # We have new devices, shutdown the drbd on the old secondary
10604 for idx, dev in enumerate(self.instance.disks):
10605 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10606 self.cfg.SetDiskID(dev, self.target_node)
10607 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10609 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10610 "node: %s" % (idx, msg),
10611 hint=("Please cleanup this device manually as"
10612 " soon as possible"))
10614 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10615 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10616 self.instance.disks)[pnode]
10618 msg = result.fail_msg
10620 # detaches didn't succeed (unlikely)
10621 self.cfg.ReleaseDRBDMinors(self.instance.name)
10622 raise errors.OpExecError("Can't detach the disks from the network on"
10623 " old node: %s" % (msg,))
10625 # if we managed to detach at least one, we update all the disks of
10626 # the instance to point to the new secondary
10627 self.lu.LogInfo("Updating instance configuration")
10628 for dev, _, new_logical_id in iv_names.itervalues():
10629 dev.logical_id = new_logical_id
10630 self.cfg.SetDiskID(dev, self.instance.primary_node)
10632 self.cfg.Update(self.instance, feedback_fn)
10634 # Release all node locks (the configuration has been updated)
10635 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10637 # and now perform the drbd attach
10638 self.lu.LogInfo("Attaching primary drbds to new secondary"
10639 " (standalone => connected)")
10640 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10642 self.node_secondary_ip,
10643 self.instance.disks,
10644 self.instance.name,
10646 for to_node, to_result in result.items():
10647 msg = to_result.fail_msg
10649 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10651 hint=("please do a gnt-instance info to see the"
10652 " status of disks"))
10654 cstep = itertools.count(5)
10656 if self.early_release:
10657 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10658 self._RemoveOldStorage(self.target_node, iv_names)
10659 # TODO: Check if releasing locks early still makes sense
10660 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10662 # Release all resource locks except those used by the instance
10663 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10664 keep=self.node_secondary_ip.keys())
10666 # TODO: Can the instance lock be downgraded here? Take the optional disk
10667 # shutdown in the caller into consideration.
10670 # This can fail as the old devices are degraded and _WaitForSync
10671 # does a combined result over all disks, so we don't check its return value
10672 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10673 _WaitForSync(self.lu, self.instance)
10675 # Check all devices manually
10676 self._CheckDevices(self.instance.primary_node, iv_names)
10678 # Step: remove old storage
10679 if not self.early_release:
10680 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10681 self._RemoveOldStorage(self.target_node, iv_names)
10684 class LURepairNodeStorage(NoHooksLU):
10685 """Repairs the volume group on a node.
10690 def CheckArguments(self):
10691 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10693 storage_type = self.op.storage_type
10695 if (constants.SO_FIX_CONSISTENCY not in
10696 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10697 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10698 " repaired" % storage_type,
10699 errors.ECODE_INVAL)
10701 def ExpandNames(self):
10702 self.needed_locks = {
10703 locking.LEVEL_NODE: [self.op.node_name],
10706 def _CheckFaultyDisks(self, instance, node_name):
10707 """Ensure faulty disks abort the opcode or at least warn."""
10709 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10711 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10712 " node '%s'" % (instance.name, node_name),
10713 errors.ECODE_STATE)
10714 except errors.OpPrereqError, err:
10715 if self.op.ignore_consistency:
10716 self.proc.LogWarning(str(err.args[0]))
10717 else:
10718 raise
10720 def CheckPrereq(self):
10721 """Check prerequisites.
10724 # Check whether any instance on this node has faulty disks
10725 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10726 if inst.admin_state != constants.ADMINST_UP:
10727 continue
10728 check_nodes = set(inst.all_nodes)
10729 check_nodes.discard(self.op.node_name)
10730 for inst_node_name in check_nodes:
10731 self._CheckFaultyDisks(inst, inst_node_name)
10733 def Exec(self, feedback_fn):
10734 feedback_fn("Repairing storage unit '%s' on %s ..." %
10735 (self.op.name, self.op.node_name))
10737 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10738 result = self.rpc.call_storage_execute(self.op.node_name,
10739 self.op.storage_type, st_args,
10741 constants.SO_FIX_CONSISTENCY)
10742 result.Raise("Failed to repair storage unit '%s' on %s" %
10743 (self.op.name, self.op.node_name))
10746 class LUNodeEvacuate(NoHooksLU):
10747 """Evacuates instances off a list of nodes.
10752 _MODE2IALLOCATOR = {
10753 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10754 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10755 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10757 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10758 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10759 constants.IALLOCATOR_NEVAC_MODES)
10761 def CheckArguments(self):
10762 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10764 def ExpandNames(self):
10765 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10767 if self.op.remote_node is not None:
10768 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10769 assert self.op.remote_node
10771 if self.op.remote_node == self.op.node_name:
10772 raise errors.OpPrereqError("Can not use evacuated node as a new"
10773 " secondary node", errors.ECODE_INVAL)
10775 if self.op.mode != constants.NODE_EVAC_SEC:
10776 raise errors.OpPrereqError("Without the use of an iallocator only"
10777 " secondary instances can be evacuated",
10778 errors.ECODE_INVAL)
10781 self.share_locks = _ShareAll()
10782 self.needed_locks = {
10783 locking.LEVEL_INSTANCE: [],
10784 locking.LEVEL_NODEGROUP: [],
10785 locking.LEVEL_NODE: [],
10788 # Determine nodes (via group) optimistically, needs verification once locks
10789 # have been acquired
10790 self.lock_nodes = self._DetermineNodes()
10792 def _DetermineNodes(self):
10793 """Gets the list of nodes to operate on.
10796 if self.op.remote_node is None:
10797 # Iallocator will choose any node(s) in the same group
10798 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10800 group_nodes = frozenset([self.op.remote_node])
10802 # Determine nodes to be locked
10803 return set([self.op.node_name]) | group_nodes
10805 def _DetermineInstances(self):
10806 """Builds list of instances to operate on.
10809 assert self.op.mode in constants.NODE_EVAC_MODES
10811 if self.op.mode == constants.NODE_EVAC_PRI:
10812 # Primary instances only
10813 inst_fn = _GetNodePrimaryInstances
10814 assert self.op.remote_node is None, \
10815 "Evacuating primary instances requires iallocator"
10816 elif self.op.mode == constants.NODE_EVAC_SEC:
10817 # Secondary instances only
10818 inst_fn = _GetNodeSecondaryInstances
10821 assert self.op.mode == constants.NODE_EVAC_ALL
10822 inst_fn = _GetNodeInstances
10823 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10825 raise errors.OpPrereqError("Due to an issue with the iallocator"
10826 " interface it is not possible to evacuate"
10827 " all instances at once; specify explicitly"
10828 " whether to evacuate primary or secondary"
10830 errors.ECODE_INVAL)
10832 return inst_fn(self.cfg, self.op.node_name)
10834 def DeclareLocks(self, level):
10835 if level == locking.LEVEL_INSTANCE:
10836 # Lock instances optimistically, needs verification once node and group
10837 # locks have been acquired
10838 self.needed_locks[locking.LEVEL_INSTANCE] = \
10839 set(i.name for i in self._DetermineInstances())
10841 elif level == locking.LEVEL_NODEGROUP:
10842 # Lock node groups for all potential target nodes optimistically, needs
10843 # verification once nodes have been acquired
10844 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10845 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10847 elif level == locking.LEVEL_NODE:
10848 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10850 def CheckPrereq(self):
10852 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10853 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10854 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10856 need_nodes = self._DetermineNodes()
10858 if not owned_nodes.issuperset(need_nodes):
10859 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10860 " locks were acquired, current nodes are"
10861 " are '%s', used to be '%s'; retry the"
10863 (self.op.node_name,
10864 utils.CommaJoin(need_nodes),
10865 utils.CommaJoin(owned_nodes)),
10866 errors.ECODE_STATE)
10868 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10869 if owned_groups != wanted_groups:
10870 raise errors.OpExecError("Node groups changed since locks were acquired,"
10871 " current groups are '%s', used to be '%s';"
10872 " retry the operation" %
10873 (utils.CommaJoin(wanted_groups),
10874 utils.CommaJoin(owned_groups)))
10876 # Determine affected instances
10877 self.instances = self._DetermineInstances()
10878 self.instance_names = [i.name for i in self.instances]
10880 if set(self.instance_names) != owned_instances:
10881 raise errors.OpExecError("Instances on node '%s' changed since locks"
10882 " were acquired, current instances are '%s',"
10883 " used to be '%s'; retry the operation" %
10884 (self.op.node_name,
10885 utils.CommaJoin(self.instance_names),
10886 utils.CommaJoin(owned_instances)))
10888 if self.instance_names:
10889 self.LogInfo("Evacuating instances from node '%s': %s",
10890 self.op.node_name,
10891 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10892 else:
10893 self.LogInfo("No instances to evacuate from node '%s'",
10894 self.op.node_name)
10896 if self.op.remote_node is not None:
10897 for i in self.instances:
10898 if i.primary_node == self.op.remote_node:
10899 raise errors.OpPrereqError("Node %s is the primary node of"
10900 " instance %s, cannot use it as"
10902 (self.op.remote_node, i.name),
10903 errors.ECODE_INVAL)
10905 def Exec(self, feedback_fn):
10906 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10908 if not self.instance_names:
10909 # No instances to evacuate
10910 jobs = []
10912 elif self.op.iallocator is not None:
10913 # TODO: Implement relocation to other group
10914 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10915 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10916 instances=list(self.instance_names))
10918 ial.Run(self.op.iallocator)
10920 if not ial.success:
10921 raise errors.OpPrereqError("Can't compute node evacuation using"
10922 " iallocator '%s': %s" %
10923 (self.op.iallocator, ial.info),
10924 errors.ECODE_NORES)
10926 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10928 elif self.op.remote_node is not None:
10929 assert self.op.mode == constants.NODE_EVAC_SEC
10930 jobs = [
10931 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10932 remote_node=self.op.remote_node,
10934 mode=constants.REPLACE_DISK_CHG,
10935 early_release=self.op.early_release)]
10936 for instance_name in self.instance_names]
10940 raise errors.ProgrammerError("No iallocator or remote node")
10942 return ResultWithJobs(jobs)
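# The value handed to ResultWithJobs is a list of job definitions, each being
# a list of opcodes; illustratively, evacuating two secondary instances onto
# an explicit remote node produces two single-opcode jobs, one
# OpInstanceReplaceDisks per instance.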
10945 def _SetOpEarlyRelease(early_release, op):
10946 """Sets C{early_release} flag on opcodes if available.
10948 """
10949 try:
10950 op.early_release = early_release
10951 except AttributeError:
10952 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10954 return op
10957 def _NodeEvacDest(use_nodes, group, nodes):
10958 """Returns group or nodes depending on caller's choice.
10960 """
10961 if use_nodes:
10962 return utils.CommaJoin(nodes)
10963 else:
10964 return group
10967 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10968 """Unpacks the result of change-group and node-evacuate iallocator requests.
10970 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10971 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10973 @type lu: L{LogicalUnit}
10974 @param lu: Logical unit instance
10975 @type alloc_result: tuple/list
10976 @param alloc_result: Result from iallocator
10977 @type early_release: bool
10978 @param early_release: Whether to release locks early if possible
10979 @type use_nodes: bool
10980 @param use_nodes: Whether to display node names instead of groups
10983 (moved, failed, jobs) = alloc_result
10985 if failed:
10986 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10987 for (name, reason) in failed)
10988 lu.LogWarning("Unable to evacuate instances %s", failreason)
10989 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10992 lu.LogInfo("Instances to be moved: %s",
10993 utils.CommaJoin("%s (to %s)" %
10994 (name, _NodeEvacDest(use_nodes, group, nodes))
10995 for (name, group, nodes) in moved))
10997 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10998 map(opcodes.OpCode.LoadOpCode, ops))
10999 for ops in jobs]
11002 class LUInstanceGrowDisk(LogicalUnit):
11003 """Grow a disk of an instance.
11006 HPATH = "disk-grow"
11007 HTYPE = constants.HTYPE_INSTANCE
11010 def ExpandNames(self):
11011 self._ExpandAndLockInstance()
11012 self.needed_locks[locking.LEVEL_NODE] = []
11013 self.needed_locks[locking.LEVEL_NODE_RES] = []
11014 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11016 def DeclareLocks(self, level):
11017 if level == locking.LEVEL_NODE:
11018 self._LockInstancesNodes()
11019 elif level == locking.LEVEL_NODE_RES:
11021 self.needed_locks[locking.LEVEL_NODE_RES] = \
11022 self.needed_locks[locking.LEVEL_NODE][:]
11024 def BuildHooksEnv(self):
11025 """Build hooks env.
11027 This runs on the master, the primary and all the secondaries.
11031 "DISK": self.op.disk,
11032 "AMOUNT": self.op.amount,
11034 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11037 def BuildHooksNodes(self):
11038 """Build hooks nodes.
11041 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11044 def CheckPrereq(self):
11045 """Check prerequisites.
11047 This checks that the instance is in the cluster.
11050 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11051 assert instance is not None, \
11052 "Cannot retrieve locked instance %s" % self.op.instance_name
11053 nodenames = list(instance.all_nodes)
11054 for node in nodenames:
11055 _CheckNodeOnline(self, node)
11057 self.instance = instance
11059 if instance.disk_template not in constants.DTS_GROWABLE:
11060 raise errors.OpPrereqError("Instance's disk layout does not support"
11061 " growing", errors.ECODE_INVAL)
11063 self.disk = instance.FindDisk(self.op.disk)
11065 if instance.disk_template not in (constants.DT_FILE,
11066 constants.DT_SHARED_FILE):
11067 # TODO: check the free disk space for file, when that feature will be
11069 _CheckNodesFreeDiskPerVG(self, nodenames,
11070 self.disk.ComputeGrowth(self.op.amount))
11072 def Exec(self, feedback_fn):
11073 """Execute disk grow.
11076 instance = self.instance
11079 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11080 assert (self.owned_locks(locking.LEVEL_NODE) ==
11081 self.owned_locks(locking.LEVEL_NODE_RES))
11083 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11085 raise errors.OpExecError("Cannot activate block device to grow")
11087 feedback_fn("Growing disk %s of instance '%s' by %s" %
11088 (self.op.disk, instance.name,
11089 utils.FormatUnit(self.op.amount, "h")))
11091 # First run all grow ops in dry-run mode
11092 for node in instance.all_nodes:
11093 self.cfg.SetDiskID(disk, node)
11094 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11095 result.Raise("Grow request failed to node %s" % node)
11097 # We know that (as far as we can test) operations across different
11098 # nodes will succeed, time to run it for real
11099 for node in instance.all_nodes:
11100 self.cfg.SetDiskID(disk, node)
11101 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11102 result.Raise("Grow request failed to node %s" % node)
11104 # TODO: Rewrite code to work properly
11105 # DRBD goes into sync mode for a short amount of time after executing the
11106 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11107 # calling "resize" in sync mode fails. Sleeping for a short amount of
11108 # time is a work-around.
11111 disk.RecordGrow(self.op.amount)
11112 self.cfg.Update(instance, feedback_fn)
11114 # Changes have been recorded, release node lock
11115 _ReleaseLocks(self, locking.LEVEL_NODE)
11117 # Downgrade lock while waiting for sync
11118 self.glm.downgrade(locking.LEVEL_INSTANCE)
11120 if self.op.wait_for_sync:
11121 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11123 self.proc.LogWarning("Disk sync-ing has not returned a good"
11124 " status; please check the instance")
11125 if instance.admin_state != constants.ADMINST_UP:
11126 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11127 elif instance.admin_state != constants.ADMINST_UP:
11128 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11129 " not supposed to be running because no wait for"
11130 " sync mode was requested")
11132 assert self.owned_locks(locking.LEVEL_NODE_RES)
11133 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
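# Illustrative usage sketch (assumed client-side call, not part of this
# module): growing disk 0 of an instance by 2 GiB (amount is in MiB) could be
# submitted as:
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=2048, wait_for_sync=True)
#
# The LU above first issues every blockdev_grow RPC in dry-run mode, so that
# a node which cannot grow the disk aborts the operation before any real
# resize has happened.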
11136 class LUInstanceQueryData(NoHooksLU):
11137 """Query runtime instance data.
11142 def ExpandNames(self):
11143 self.needed_locks = {}
11145 # Use locking if requested or when non-static information is wanted
11146 if not (self.op.static or self.op.use_locking):
11147 self.LogWarning("Non-static data requested, locks need to be acquired")
11148 self.op.use_locking = True
11150 if self.op.instances or not self.op.use_locking:
11151 # Expand instance names right here
11152 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11154 # Will use acquired locks
11155 self.wanted_names = None
11157 if self.op.use_locking:
11158 self.share_locks = _ShareAll()
11160 if self.wanted_names is None:
11161 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11163 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11165 self.needed_locks[locking.LEVEL_NODE] = []
11166 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11168 def DeclareLocks(self, level):
11169 if self.op.use_locking and level == locking.LEVEL_NODE:
11170 self._LockInstancesNodes()
11172 def CheckPrereq(self):
11173 """Check prerequisites.
11175 This only checks the optional instance list against the existing names.
11178 if self.wanted_names is None:
11179 assert self.op.use_locking, "Locking was not used"
11180 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11182 self.wanted_instances = \
11183 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11185 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11186 """Returns the status of a block device
11189 if self.op.static or not node:
11192 self.cfg.SetDiskID(dev, node)
11194 result = self.rpc.call_blockdev_find(node, dev)
11198 result.Raise("Can't compute disk status for %s" % instance_name)
11200 status = result.payload
11204 return (status.dev_path, status.major, status.minor,
11205 status.sync_percent, status.estimated_time,
11206 status.is_degraded, status.ldisk_status)
11208 def _ComputeDiskStatus(self, instance, snode, dev):
11209 """Compute block device status.
11212 if dev.dev_type in constants.LDS_DRBD:
11213 # we change the snode then (otherwise we use the one passed in)
11214 if dev.logical_id[0] == instance.primary_node:
11215 snode = dev.logical_id[1]
11217 snode = dev.logical_id[0]
11219 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11220 instance.name, dev)
11221 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11224 dev_children = map(compat.partial(self._ComputeDiskStatus,
11231 "iv_name": dev.iv_name,
11232 "dev_type": dev.dev_type,
11233 "logical_id": dev.logical_id,
11234 "physical_id": dev.physical_id,
11235 "pstatus": dev_pstatus,
11236 "sstatus": dev_sstatus,
11237 "children": dev_children,
11242 def Exec(self, feedback_fn):
11243 """Gather and return data"""
11246 cluster = self.cfg.GetClusterInfo()
11248 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11249 for i in self.wanted_instances)
11250 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11251 if self.op.static or pnode.offline:
11252 remote_state = None
11254 self.LogWarning("Primary node %s is marked offline, returning static"
11255 " information only for instance %s" %
11256 (pnode.name, instance.name))
11258 remote_info = self.rpc.call_instance_info(instance.primary_node,
11260 instance.hypervisor)
11261 remote_info.Raise("Error checking node %s" % instance.primary_node)
11262 remote_info = remote_info.payload
11263 if remote_info and "state" in remote_info:
11264 remote_state = "up"
11266 if instance.admin_state == constants.ADMINST_UP:
11267 remote_state = "down"
11269 remote_state = instance.admin_state
11271 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11274 result[instance.name] = {
11275 "name": instance.name,
11276 "config_state": instance.admin_state,
11277 "run_state": remote_state,
11278 "pnode": instance.primary_node,
11279 "snodes": instance.secondary_nodes,
11281 # this happens to be the same format used for hooks
11282 "nics": _NICListToTuple(self, instance.nics),
11283 "disk_template": instance.disk_template,
11285 "hypervisor": instance.hypervisor,
11286 "network_port": instance.network_port,
11287 "hv_instance": instance.hvparams,
11288 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11289 "be_instance": instance.beparams,
11290 "be_actual": cluster.FillBE(instance),
11291 "os_instance": instance.osparams,
11292 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11293 "serial_no": instance.serial_no,
11294 "mtime": instance.mtime,
11295 "ctime": instance.ctime,
11296 "uuid": instance.uuid,
11302 class LUInstanceSetParams(LogicalUnit):
11303 """Modifies an instances's parameters.
11306 HPATH = "instance-modify"
11307 HTYPE = constants.HTYPE_INSTANCE
11310 def CheckArguments(self):
11311 if not (self.op.nics or self.op.disks or self.op.disk_template or
11312 self.op.hvparams or self.op.beparams or self.op.os_name or
11313 self.op.online_inst or self.op.offline_inst):
11314 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11316 if self.op.hvparams:
11317 _CheckGlobalHvParams(self.op.hvparams)
11321 for disk_op, disk_dict in self.op.disks:
11322 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11323 if disk_op == constants.DDM_REMOVE:
11324 disk_addremove += 1
11326 elif disk_op == constants.DDM_ADD:
11327 disk_addremove += 1
11329 if not isinstance(disk_op, int):
11330 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11331 if not isinstance(disk_dict, dict):
11332 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11333 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11335 if disk_op == constants.DDM_ADD:
11336 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11337 if mode not in constants.DISK_ACCESS_SET:
11338 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11339 errors.ECODE_INVAL)
11340 size = disk_dict.get(constants.IDISK_SIZE, None)
11342 raise errors.OpPrereqError("Required disk parameter size missing",
11343 errors.ECODE_INVAL)
11346 except (TypeError, ValueError), err:
11347 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11348 str(err), errors.ECODE_INVAL)
11349 disk_dict[constants.IDISK_SIZE] = size
11351 # modification of disk
11352 if constants.IDISK_SIZE in disk_dict:
11353 raise errors.OpPrereqError("Disk size change not possible, use"
11354 " grow-disk", errors.ECODE_INVAL)
11356 if disk_addremove > 1:
11357 raise errors.OpPrereqError("Only one disk add or remove operation"
11358 " supported at a time", errors.ECODE_INVAL)
11360 if self.op.disks and self.op.disk_template is not None:
11361 raise errors.OpPrereqError("Disk template conversion and other disk"
11362 " changes not supported at the same time",
11363 errors.ECODE_INVAL)
11365 if (self.op.disk_template and
11366 self.op.disk_template in constants.DTS_INT_MIRROR and
11367 self.op.remote_node is None):
11368 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11369 " one requires specifying a secondary node",
11370 errors.ECODE_INVAL)
11374 for nic_op, nic_dict in self.op.nics:
11375 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11376 if nic_op == constants.DDM_REMOVE:
11379 elif nic_op == constants.DDM_ADD:
11382 if not isinstance(nic_op, int):
11383 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11384 if not isinstance(nic_dict, dict):
11385 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11386 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11388 # nic_dict should be a dict
11389 nic_ip = nic_dict.get(constants.INIC_IP, None)
11390 if nic_ip is not None:
11391 if nic_ip.lower() == constants.VALUE_NONE:
11392 nic_dict[constants.INIC_IP] = None
11394 if not netutils.IPAddress.IsValid(nic_ip):
11395 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11396 errors.ECODE_INVAL)
11398 nic_bridge = nic_dict.get("bridge", None)
11399 nic_link = nic_dict.get(constants.INIC_LINK, None)
11400 if nic_bridge and nic_link:
11401 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11402 " at the same time", errors.ECODE_INVAL)
11403 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11404 nic_dict["bridge"] = None
11405 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11406 nic_dict[constants.INIC_LINK] = None
11408 if nic_op == constants.DDM_ADD:
11409 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11410 if nic_mac is None:
11411 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11413 if constants.INIC_MAC in nic_dict:
11414 nic_mac = nic_dict[constants.INIC_MAC]
11415 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11416 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11418 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11419 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11420 " modifying an existing nic",
11421 errors.ECODE_INVAL)
11423 if nic_addremove > 1:
11424 raise errors.OpPrereqError("Only one NIC add or remove operation"
11425 " supported at a time", errors.ECODE_INVAL)
11427 def ExpandNames(self):
11428 self._ExpandAndLockInstance()
11429 # Can't even acquire node locks in shared mode as upcoming changes in
11430 # Ganeti 2.6 will start to modify the node object on disk conversion
11431 self.needed_locks[locking.LEVEL_NODE] = []
11432 self.needed_locks[locking.LEVEL_NODE_RES] = []
11433 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11435 def DeclareLocks(self, level):
11436 if level == locking.LEVEL_NODE:
11437 self._LockInstancesNodes()
11438 if self.op.disk_template and self.op.remote_node:
11439 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11440 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11441 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11443 self.needed_locks[locking.LEVEL_NODE_RES] = \
11444 self.needed_locks[locking.LEVEL_NODE][:]
11446 def BuildHooksEnv(self):
11447 """Build hooks env.
11449 This runs on the master, primary and secondaries.
11453 if constants.BE_MINMEM in self.be_new:
11454 args["minmem"] = self.be_new[constants.BE_MINMEM]
11455 if constants.BE_MAXMEM in self.be_new:
11456 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11457 if constants.BE_VCPUS in self.be_new:
11458 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11459 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11460 # information at all.
11463 nic_override = dict(self.op.nics)
11464 for idx, nic in enumerate(self.instance.nics):
11465 if idx in nic_override:
11466 this_nic_override = nic_override[idx]
11468 this_nic_override = {}
11469 if constants.INIC_IP in this_nic_override:
11470 ip = this_nic_override[constants.INIC_IP]
11473 if constants.INIC_MAC in this_nic_override:
11474 mac = this_nic_override[constants.INIC_MAC]
11477 if idx in self.nic_pnew:
11478 nicparams = self.nic_pnew[idx]
11480 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11481 mode = nicparams[constants.NIC_MODE]
11482 link = nicparams[constants.NIC_LINK]
11483 args["nics"].append((ip, mac, mode, link))
11484 if constants.DDM_ADD in nic_override:
11485 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11486 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11487 nicparams = self.nic_pnew[constants.DDM_ADD]
11488 mode = nicparams[constants.NIC_MODE]
11489 link = nicparams[constants.NIC_LINK]
11490 args["nics"].append((ip, mac, mode, link))
11491 elif constants.DDM_REMOVE in nic_override:
11492 del args["nics"][-1]
11494 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11495 if self.op.disk_template:
11496 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11500 def BuildHooksNodes(self):
11501 """Build hooks nodes.
11504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11507 def CheckPrereq(self):
11508 """Check prerequisites.
11510 This only checks the instance list against the existing names.
11513 # checking the new params on the primary/secondary nodes
11515 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11516 cluster = self.cluster = self.cfg.GetClusterInfo()
11517 assert self.instance is not None, \
11518 "Cannot retrieve locked instance %s" % self.op.instance_name
11519 pnode = instance.primary_node
11520 nodelist = list(instance.all_nodes)
11521 pnode_info = self.cfg.GetNodeInfo(pnode)
11522 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11525 if self.op.os_name and not self.op.force:
11526 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11527 self.op.force_variant)
11528 instance_os = self.op.os_name
11530 instance_os = instance.os
11532 if self.op.disk_template:
11533 if instance.disk_template == self.op.disk_template:
11534 raise errors.OpPrereqError("Instance already has disk template %s" %
11535 instance.disk_template, errors.ECODE_INVAL)
11537 if (instance.disk_template,
11538 self.op.disk_template) not in self._DISK_CONVERSIONS:
11539 raise errors.OpPrereqError("Unsupported disk template conversion from"
11540 " %s to %s" % (instance.disk_template,
11541 self.op.disk_template),
11542 errors.ECODE_INVAL)
11543 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11544 msg="cannot change disk template")
11545 if self.op.disk_template in constants.DTS_INT_MIRROR:
11546 if self.op.remote_node == pnode:
11547 raise errors.OpPrereqError("Given new secondary node %s is the same"
11548 " as the primary node of the instance" %
11549 self.op.remote_node, errors.ECODE_STATE)
11550 _CheckNodeOnline(self, self.op.remote_node)
11551 _CheckNodeNotDrained(self, self.op.remote_node)
11552 # FIXME: here we assume that the old instance type is DT_PLAIN
11553 assert instance.disk_template == constants.DT_PLAIN
11554 disks = [{constants.IDISK_SIZE: d.size,
11555 constants.IDISK_VG: d.logical_id[0]}
11556 for d in instance.disks]
11557 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11558 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11560 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11561 if pnode_info.group != snode_info.group:
11562 self.LogWarning("The primary and secondary nodes are in two"
11563 " different node groups; the disk parameters"
11564 " from the first disk's node group will be"
11567 # hvparams processing
11568 if self.op.hvparams:
11569 hv_type = instance.hypervisor
11570 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11571 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11572 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11575 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11576 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11577 self.hv_proposed = self.hv_new = hv_new # the new actual values
11578 self.hv_inst = i_hvdict # the new dict (without defaults)
11580 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11582 self.hv_new = self.hv_inst = {}
11584 # beparams processing
11585 if self.op.beparams:
11586 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11588 objects.UpgradeBeParams(i_bedict)
11589 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11590 be_new = cluster.SimpleFillBE(i_bedict)
11591 self.be_proposed = self.be_new = be_new # the new actual values
11592 self.be_inst = i_bedict # the new dict (without defaults)
11594 self.be_new = self.be_inst = {}
11595 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11596 be_old = cluster.FillBE(instance)
11598 # CPU param validation -- checking every time a parameter is
11599 # changed to cover all cases where either CPU mask or vcpus have
11600 # been changed
11601 if (constants.BE_VCPUS in self.be_proposed and
11602 constants.HV_CPU_MASK in self.hv_proposed):
11604 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11605 # Verify mask is consistent with number of vCPUs. Can skip this
11606 # test if only 1 entry in the CPU mask, which means same mask
11607 # is applied to all vCPUs.
11608 if (len(cpu_list) > 1 and
11609 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11610 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11612 (self.be_proposed[constants.BE_VCPUS],
11613 self.hv_proposed[constants.HV_CPU_MASK]),
11614 errors.ECODE_INVAL)
11616 # Only perform this test if a new CPU mask is given
11617 if constants.HV_CPU_MASK in self.hv_new:
11618 # Calculate the largest CPU number requested
11619 max_requested_cpu = max(map(max, cpu_list))
11620 # Check that all of the instance's nodes have enough physical CPUs to
11621 # satisfy the requested CPU mask
11622 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11623 max_requested_cpu + 1, instance.hypervisor)
11625 # osparams processing
11626 if self.op.osparams:
11627 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11628 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11629 self.os_inst = i_osdict # the new dict (without defaults)
11635 #TODO(dynmem): do the appropriate check involving MINMEM
11636 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11637 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11638 mem_check_list = [pnode]
11639 if be_new[constants.BE_AUTO_BALANCE]:
11640 # either we changed auto_balance to yes or it was from before
11641 mem_check_list.extend(instance.secondary_nodes)
11642 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11643 instance.hypervisor)
11644 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11645 [instance.hypervisor])
11646 pninfo = nodeinfo[pnode]
11647 msg = pninfo.fail_msg
11649 # Assume the primary node is unreachable and go ahead
11650 self.warn.append("Can't get info from primary node %s: %s" %
11653 (_, _, (pnhvinfo, )) = pninfo.payload
11654 if not isinstance(pnhvinfo.get("memory_free", None), int):
11655 self.warn.append("Node data from primary node %s doesn't contain"
11656 " free memory information" % pnode)
11657 elif instance_info.fail_msg:
11658 self.warn.append("Can't get instance runtime information: %s" %
11659 instance_info.fail_msg)
11661 if instance_info.payload:
11662 current_mem = int(instance_info.payload["memory"])
11664 # Assume instance not running
11665 # (there is a slight race condition here, but it's not very
11666 # probable, and we have no other way to check)
11667 # TODO: Describe race condition
11669 #TODO(dynmem): do the appropriate check involving MINMEM
11670 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11671 pnhvinfo["memory_free"])
11673 raise errors.OpPrereqError("This change will prevent the instance"
11674 " from starting, due to %d MB of memory"
11675 " missing on its primary node" %
11677 errors.ECODE_NORES)
11679 if be_new[constants.BE_AUTO_BALANCE]:
11680 for node, nres in nodeinfo.items():
11681 if node not in instance.secondary_nodes:
11683 nres.Raise("Can't get info from secondary node %s" % node,
11684 prereq=True, ecode=errors.ECODE_STATE)
11685 (_, _, (nhvinfo, )) = nres.payload
11686 if not isinstance(nhvinfo.get("memory_free", None), int):
11687 raise errors.OpPrereqError("Secondary node %s didn't return free"
11688 " memory information" % node,
11689 errors.ECODE_STATE)
11690 #TODO(dynmem): do the appropriate check involving MINMEM
11691 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11692 raise errors.OpPrereqError("This change will prevent the instance"
11693 " from failover to its secondary node"
11694 " %s, due to not enough memory" % node,
11695 errors.ECODE_STATE)
11699 self.nic_pinst = {}
11700 for nic_op, nic_dict in self.op.nics:
11701 if nic_op == constants.DDM_REMOVE:
11702 if not instance.nics:
11703 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11704 errors.ECODE_INVAL)
11706 if nic_op != constants.DDM_ADD:
11708 if not instance.nics:
11709 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11710 " no NICs" % nic_op,
11711 errors.ECODE_INVAL)
11712 if nic_op < 0 or nic_op >= len(instance.nics):
11713 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11715 (nic_op, len(instance.nics) - 1),
11716 errors.ECODE_INVAL)
11717 old_nic_params = instance.nics[nic_op].nicparams
11718 old_nic_ip = instance.nics[nic_op].ip
11720 old_nic_params = {}
11723 update_params_dict = dict([(key, nic_dict[key])
11724 for key in constants.NICS_PARAMETERS
11725 if key in nic_dict])
11727 if "bridge" in nic_dict:
11728 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11730 new_nic_params = _GetUpdatedParams(old_nic_params,
11731 update_params_dict)
11732 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11733 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11734 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11735 self.nic_pinst[nic_op] = new_nic_params
11736 self.nic_pnew[nic_op] = new_filled_nic_params
11737 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11739 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11740 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11741 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11743 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11745 self.warn.append(msg)
11747 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11748 if new_nic_mode == constants.NIC_MODE_ROUTED:
11749 if constants.INIC_IP in nic_dict:
11750 nic_ip = nic_dict[constants.INIC_IP]
11752 nic_ip = old_nic_ip
11754 raise errors.OpPrereqError("Cannot set the nic ip to None"
11755 " on a routed nic", errors.ECODE_INVAL)
11756 if constants.INIC_MAC in nic_dict:
11757 nic_mac = nic_dict[constants.INIC_MAC]
11758 if nic_mac is None:
11759 raise errors.OpPrereqError("Cannot set the nic mac to None",
11760 errors.ECODE_INVAL)
11761 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11762 # otherwise generate the mac
11763 nic_dict[constants.INIC_MAC] = \
11764 self.cfg.GenerateMAC(self.proc.GetECId())
11766 # or validate/reserve the current one
11768 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11769 except errors.ReservationError:
11770 raise errors.OpPrereqError("MAC address %s already in use"
11771 " in cluster" % nic_mac,
11772 errors.ECODE_NOTUNIQUE)
11775 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11776 raise errors.OpPrereqError("Disk operations not supported for"
11777 " diskless instances",
11778 errors.ECODE_INVAL)
11779 for disk_op, _ in self.op.disks:
11780 if disk_op == constants.DDM_REMOVE:
11781 if len(instance.disks) == 1:
11782 raise errors.OpPrereqError("Cannot remove the last disk of"
11783 " an instance", errors.ECODE_INVAL)
11784 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11785 msg="cannot remove disks")
11787 if (disk_op == constants.DDM_ADD and
11788 len(instance.disks) >= constants.MAX_DISKS):
11789 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11790 " add more" % constants.MAX_DISKS,
11791 errors.ECODE_STATE)
11792 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11794 if disk_op < 0 or disk_op >= len(instance.disks):
11795 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11797 (disk_op, len(instance.disks)),
11798 errors.ECODE_INVAL)
11800 # disabling the instance
11801 if self.op.offline_inst:
11802 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11803 msg="cannot change instance state to offline")
11805 # enabling the instance
11806 if self.op.online_inst:
11807 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11808 msg="cannot make instance go online")
11810 def _ConvertPlainToDrbd(self, feedback_fn):
11811 """Converts an instance from plain to drbd.
11814 feedback_fn("Converting template to drbd")
11815 instance = self.instance
11816 pnode = instance.primary_node
11817 snode = self.op.remote_node
11819 assert instance.disk_template == constants.DT_PLAIN
11821 # create a fake disk info for _GenerateDiskTemplate
11822 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11823 constants.IDISK_VG: d.logical_id[0]}
11824 for d in instance.disks]
11825 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11826 instance.name, pnode, [snode],
11827 disk_info, None, None, 0, feedback_fn,
11829 info = _GetInstanceInfoText(instance)
11830 feedback_fn("Creating aditional volumes...")
11831 # first, create the missing data and meta devices
11832 for disk in new_disks:
11833 # unfortunately this is... not too nice
11834 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11836 for child in disk.children:
11837 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11838 # at this stage, all new LVs have been created, we can rename the
11839 # old ones
11840 feedback_fn("Renaming original volumes...")
11841 rename_list = [(o, n.children[0].logical_id)
11842 for (o, n) in zip(instance.disks, new_disks)]
11843 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11844 result.Raise("Failed to rename original LVs")
11846 feedback_fn("Initializing DRBD devices...")
11847 # all child devices are in place, we can now create the DRBD devices
11848 for disk in new_disks:
11849 for node in [pnode, snode]:
11850 f_create = node == pnode
11851 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11853 # at this point, the instance has been modified
11854 instance.disk_template = constants.DT_DRBD8
11855 instance.disks = new_disks
11856 self.cfg.Update(instance, feedback_fn)
11858 # Release node locks while waiting for sync
11859 _ReleaseLocks(self, locking.LEVEL_NODE)
11861 # disks are created, waiting for sync
11862 disk_abort = not _WaitForSync(self, instance,
11863 oneshot=not self.op.wait_for_sync)
11865 raise errors.OpExecError("There are some degraded disks for"
11866 " this instance, please cleanup manually")
11868 # Node resource locks will be released by caller
11870 def _ConvertDrbdToPlain(self, feedback_fn):
11871 """Converts an instance from drbd to plain.
11874 instance = self.instance
11876 assert len(instance.secondary_nodes) == 1
11877 assert instance.disk_template == constants.DT_DRBD8
11879 pnode = instance.primary_node
11880 snode = instance.secondary_nodes[0]
11881 feedback_fn("Converting template to plain")
11883 old_disks = instance.disks
11884 new_disks = [d.children[0] for d in old_disks]
11886 # copy over size and mode
11887 for parent, child in zip(old_disks, new_disks):
11888 child.size = parent.size
11889 child.mode = parent.mode
11891 # update instance structure
11892 instance.disks = new_disks
11893 instance.disk_template = constants.DT_PLAIN
11894 self.cfg.Update(instance, feedback_fn)
11896 # Release locks in case removing disks takes a while
11897 _ReleaseLocks(self, locking.LEVEL_NODE)
11899 feedback_fn("Removing volumes on the secondary node...")
11900 for disk in old_disks:
11901 self.cfg.SetDiskID(disk, snode)
11902 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11904 self.LogWarning("Could not remove block device %s on node %s,"
11905 " continuing anyway: %s", disk.iv_name, snode, msg)
11907 feedback_fn("Removing unneeded volumes on the primary node...")
11908 for idx, disk in enumerate(old_disks):
11909 meta = disk.children[1]
11910 self.cfg.SetDiskID(meta, pnode)
11911 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11913 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11914 " continuing anyway: %s", idx, pnode, msg)
11916 # this is a DRBD disk, return its port to the pool
11917 for disk in old_disks:
11918 tcp_port = disk.logical_id[2]
11919 self.cfg.AddTcpUdpPort(tcp_port)
11921 # Node resource locks will be released by caller
11923 def Exec(self, feedback_fn):
11924 """Modifies an instance.
11926 All parameters take effect only at the next restart of the instance.
11929 # Process here the warnings from CheckPrereq, as we don't have a
11930 # feedback_fn there.
11931 for warn in self.warn:
11932 feedback_fn("WARNING: %s" % warn)
11934 assert ((self.op.disk_template is None) ^
11935 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11936 "Not owning any node resource locks"
11939 instance = self.instance
11941 for disk_op, disk_dict in self.op.disks:
11942 if disk_op == constants.DDM_REMOVE:
11943 # remove the last disk
11944 device = instance.disks.pop()
11945 device_idx = len(instance.disks)
11946 for node, disk in device.ComputeNodeTree(instance.primary_node):
11947 self.cfg.SetDiskID(disk, node)
11948 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11950 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11951 " continuing anyway", device_idx, node, msg)
11952 result.append(("disk/%d" % device_idx, "remove"))
11954 # if this is a DRBD disk, return its port to the pool
11955 if device.dev_type in constants.LDS_DRBD:
11956 tcp_port = device.logical_id[2]
11957 self.cfg.AddTcpUdpPort(tcp_port)
11958 elif disk_op == constants.DDM_ADD:
11960 if instance.disk_template in (constants.DT_FILE,
11961 constants.DT_SHARED_FILE):
11962 file_driver, file_path = instance.disks[0].logical_id
11963 file_path = os.path.dirname(file_path)
11965 file_driver = file_path = None
11966 disk_idx_base = len(instance.disks)
11967 new_disk = _GenerateDiskTemplate(self,
11968 instance.disk_template,
11969 instance.name, instance.primary_node,
11970 instance.secondary_nodes,
11976 self.diskparams)[0]
11977 instance.disks.append(new_disk)
11978 info = _GetInstanceInfoText(instance)
11980 logging.info("Creating volume %s for instance %s",
11981 new_disk.iv_name, instance.name)
11982 # Note: this needs to be kept in sync with _CreateDisks
11984 for node in instance.all_nodes:
11985 f_create = node == instance.primary_node
11987 _CreateBlockDev(self, node, instance, new_disk,
11988 f_create, info, f_create)
11989 except errors.OpExecError, err:
11990 self.LogWarning("Failed to create volume %s (%s) on"
11992 new_disk.iv_name, new_disk, node, err)
11993 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11994 (new_disk.size, new_disk.mode)))
11996 # change a given disk
11997 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11998 result.append(("disk.mode/%d" % disk_op,
11999 disk_dict[constants.IDISK_MODE]))
12001 if self.op.disk_template:
12003 check_nodes = set(instance.all_nodes)
12004 if self.op.remote_node:
12005 check_nodes.add(self.op.remote_node)
12006 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12007 owned = self.owned_locks(level)
12008 assert not (check_nodes - owned), \
12009 ("Not owning the correct locks, owning %r, expected at least %r" %
12010 (owned, check_nodes))
12012 r_shut = _ShutdownInstanceDisks(self, instance)
12014 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12015 " proceed with disk template conversion")
12016 mode = (instance.disk_template, self.op.disk_template)
12018 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12020 self.cfg.ReleaseDRBDMinors(instance.name)
12022 result.append(("disk_template", self.op.disk_template))
12024 assert instance.disk_template == self.op.disk_template, \
12025 ("Expected disk template '%s', found '%s'" %
12026 (self.op.disk_template, instance.disk_template))
12028 # Release node and resource locks if there are any (they might already have
12029 # been released during disk conversion)
12030 _ReleaseLocks(self, locking.LEVEL_NODE)
12031 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12034 for nic_op, nic_dict in self.op.nics:
12035 if nic_op == constants.DDM_REMOVE:
12036 # remove the last nic
12037 del instance.nics[-1]
12038 result.append(("nic.%d" % len(instance.nics), "remove"))
12039 elif nic_op == constants.DDM_ADD:
12040 # mac and bridge should be set, by now
12041 mac = nic_dict[constants.INIC_MAC]
12042 ip = nic_dict.get(constants.INIC_IP, None)
12043 nicparams = self.nic_pinst[constants.DDM_ADD]
12044 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12045 instance.nics.append(new_nic)
12046 result.append(("nic.%d" % (len(instance.nics) - 1),
12047 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12048 (new_nic.mac, new_nic.ip,
12049 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12050 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12053 for key in (constants.INIC_MAC, constants.INIC_IP):
12054 if key in nic_dict:
12055 setattr(instance.nics[nic_op], key, nic_dict[key])
12056 if nic_op in self.nic_pinst:
12057 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12058 for key, val in nic_dict.iteritems():
12059 result.append(("nic.%s/%d" % (key, nic_op), val))
12062 if self.op.hvparams:
12063 instance.hvparams = self.hv_inst
12064 for key, val in self.op.hvparams.iteritems():
12065 result.append(("hv/%s" % key, val))
12068 if self.op.beparams:
12069 instance.beparams = self.be_inst
12070 for key, val in self.op.beparams.iteritems():
12071 result.append(("be/%s" % key, val))
12074 if self.op.os_name:
12075 instance.os = self.op.os_name
12078 if self.op.osparams:
12079 instance.osparams = self.os_inst
12080 for key, val in self.op.osparams.iteritems():
12081 result.append(("os/%s" % key, val))
12083 # online/offline instance
12084 if self.op.online_inst:
12085 self.cfg.MarkInstanceDown(instance.name)
12086 result.append(("admin_state", constants.ADMINST_DOWN))
12087 if self.op.offline_inst:
12088 self.cfg.MarkInstanceOffline(instance.name)
12089 result.append(("admin_state", constants.ADMINST_OFFLINE))
12091 self.cfg.Update(instance, feedback_fn)
12093 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12094 self.owned_locks(locking.LEVEL_NODE)), \
12095 "All node locks should have been released by now"
12099 _DISK_CONVERSIONS = {
12100 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12101 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
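# Illustrative usage sketch (assumed client-side call, not part of this
# module): converting a plain instance to DRBD via LUInstanceSetParams could
# be requested as follows; the names are placeholders.
#
#   op = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                                    disk_template=constants.DT_DRBD8,
#                                    remote_node="node2.example.com")
#
# The reverse conversion (DT_DRBD8 back to DT_PLAIN) needs no remote node;
# only the pairs listed in _DISK_CONVERSIONS above are supported.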
12105 class LUInstanceChangeGroup(LogicalUnit):
12106 HPATH = "instance-change-group"
12107 HTYPE = constants.HTYPE_INSTANCE
12110 def ExpandNames(self):
12111 self.share_locks = _ShareAll()
12112 self.needed_locks = {
12113 locking.LEVEL_NODEGROUP: [],
12114 locking.LEVEL_NODE: [],
12117 self._ExpandAndLockInstance()
12119 if self.op.target_groups:
12120 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12121 self.op.target_groups)
12123 self.req_target_uuids = None
12125 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12127 def DeclareLocks(self, level):
12128 if level == locking.LEVEL_NODEGROUP:
12129 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12131 if self.req_target_uuids:
12132 lock_groups = set(self.req_target_uuids)
12134 # Lock all groups used by instance optimistically; this requires going
12135 # via the node before it's locked, requiring verification later on
12136 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12137 lock_groups.update(instance_groups)
12139 # No target groups, need to lock all of them
12140 lock_groups = locking.ALL_SET
12142 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12144 elif level == locking.LEVEL_NODE:
12145 if self.req_target_uuids:
12146 # Lock all nodes used by instances
12147 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12148 self._LockInstancesNodes()
12150 # Lock all nodes in all potential target groups
12151 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12152 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12153 member_nodes = [node_name
12154 for group in lock_groups
12155 for node_name in self.cfg.GetNodeGroup(group).members]
12156 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12158 # Lock all nodes as all groups are potential targets
12159 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12161 def CheckPrereq(self):
12162 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12163 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12164 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12166 assert (self.req_target_uuids is None or
12167 owned_groups.issuperset(self.req_target_uuids))
12168 assert owned_instances == set([self.op.instance_name])
12170 # Get instance information
12171 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12173 # Check if node groups for locked instance are still correct
12174 assert owned_nodes.issuperset(self.instance.all_nodes), \
12175 ("Instance %s's nodes changed while we kept the lock" %
12176 self.op.instance_name)
12178 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12181 if self.req_target_uuids:
12182 # User requested specific target groups
12183 self.target_uuids = self.req_target_uuids
12185 # All groups except those used by the instance are potential targets
12186 self.target_uuids = owned_groups - inst_groups
12188 conflicting_groups = self.target_uuids & inst_groups
12189 if conflicting_groups:
12190 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12191 " used by the instance '%s'" %
12192 (utils.CommaJoin(conflicting_groups),
12193 self.op.instance_name),
12194 errors.ECODE_INVAL)
12196 if not self.target_uuids:
12197 raise errors.OpPrereqError("There are no possible target groups",
12198 errors.ECODE_INVAL)
12200 def BuildHooksEnv(self):
12201 """Build hooks env.
12204 assert self.target_uuids
12207 "TARGET_GROUPS": " ".join(self.target_uuids),
12210 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12214 def BuildHooksNodes(self):
12215 """Build hooks nodes.
12218 mn = self.cfg.GetMasterNode()
12219 return ([mn], [mn])
12221 def Exec(self, feedback_fn):
12222 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12224 assert instances == [self.op.instance_name], "Instance not locked"
12226 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12227 instances=instances, target_groups=list(self.target_uuids))
12229 ial.Run(self.op.iallocator)
12231 if not ial.success:
12232 raise errors.OpPrereqError("Can't compute solution for changing group of"
12233 " instance '%s' using iallocator '%s': %s" %
12234 (self.op.instance_name, self.op.iallocator,
12236 errors.ECODE_NORES)
12238 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12240 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12241 " instance '%s'", len(jobs), self.op.instance_name)
12243 return ResultWithJobs(jobs)
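# Illustrative usage sketch (assumed client-side call, not part of this
# module): moving an instance to another node group could be requested as:
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])
#
# Without target_groups every group except the instance's current one(s) is a
# candidate, and the chosen iallocator returns the jobs that perform the move.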
12246 class LUBackupQuery(NoHooksLU):
12247 """Query the exports list
12252 def ExpandNames(self):
12253 self.needed_locks = {}
12254 self.share_locks[locking.LEVEL_NODE] = 1
12255 if not self.op.nodes:
12256 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12258 self.needed_locks[locking.LEVEL_NODE] = \
12259 _GetWantedNodes(self, self.op.nodes)
12261 def Exec(self, feedback_fn):
12262 """Compute the list of all the exported system images.
12265 @return: a dictionary with the structure node->(export-list)
12266 where export-list is a list of the instances exported on
12270 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12271 rpcresult = self.rpc.call_export_list(self.nodes)
12273 for node in rpcresult:
12274 if rpcresult[node].fail_msg:
12275 result[node] = False
12277 result[node] = rpcresult[node].payload
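# Illustrative sketch (not part of the original module): the structure built
# above maps node names to either False (node could not be queried) or the
# list of exports found on it, e.g.:
#
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}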
12282 class LUBackupPrepare(NoHooksLU):
12283 """Prepares an instance for an export and returns useful information.
12288 def ExpandNames(self):
12289 self._ExpandAndLockInstance()
12291 def CheckPrereq(self):
12292 """Check prerequisites.
12295 instance_name = self.op.instance_name
12297 self.instance = self.cfg.GetInstanceInfo(instance_name)
12298 assert self.instance is not None, \
12299 "Cannot retrieve locked instance %s" % self.op.instance_name
12300 _CheckNodeOnline(self, self.instance.primary_node)
12302 self._cds = _GetClusterDomainSecret()
12304 def Exec(self, feedback_fn):
12305 """Prepares an instance for an export.
12308 instance = self.instance
12310 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12311 salt = utils.GenerateSecret(8)
12313 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12314 result = self.rpc.call_x509_cert_create(instance.primary_node,
12315 constants.RIE_CERT_VALIDITY)
12316 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12318 (name, cert_pem) = result.payload
12320 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12324 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12325 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12327 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12333 class LUBackupExport(LogicalUnit):
12334 """Export an instance to an image in the cluster.
12337 HPATH = "instance-export"
12338 HTYPE = constants.HTYPE_INSTANCE
12341 def CheckArguments(self):
12342 """Check the arguments.
12345 self.x509_key_name = self.op.x509_key_name
12346 self.dest_x509_ca_pem = self.op.destination_x509_ca
12348 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12349 if not self.x509_key_name:
12350 raise errors.OpPrereqError("Missing X509 key name for encryption",
12351 errors.ECODE_INVAL)
12353 if not self.dest_x509_ca_pem:
12354 raise errors.OpPrereqError("Missing destination X509 CA",
12355 errors.ECODE_INVAL)
12357 def ExpandNames(self):
12358 self._ExpandAndLockInstance()
12360 # Lock all nodes for local exports
12361 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12362 # FIXME: lock only instance primary and destination node
12364 # Sad but true, for now we have to lock all nodes, as we don't know where
12365 # the previous export might be, and in this LU we search for it and
12366 # remove it from its current node. In the future we could fix this by:
12367 # - making a tasklet to search (share-lock all), then create the
12368 # new one, then one to remove, after
12369 # - removing the removal operation altogether
12370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12372 def DeclareLocks(self, level):
12373 """Last minute lock declaration."""
12374 # All nodes are locked anyway, so nothing to do here.
12376 def BuildHooksEnv(self):
12377 """Build hooks env.
12379 This will run on the master, primary node and target node.
12383 "EXPORT_MODE": self.op.mode,
12384 "EXPORT_NODE": self.op.target_node,
12385 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12386 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12387 # TODO: Generic function for boolean env variables
12388 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12395 def BuildHooksNodes(self):
12396 """Build hooks nodes.
12399 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12401 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12402 nl.append(self.op.target_node)
12406 def CheckPrereq(self):
12407 """Check prerequisites.
12409 This checks that the instance and node names are valid.
12412 instance_name = self.op.instance_name
12414 self.instance = self.cfg.GetInstanceInfo(instance_name)
12415 assert self.instance is not None, \
12416 "Cannot retrieve locked instance %s" % self.op.instance_name
12417 _CheckNodeOnline(self, self.instance.primary_node)
12419 if (self.op.remove_instance and
12420 self.instance.admin_state == constants.ADMINST_UP and
12421 not self.op.shutdown):
12422 raise errors.OpPrereqError("Can not remove instance without shutting it"
12425 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12426 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12427 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12428 assert self.dst_node is not None
12430 _CheckNodeOnline(self, self.dst_node.name)
12431 _CheckNodeNotDrained(self, self.dst_node.name)
12434 self.dest_disk_info = None
12435 self.dest_x509_ca = None
12437 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12438 self.dst_node = None
12440 if len(self.op.target_node) != len(self.instance.disks):
12441 raise errors.OpPrereqError(("Received destination information for %s"
12442 " disks, but instance %s has %s disks") %
12443 (len(self.op.target_node), instance_name,
12444 len(self.instance.disks)),
12445 errors.ECODE_INVAL)
12447 cds = _GetClusterDomainSecret()
12449 # Check X509 key name
12451 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12452 except (TypeError, ValueError), err:
12453 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12455 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12456 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12457 errors.ECODE_INVAL)
12459 # Load and verify CA
12461 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12462 except OpenSSL.crypto.Error, err:
12463 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12464 (err, ), errors.ECODE_INVAL)
12466 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12467 if errcode is not None:
12468 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12469 (msg, ), errors.ECODE_INVAL)
12471 self.dest_x509_ca = cert
12473 # Verify target information
12475 for idx, disk_data in enumerate(self.op.target_node):
12477 (host, port, magic) = \
12478 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12479 except errors.GenericError, err:
12480 raise errors.OpPrereqError("Target info for disk %s: %s" %
12481 (idx, err), errors.ECODE_INVAL)
12483 disk_info.append((host, port, magic))
12485 assert len(disk_info) == len(self.op.target_node)
12486 self.dest_disk_info = disk_info
12489 raise errors.ProgrammerError("Unhandled export mode %r" %
12492 # instance disk type verification
12493 # TODO: Implement export support for file-based disks
12494 for disk in self.instance.disks:
12495 if disk.dev_type == constants.LD_FILE:
12496 raise errors.OpPrereqError("Export not supported for instances with"
12497 " file-based disks", errors.ECODE_INVAL)
12499 def _CleanupExports(self, feedback_fn):
12500 """Removes exports of current instance from all other nodes.
12502 If an instance in a cluster with nodes A..D was exported to node C, its
12503 exports will be removed from the nodes A, B and D.
12506 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12508 nodelist = self.cfg.GetNodeList()
12509 nodelist.remove(self.dst_node.name)
12511 # on one-node clusters nodelist will be empty after the removal
12512 # if we proceed the backup would be removed because OpBackupQuery
12513 # substitutes an empty list with the full cluster node list.
12514 iname = self.instance.name
12516 feedback_fn("Removing old exports for instance %s" % iname)
12517 exportlist = self.rpc.call_export_list(nodelist)
12518 for node in exportlist:
12519 if exportlist[node].fail_msg:
12521 if iname in exportlist[node].payload:
12522 msg = self.rpc.call_export_remove(node, iname).fail_msg
12524 self.LogWarning("Could not remove older export for instance %s"
12525 " on node %s: %s", iname, node, msg)
12527 def Exec(self, feedback_fn):
12528 """Export an instance to an image in the cluster.
12531 assert self.op.mode in constants.EXPORT_MODES
12533 instance = self.instance
12534 src_node = instance.primary_node
12536 if self.op.shutdown:
12537 # shutdown the instance, but not the disks
12538 feedback_fn("Shutting down instance %s" % instance.name)
12539 result = self.rpc.call_instance_shutdown(src_node, instance,
12540 self.op.shutdown_timeout)
12541 # TODO: Maybe ignore failures if ignore_remove_failures is set
12542 result.Raise("Could not shutdown instance %s on"
12543 " node %s" % (instance.name, src_node))
12545 # set the disks ID correctly since call_instance_start needs the
12546 # correct drbd minor to create the symlinks
12547 for disk in instance.disks:
12548 self.cfg.SetDiskID(disk, src_node)
12550 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12553 # Activate the instance disks if we're exporting a stopped instance
12554 feedback_fn("Activating disks for %s" % instance.name)
12555 _StartInstanceDisks(self, instance, None)
12558 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12561 helper.CreateSnapshots()
12563 if (self.op.shutdown and
12564 instance.admin_state == constants.ADMINST_UP and
12565 not self.op.remove_instance):
12566 assert not activate_disks
12567 feedback_fn("Starting instance %s" % instance.name)
12568 result = self.rpc.call_instance_start(src_node,
12569 (instance, None, None), False)
12570 msg = result.fail_msg
12572 feedback_fn("Failed to start instance: %s" % msg)
12573 _ShutdownInstanceDisks(self, instance)
12574 raise errors.OpExecError("Could not start instance: %s" % msg)
12576 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12577 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12578 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12579 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12580 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12582 (key_name, _, _) = self.x509_key_name
12585 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12588 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12589 key_name, dest_ca_pem,
12594 # Check for backwards compatibility
12595 assert len(dresults) == len(instance.disks)
12596 assert compat.all(isinstance(i, bool) for i in dresults), \
12597 "Not all results are boolean: %r" % dresults
12601 feedback_fn("Deactivating disks for %s" % instance.name)
12602 _ShutdownInstanceDisks(self, instance)
12604 if not (compat.all(dresults) and fin_resu):
12607 failures.append("export finalization")
12608 if not compat.all(dresults):
12609 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12611 failures.append("disk export: disk(s) %s" % fdsk)
12613 raise errors.OpExecError("Export failed, errors in %s" %
12614 utils.CommaJoin(failures))
12616 # At this point, the export was successful, we can cleanup/finish
12618 # Remove instance if requested
12619 if self.op.remove_instance:
12620 feedback_fn("Removing instance %s" % instance.name)
12621 _RemoveInstance(self, feedback_fn, instance,
12622 self.op.ignore_remove_failures)
12624 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12625 self._CleanupExports(feedback_fn)
12627 return fin_resu, dresults
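# Illustrative sketch (not part of the original module): the (fin_resu,
# dresults) pair returned above is a global success flag plus one boolean per
# exported disk, e.g. (True, [True, True]) for a fully successful two-disk
# export; any False entry is reported via the "Export failed" error above.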
12630 class LUBackupRemove(NoHooksLU):
12631 """Remove exports related to the named instance.
12636 def ExpandNames(self):
12637 self.needed_locks = {}
12638 # We need all nodes to be locked in order for RemoveExport to work, but we
12639 # don't need to lock the instance itself, as nothing will happen to it (and
12640 # we can remove exports also for a removed instance)
12641 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12643 def Exec(self, feedback_fn):
12644 """Remove any export.
12647 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12648 # If the instance was not found we'll try with the name that was passed in.
12649 # This will only work if it was an FQDN, though.
12651 if not instance_name:
12653 instance_name = self.op.instance_name
12655 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12656 exportlist = self.rpc.call_export_list(locked_nodes)
12658 for node in exportlist:
12659 msg = exportlist[node].fail_msg
12661 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12663 if instance_name in exportlist[node].payload:
12665 result = self.rpc.call_export_remove(node, instance_name)
12666 msg = result.fail_msg
12668 logging.error("Could not remove export for instance %s"
12669 " on node %s: %s", instance_name, node, msg)
12671 if fqdn_warn and not found:
12672 feedback_fn("Export not found. If trying to remove an export belonging"
12673 " to a deleted instance please use its Fully Qualified"
12677 class LUGroupAdd(LogicalUnit):
12678 """Logical unit for creating node groups.
12681 HPATH = "group-add"
12682 HTYPE = constants.HTYPE_GROUP
12685 def ExpandNames(self):
12686 # We need the new group's UUID here so that we can create and acquire the
12687 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12688 # that it should not check whether the UUID exists in the configuration.
12689 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12690 self.needed_locks = {}
12691 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12693 def CheckPrereq(self):
12694 """Check prerequisites.
12696 This checks that the given group name is not an existing node group
12701 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12702 except errors.OpPrereqError:
12705 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12706 " node group (UUID: %s)" %
12707 (self.op.group_name, existing_uuid),
12708 errors.ECODE_EXISTS)
12710 if self.op.ndparams:
12711 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
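# Ensure every known disk template has an entry (possibly empty) with correctly
# typed parameters; if none were supplied, fall back to the cluster-wide
# disk parameter defaults.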
12713 if self.op.diskparams:
12714 for templ in constants.DISK_TEMPLATES:
12715 if templ not in self.op.diskparams:
12716 self.op.diskparams[templ] = {}
12717 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12719 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12721 def BuildHooksEnv(self):
12722 """Build hooks env.
12726 "GROUP_NAME": self.op.group_name,
12729 def BuildHooksNodes(self):
12730 """Build hooks nodes.
12733 mn = self.cfg.GetMasterNode()
12734 return ([mn], [mn])
12736 def Exec(self, feedback_fn):
12737 """Add the node group to the cluster.
12740 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12741 uuid=self.group_uuid,
12742 alloc_policy=self.op.alloc_policy,
12743 ndparams=self.op.ndparams,
12744 diskparams=self.op.diskparams)
12746 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12747 del self.remove_locks[locking.LEVEL_NODEGROUP]
12750 class LUGroupAssignNodes(NoHooksLU):
12751 """Logical unit for assigning nodes to groups.
12756 def ExpandNames(self):
12757 # These raise errors.OpPrereqError on their own:
12758 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12759 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12761 # We want to lock all the affected nodes and groups. We have readily
12762 # available the list of nodes, and the *destination* group. To gather the
12763 # list of "source" groups, we need to fetch node information later on.
12764 self.needed_locks = {
12765 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12766 locking.LEVEL_NODE: self.op.nodes,
12769 def DeclareLocks(self, level):
12770 if level == locking.LEVEL_NODEGROUP:
12771 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12773 # Try to get all affected nodes' groups without having the group or node
12774 # lock yet. Needs verification later in the code flow.
12775 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12777 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12779 def CheckPrereq(self):
12780 """Check prerequisites.
12783 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12784 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12785 frozenset(self.op.nodes))
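# Nodes may have been moved to other groups while we were waiting for locks;
# verify that the groups we locked still cover every affected node.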
12787 expected_locks = (set([self.group_uuid]) |
12788 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12789 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12790 if actual_locks != expected_locks:
12791 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12792 " current groups are '%s', used to be '%s'" %
12793 (utils.CommaJoin(expected_locks),
12794 utils.CommaJoin(actual_locks)))
12796 self.node_data = self.cfg.GetAllNodesInfo()
12797 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12798 instance_data = self.cfg.GetAllInstancesInfo()
12800 if self.group is None:
12801 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12802 (self.op.group_name, self.group_uuid))
12804 (new_splits, previous_splits) = \
12805 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12806 for node in self.op.nodes],
12807 self.node_data, instance_data)
12810 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12812 if not self.op.force:
12813 raise errors.OpExecError("The following instances get split by this"
12814 " change and --force was not given: %s" %
12817 self.LogWarning("This operation will split the following instances: %s",
12820 if previous_splits:
12821 self.LogWarning("In addition, these already-split instances continue"
12822 " to be split across groups: %s",
12823 utils.CommaJoin(utils.NiceSort(previous_splits)))
12825 def Exec(self, feedback_fn):
12826 """Assign nodes to a new group.
12829 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12831 self.cfg.AssignGroupNodes(mods)
12834 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12835 """Check for split instances after a node assignment.
12837 This method considers a series of node assignments as an atomic operation,
12838 and returns information about split instances after applying the set of changes.
12841 In particular, it returns information about newly split instances, and about
12842 instances that were already split and remain so after the change.
12844 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
12847 @type changes: list of (node_name, new_group_uuid) pairs.
12848 @param changes: list of node assignments to consider.
12849 @param node_data: a dict with data for all nodes
12850 @param instance_data: a dict with all instances to consider
12851 @rtype: a two-tuple
12852 @return: a list of instances that were previously okay but become split as a
12853 consequence of this change, and a list of instances that were previously
12854 split and that this change does not fix.
12857 changed_nodes = dict((node, group) for node, group in changes
12858 if node_data[node].group != group)
12860 all_split_instances = set()
12861 previously_split_instances = set()
12863 def InstanceNodes(instance):
12864 return [instance.primary_node] + list(instance.secondary_nodes)
12866 for inst in instance_data.values():
12867 if inst.disk_template not in constants.DTS_INT_MIRROR:
12870 instance_nodes = InstanceNodes(inst)
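# An instance is currently split if its nodes already span more than one group.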
12872 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12873 previously_split_instances.add(inst.name)
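# Re-check with the proposed assignment applied; nodes not listed in the
# changes keep their current group.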
12875 if len(set(changed_nodes.get(node, node_data[node].group)
12876 for node in instance_nodes)) > 1:
12877 all_split_instances.add(inst.name)
12879 return (list(all_split_instances - previously_split_instances),
12880 list(previously_split_instances & all_split_instances))
12883 class _GroupQuery(_QueryBase):
12884 FIELDS = query.GROUP_FIELDS
12886 def ExpandNames(self, lu):
12887 lu.needed_locks = {}
12889 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12890 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12893 self.wanted = [name_to_uuid[name]
12894 for name in utils.NiceSort(name_to_uuid.keys())]
12896 # Accepted names may be either group names or UUIDs.
12899 all_uuid = frozenset(self._all_groups.keys())
12901 for name in self.names:
12902 if name in all_uuid:
12903 self.wanted.append(name)
12904 elif name in name_to_uuid:
12905 self.wanted.append(name_to_uuid[name])
12907 missing.append(name)
12910 raise errors.OpPrereqError("Some groups do not exist: %s" %
12911 utils.CommaJoin(missing),
12912 errors.ECODE_NOENT)
12914 def DeclareLocks(self, lu, level):
12917 def _GetQueryData(self, lu):
12918 """Computes the list of node groups and their attributes.
12921 do_nodes = query.GQ_NODE in self.requested_data
12922 do_instances = query.GQ_INST in self.requested_data
12924 group_to_nodes = None
12925 group_to_instances = None
12927 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12928 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12929 # latter GetAllInstancesInfo() is not enough, for we have to go through
12930 # instance->node. Hence, we will need to process nodes even if we only need
12931 # instance information.
12932 if do_nodes or do_instances:
12933 all_nodes = lu.cfg.GetAllNodesInfo()
12934 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12937 for node in all_nodes.values():
12938 if node.group in group_to_nodes:
12939 group_to_nodes[node.group].append(node.name)
12940 node_to_group[node.name] = node.group
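# When instance data was requested, attribute each instance to the group of
# its primary node.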
12943 all_instances = lu.cfg.GetAllInstancesInfo()
12944 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12946 for instance in all_instances.values():
12947 node = instance.primary_node
12948 if node in node_to_group:
12949 group_to_instances[node_to_group[node]].append(instance.name)
12952 # Do not pass on node information if it was not requested.
12953 group_to_nodes = None
12955 return query.GroupQueryData([self._all_groups[uuid]
12956 for uuid in self.wanted],
12957 group_to_nodes, group_to_instances)
12960 class LUGroupQuery(NoHooksLU):
12961 """Logical unit for querying node groups.
12966 def CheckArguments(self):
12967 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12968 self.op.output_fields, False)
12970 def ExpandNames(self):
12971 self.gq.ExpandNames(self)
12973 def DeclareLocks(self, level):
12974 self.gq.DeclareLocks(self, level)
12976 def Exec(self, feedback_fn):
12977 return self.gq.OldStyleQuery(self)
12980 class LUGroupSetParams(LogicalUnit):
12981 """Modifies the parameters of a node group.
12984 HPATH = "group-modify"
12985 HTYPE = constants.HTYPE_GROUP
12988 def CheckArguments(self):
12991 self.op.diskparams,
12992 self.op.alloc_policy,
12997 if all_changes.count(None) == len(all_changes):
12998 raise errors.OpPrereqError("Please pass at least one modification",
12999 errors.ECODE_INVAL)
13001 def ExpandNames(self):
13002 # This raises errors.OpPrereqError on its own:
13003 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13005 self.needed_locks = {
13006 locking.LEVEL_NODEGROUP: [self.group_uuid],
13009 def CheckPrereq(self):
13010 """Check prerequisites.
13013 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13015 if self.group is None:
13016 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13017 (self.op.group_name, self.group_uuid))
13019 if self.op.ndparams:
13020 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13021 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13022 self.new_ndparams = new_ndparams
13024 if self.op.diskparams:
13025 self.new_diskparams = dict()
13026 for templ in constants.DISK_TEMPLATES:
13027 if templ not in self.op.diskparams:
13028 self.op.diskparams[templ] = {}
13029 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13030 self.op.diskparams[templ])
13031 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13032 self.new_diskparams[templ] = new_templ_params
13034 if self.op.hv_state:
13035 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13036 self.group.hv_state_static)
13038 if self.op.disk_state:
13039 self.new_disk_state = \
13040 _MergeAndVerifyDiskState(self.op.disk_state,
13041 self.group.disk_state_static)
13043 def BuildHooksEnv(self):
13044 """Build hooks env.
13048 "GROUP_NAME": self.op.group_name,
13049 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13052 def BuildHooksNodes(self):
13053 """Build hooks nodes.
13056 mn = self.cfg.GetMasterNode()
13057 return ([mn], [mn])
13059 def Exec(self, feedback_fn):
13060 """Modifies the node group.
13065 if self.op.ndparams:
13066 self.group.ndparams = self.new_ndparams
13067 result.append(("ndparams", str(self.group.ndparams)))
13069 if self.op.diskparams:
13070 self.group.diskparams = self.new_diskparams
13071 result.append(("diskparams", str(self.group.diskparams)))
13073 if self.op.alloc_policy:
13074 self.group.alloc_policy = self.op.alloc_policy
13076 if self.op.hv_state:
13077 self.group.hv_state_static = self.new_hv_state
13079 if self.op.disk_state:
13080 self.group.disk_state_static = self.new_disk_state
13082 self.cfg.Update(self.group, feedback_fn)
13086 class LUGroupRemove(LogicalUnit):
13087 HPATH = "group-remove"
13088 HTYPE = constants.HTYPE_GROUP
13091 def ExpandNames(self):
13092 # This raises errors.OpPrereqError on its own:
13093 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13094 self.needed_locks = {
13095 locking.LEVEL_NODEGROUP: [self.group_uuid],
13098 def CheckPrereq(self):
13099 """Check prerequisites.
13101 This checks that the given group name exists as a node group, that it is
13102 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
13106 # Verify that the group is empty.
13107 group_nodes = [node.name
13108 for node in self.cfg.GetAllNodesInfo().values()
13109 if node.group == self.group_uuid]
13112 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13114 (self.op.group_name,
13115 utils.CommaJoin(utils.NiceSort(group_nodes))),
13116 errors.ECODE_STATE)
13118 # Verify the cluster would not be left group-less.
13119 if len(self.cfg.GetNodeGroupList()) == 1:
13120 raise errors.OpPrereqError("Group '%s' is the only group,"
13121 " cannot be removed" %
13122 self.op.group_name,
13123 errors.ECODE_STATE)
13125 def BuildHooksEnv(self):
13126 """Build hooks env.
13130 "GROUP_NAME": self.op.group_name,
13133 def BuildHooksNodes(self):
13134 """Build hooks nodes.
13137 mn = self.cfg.GetMasterNode()
13138 return ([mn], [mn])
13140 def Exec(self, feedback_fn):
13141 """Remove the node group.
13145 self.cfg.RemoveNodeGroup(self.group_uuid)
13146 except errors.ConfigurationError:
13147 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13148 (self.op.group_name, self.group_uuid))
13150 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13153 class LUGroupRename(LogicalUnit):
13154 HPATH = "group-rename"
13155 HTYPE = constants.HTYPE_GROUP
13158 def ExpandNames(self):
13159 # This raises errors.OpPrereqError on its own:
13160 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13162 self.needed_locks = {
13163 locking.LEVEL_NODEGROUP: [self.group_uuid],
13166 def CheckPrereq(self):
13167 """Check prerequisites.
13169 Ensures the requested new name is not yet used.
13173 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13174 except errors.OpPrereqError:
13177 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13178 " node group (UUID: %s)" %
13179 (self.op.new_name, new_name_uuid),
13180 errors.ECODE_EXISTS)
13182 def BuildHooksEnv(self):
13183 """Build hooks env.
13187 "OLD_NAME": self.op.group_name,
13188 "NEW_NAME": self.op.new_name,
13191 def BuildHooksNodes(self):
13192 """Build hooks nodes.
13195 mn = self.cfg.GetMasterNode()
13197 all_nodes = self.cfg.GetAllNodesInfo()
13198 all_nodes.pop(mn, None)
13201 run_nodes.extend(node.name for node in all_nodes.values()
13202 if node.group == self.group_uuid)
13204 return (run_nodes, run_nodes)
13206 def Exec(self, feedback_fn):
13207 """Rename the node group.
13210 group = self.cfg.GetNodeGroup(self.group_uuid)
13213 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13214 (self.op.group_name, self.group_uuid))
13216 group.name = self.op.new_name
13217 self.cfg.Update(group, feedback_fn)
13219 return self.op.new_name
13222 class LUGroupEvacuate(LogicalUnit):
13223 HPATH = "group-evacuate"
13224 HTYPE = constants.HTYPE_GROUP
13227 def ExpandNames(self):
13228 # This raises errors.OpPrereqError on its own:
13229 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13231 if self.op.target_groups:
13232 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13233 self.op.target_groups)
13235 self.req_target_uuids = []
13237 if self.group_uuid in self.req_target_uuids:
13238 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13239 " as a target group (targets are %s)" %
13241 utils.CommaJoin(self.req_target_uuids)),
13242 errors.ECODE_INVAL)
13244 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13246 self.share_locks = _ShareAll()
13247 self.needed_locks = {
13248 locking.LEVEL_INSTANCE: [],
13249 locking.LEVEL_NODEGROUP: [],
13250 locking.LEVEL_NODE: [],
13253 def DeclareLocks(self, level):
13254 if level == locking.LEVEL_INSTANCE:
13255 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13257 # Lock instances optimistically, needs verification once node and group
13258 # locks have been acquired
13259 self.needed_locks[locking.LEVEL_INSTANCE] = \
13260 self.cfg.GetNodeGroupInstances(self.group_uuid)
13262 elif level == locking.LEVEL_NODEGROUP:
13263 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13265 if self.req_target_uuids:
13266 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13268 # Lock all groups used by instances optimistically; this requires going
13269 # via the node before it's locked, requiring verification later on
13270 lock_groups.update(group_uuid
13271 for instance_name in
13272 self.owned_locks(locking.LEVEL_INSTANCE)
13274 self.cfg.GetInstanceNodeGroups(instance_name))
13276 # No target groups, need to lock all of them
13277 lock_groups = locking.ALL_SET
13279 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13281 elif level == locking.LEVEL_NODE:
13282 # This will only lock the nodes in the group to be evacuated which
13283 # contain actual instances
13284 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13285 self._LockInstancesNodes()
13287 # Lock all nodes in group to be evacuated and target groups
13288 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13289 assert self.group_uuid in owned_groups
13290 member_nodes = [node_name
13291 for group in owned_groups
13292 for node_name in self.cfg.GetNodeGroup(group).members]
13293 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13295 def CheckPrereq(self):
13296 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13297 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13298 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13300 assert owned_groups.issuperset(self.req_target_uuids)
13301 assert self.group_uuid in owned_groups
13303 # Check if locked instances are still correct
13304 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13306 # Get instance information
13307 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13309 # Check if node groups for locked instances are still correct
13310 for instance_name in owned_instances:
13311 inst = self.instances[instance_name]
13312 assert owned_nodes.issuperset(inst.all_nodes), \
13313 "Instance %s's nodes changed while we kept the lock" % instance_name
13315 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13318 assert self.group_uuid in inst_groups, \
13319 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13321 if self.req_target_uuids:
13322 # User requested specific target groups
13323 self.target_uuids = self.req_target_uuids
13325 # All groups except the one to be evacuated are potential targets
13326 self.target_uuids = [group_uuid for group_uuid in owned_groups
13327 if group_uuid != self.group_uuid]
13329 if not self.target_uuids:
13330 raise errors.OpPrereqError("There are no possible target groups",
13331 errors.ECODE_INVAL)
13333 def BuildHooksEnv(self):
13334 """Build hooks env.
13338 "GROUP_NAME": self.op.group_name,
13339 "TARGET_GROUPS": " ".join(self.target_uuids),
13342 def BuildHooksNodes(self):
13343 """Build hooks nodes.
13346 mn = self.cfg.GetMasterNode()
13348 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13350 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13352 return (run_nodes, run_nodes)
13354 def Exec(self, feedback_fn):
13355 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13357 assert self.group_uuid not in self.target_uuids
13359 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13360 instances=instances, target_groups=self.target_uuids)
13362 ial.Run(self.op.iallocator)
13364 if not ial.success:
13365 raise errors.OpPrereqError("Can't compute group evacuation using"
13366 " iallocator '%s': %s" %
13367 (self.op.iallocator, ial.info),
13368 errors.ECODE_NORES)
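# Translate the iallocator's evacuation plan into jobs that will be submitted
# through ResultWithJobs below.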
13370 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13372 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13373 len(jobs), self.op.group_name)
13375 return ResultWithJobs(jobs)
13378 class TagsLU(NoHooksLU): # pylint: disable=W0223
13379 """Generic tags LU.
13381 This is an abstract class which is the parent of all the other tags LUs.
13384 def ExpandNames(self):
13385 self.group_uuid = None
13386 self.needed_locks = {}
13387 if self.op.kind == constants.TAG_NODE:
13388 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13389 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13390 elif self.op.kind == constants.TAG_INSTANCE:
13391 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13392 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13393 elif self.op.kind == constants.TAG_NODEGROUP:
13394 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13396 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13397 # not possible to acquire the BGL based on opcode parameters)
13399 def CheckPrereq(self):
13400 """Check prerequisites.
13403 if self.op.kind == constants.TAG_CLUSTER:
13404 self.target = self.cfg.GetClusterInfo()
13405 elif self.op.kind == constants.TAG_NODE:
13406 self.target = self.cfg.GetNodeInfo(self.op.name)
13407 elif self.op.kind == constants.TAG_INSTANCE:
13408 self.target = self.cfg.GetInstanceInfo(self.op.name)
13409 elif self.op.kind == constants.TAG_NODEGROUP:
13410 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13412 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13413 str(self.op.kind), errors.ECODE_INVAL)
13416 class LUTagsGet(TagsLU):
13417 """Returns the tags of a given object.
13422 def ExpandNames(self):
13423 TagsLU.ExpandNames(self)
13425 # Share locks as this is only a read operation
13426 self.share_locks = _ShareAll()
13428 def Exec(self, feedback_fn):
13429 """Returns the tag list.
13432 return list(self.target.GetTags())
13435 class LUTagsSearch(NoHooksLU):
13436 """Searches the tags for a given pattern.
13441 def ExpandNames(self):
13442 self.needed_locks = {}
13444 def CheckPrereq(self):
13445 """Check prerequisites.
13447 This checks the pattern passed for validity by compiling it.
13451 self.re = re.compile(self.op.pattern)
13452 except re.error, err:
13453 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13454 (self.op.pattern, err), errors.ECODE_INVAL)
13456 def Exec(self, feedback_fn):
13457 """Returns the tag list.
13461 tgts = [("/cluster", cfg.GetClusterInfo())]
13462 ilist = cfg.GetAllInstancesInfo().values()
13463 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13464 nlist = cfg.GetAllNodesInfo().values()
13465 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13466 tgts.extend(("/nodegroup/%s" % n.name, n)
13467 for n in cfg.GetAllNodeGroupsInfo().values())
13469 for path, target in tgts:
13470 for tag in target.GetTags():
13471 if self.re.search(tag):
13472 results.append((path, tag))
13476 class LUTagsSet(TagsLU):
13477 """Sets a tag on a given object.
13482 def CheckPrereq(self):
13483 """Check prerequisites.
13485 This checks the type and length of the tag name and value.
13488 TagsLU.CheckPrereq(self)
13489 for tag in self.op.tags:
13490 objects.TaggableObject.ValidateTag(tag)
13492 def Exec(self, feedback_fn):
13497 for tag in self.op.tags:
13498 self.target.AddTag(tag)
13499 except errors.TagError, err:
13500 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13501 self.cfg.Update(self.target, feedback_fn)
13504 class LUTagsDel(TagsLU):
13505 """Delete a list of tags from a given object.
13510 def CheckPrereq(self):
13511 """Check prerequisites.
13513 This checks that we have the given tag.
13516 TagsLU.CheckPrereq(self)
13517 for tag in self.op.tags:
13518 objects.TaggableObject.ValidateTag(tag)
13519 del_tags = frozenset(self.op.tags)
13520 cur_tags = self.target.GetTags()
13522 diff_tags = del_tags - cur_tags
13524 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13525 raise errors.OpPrereqError("Tag(s) %s not found" %
13526 (utils.CommaJoin(diff_names), ),
13527 errors.ECODE_NOENT)
13529 def Exec(self, feedback_fn):
13530 """Remove the tag from the object.
13533 for tag in self.op.tags:
13534 self.target.RemoveTag(tag)
13535 self.cfg.Update(self.target, feedback_fn)
13538 class LUTestDelay(NoHooksLU):
13539 """Sleep for a specified amount of time.
13541 This LU sleeps on the master and/or nodes for a specified amount of time.
13547 def ExpandNames(self):
13548 """Expand names and set required locks.
13550 This expands the node list, if any.
13553 self.needed_locks = {}
13554 if self.op.on_nodes:
13555 # _GetWantedNodes can be used here, but is not always appropriate to use
13556 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13557 # more information.
13558 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13559 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13561 def _TestDelay(self):
13562 """Do the actual sleep.
13565 if self.op.on_master:
13566 if not utils.TestDelay(self.op.duration):
13567 raise errors.OpExecError("Error during master delay test")
13568 if self.op.on_nodes:
13569 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13570 for node, node_result in result.items():
13571 node_result.Raise("Failure during rpc call to node %s" % node)
13573 def Exec(self, feedback_fn):
13574 """Execute the test delay opcode, with the wanted repetitions.
13577 if self.op.repeat == 0:
13580 top_value = self.op.repeat - 1
13581 for i in range(self.op.repeat):
13582 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13586 class LUTestJqueue(NoHooksLU):
13587 """Utility LU to test some aspects of the job queue.
13592 # Must be lower than default timeout for WaitForJobChange to see whether it
13593 # notices changed jobs
13594 _CLIENT_CONNECT_TIMEOUT = 20.0
13595 _CLIENT_CONFIRM_TIMEOUT = 60.0
13598 def _NotifyUsingSocket(cls, cb, errcls):
13599 """Opens a Unix socket and waits for another program to connect.
13602 @param cb: Callback to send socket name to client
13603 @type errcls: class
13604 @param errcls: Exception class to use for errors
13607 # Using a temporary directory as there's no easy way to create temporary
13608 # sockets without writing a custom loop around tempfile.mktemp and
13610 tmpdir = tempfile.mkdtemp()
13612 tmpsock = utils.PathJoin(tmpdir, "sock")
13614 logging.debug("Creating temporary socket at %s", tmpsock)
13615 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13620 # Send details to client
13623 # Wait for client to connect before continuing
13624 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13626 (conn, _) = sock.accept()
13627 except socket.error, err:
13628 raise errcls("Client didn't connect in time (%s)" % err)
13632 # Remove as soon as client is connected
13633 shutil.rmtree(tmpdir)
13635 # Wait for client to close
13638 # pylint: disable=E1101
13639 # Instance of '_socketobject' has no ... member
13640 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13642 except socket.error, err:
13643 raise errcls("Client failed to confirm notification (%s)" % err)
13647 def _SendNotification(self, test, arg, sockname):
13648 """Sends a notification to the client.
13651 @param test: Test name
13652 @param arg: Test argument (depends on test)
13653 @type sockname: string
13654 @param sockname: Socket path
13657 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13659 def _Notify(self, prereq, test, arg):
13660 """Notifies the client of a test.
13663 @param prereq: Whether this is a prereq-phase test
13665 @param test: Test name
13666 @param arg: Test argument (depends on test)
13670 errcls = errors.OpPrereqError
13672 errcls = errors.OpExecError
13674 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13678 def CheckArguments(self):
13679 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13680 self.expandnames_calls = 0
13682 def ExpandNames(self):
13683 checkargs_calls = getattr(self, "checkargs_calls", 0)
13684 if checkargs_calls < 1:
13685 raise errors.ProgrammerError("CheckArguments was not called")
13687 self.expandnames_calls += 1
13689 if self.op.notify_waitlock:
13690 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13692 self.LogInfo("Expanding names")
13694 # Get lock on master node (just to get a lock, not for a particular reason)
13695 self.needed_locks = {
13696 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13699 def Exec(self, feedback_fn):
13700 if self.expandnames_calls < 1:
13701 raise errors.ProgrammerError("ExpandNames was not called")
13703 if self.op.notify_exec:
13704 self._Notify(False, constants.JQT_EXEC, None)
13706 self.LogInfo("Executing")
13708 if self.op.log_messages:
13709 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13710 for idx, msg in enumerate(self.op.log_messages):
13711 self.LogInfo("Sending log message %s", idx + 1)
13712 feedback_fn(constants.JQT_MSGPREFIX + msg)
13713 # Report how many test messages have been sent
13714 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13717 raise errors.OpExecError("Opcode failure was requested")
13722 class IAllocator(object):
13723 """IAllocator framework.
13725 An IAllocator instance has four sets of attributes:
13726 - cfg that is needed to query the cluster
13727 - input data (all members of the _KEYS class attribute are required)
13728 - four buffer attributes (in|out_data|text), that represent the
13729 input (to the external script) in text and data structure format,
13730 and the output from it, again in two formats
13731 - the result variables from the script (success, info, nodes) for
13735 # pylint: disable=R0902
13736 # lots of instance attributes
13738 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13740 self.rpc = rpc_runner
13741 # init buffer variables
13742 self.in_text = self.out_text = self.in_data = self.out_data = None
13743 # init all input fields so that pylint is happy
13745 self.memory = self.disks = self.disk_template = None
13746 self.os = self.tags = self.nics = self.vcpus = None
13747 self.hypervisor = None
13748 self.relocate_from = None
13750 self.instances = None
13751 self.evac_mode = None
13752 self.target_groups = []
13754 self.required_nodes = None
13755 # init result fields
13756 self.success = self.info = self.result = None
13759 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13761 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13762 " IAllocator" % self.mode)
13764 keyset = [n for (n, _) in keydata]
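# Validate the keyword arguments both ways: reject unknown parameters and
# require every key the selected mode expects.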
13767 if key not in keyset:
13768 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13769 " IAllocator" % key)
13770 setattr(self, key, kwargs[key])
13773 if key not in kwargs:
13774 raise errors.ProgrammerError("Missing input parameter '%s' to"
13775 " IAllocator" % key)
13776 self._BuildInputData(compat.partial(fn, self), keydata)
13778 def _ComputeClusterData(self):
13779 """Compute the generic allocator input data.
13781 This is the data that is independent of the actual operation.
13785 cluster_info = cfg.GetClusterInfo()
13788 "version": constants.IALLOCATOR_VERSION,
13789 "cluster_name": cfg.GetClusterName(),
13790 "cluster_tags": list(cluster_info.GetTags()),
13791 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13792 # we don't have job IDs
13794 ninfo = cfg.GetAllNodesInfo()
13795 iinfo = cfg.GetAllInstancesInfo().values()
13796 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13799 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13801 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13802 hypervisor_name = self.hypervisor
13803 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13804 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13806 hypervisor_name = cluster_info.primary_hypervisor
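# Query the live node state (memory, storage, CPUs) and the running-instance
# information from all vm_capable nodes.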
13808 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13811 self.rpc.call_all_instances_info(node_list,
13812 cluster_info.enabled_hypervisors)
13814 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13816 config_ndata = self._ComputeBasicNodeData(ninfo)
13817 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13818 i_list, config_ndata)
13819 assert len(data["nodes"]) == len(ninfo), \
13820 "Incomplete node data computed"
13822 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13824 self.in_data = data
13827 def _ComputeNodeGroupData(cfg):
13828 """Compute node groups data.
13831 ng = dict((guuid, {
13832 "name": gdata.name,
13833 "alloc_policy": gdata.alloc_policy,
13835 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13840 def _ComputeBasicNodeData(node_cfg):
13841 """Compute global node data.
13844 @returns: a dict of name: (node dict, node config)
13847 # fill in static (config-based) values
13848 node_results = dict((ninfo.name, {
13849 "tags": list(ninfo.GetTags()),
13850 "primary_ip": ninfo.primary_ip,
13851 "secondary_ip": ninfo.secondary_ip,
13852 "offline": ninfo.offline,
13853 "drained": ninfo.drained,
13854 "master_candidate": ninfo.master_candidate,
13855 "group": ninfo.group,
13856 "master_capable": ninfo.master_capable,
13857 "vm_capable": ninfo.vm_capable,
13859 for ninfo in node_cfg.values())
13861 return node_results
13864 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13866 """Compute global node data.
13868 @param node_results: the basic node structures as filled from the config
13871 #TODO(dynmem): compute the right data on MAX and MIN memory
13872 # make a copy of the current dict
13873 node_results = dict(node_results)
13874 for nname, nresult in node_data.items():
13875 assert nname in node_results, "Missing basic data for node %s" % nname
13876 ninfo = node_cfg[nname]
13878 if not (ninfo.offline or ninfo.drained):
13879 nresult.Raise("Can't get data for node %s" % nname)
13880 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13882 remote_info = _MakeLegacyNodeInfo(nresult.payload)
13884 for attr in ["memory_total", "memory_free", "memory_dom0",
13885 "vg_size", "vg_free", "cpu_total"]:
13886 if attr not in remote_info:
13887 raise errors.OpExecError("Node '%s' didn't return attribute"
13888 " '%s'" % (nname, attr))
13889 if not isinstance(remote_info[attr], int):
13890 raise errors.OpExecError("Node '%s' returned invalid value"
13892 (nname, attr, remote_info[attr]))
13893 # compute memory used by primary instances
13894 i_p_mem = i_p_up_mem = 0
13895 for iinfo, beinfo in i_list:
13896 if iinfo.primary_node == nname:
13897 i_p_mem += beinfo[constants.BE_MAXMEM]
13898 if iinfo.name not in node_iinfo[nname].payload:
13901 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13902 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13903 remote_info["memory_free"] -= max(0, i_mem_diff)
13905 if iinfo.admin_state == constants.ADMINST_UP:
13906 i_p_up_mem += beinfo[constants.BE_MAXMEM]
13908 # compute memory used by instances
13910 "total_memory": remote_info["memory_total"],
13911 "reserved_memory": remote_info["memory_dom0"],
13912 "free_memory": remote_info["memory_free"],
13913 "total_disk": remote_info["vg_size"],
13914 "free_disk": remote_info["vg_free"],
13915 "total_cpus": remote_info["cpu_total"],
13916 "i_pri_memory": i_p_mem,
13917 "i_pri_up_memory": i_p_up_mem,
13919 pnr_dyn.update(node_results[nname])
13920 node_results[nname] = pnr_dyn
13922 return node_results
13925 def _ComputeInstanceData(cluster_info, i_list):
13926 """Compute global instance data.
13930 for iinfo, beinfo in i_list:
13932 for nic in iinfo.nics:
13933 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13937 "mode": filled_params[constants.NIC_MODE],
13938 "link": filled_params[constants.NIC_LINK],
13940 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13941 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13942 nic_data.append(nic_dict)
13944 "tags": list(iinfo.GetTags()),
13945 "admin_state": iinfo.admin_state,
13946 "vcpus": beinfo[constants.BE_VCPUS],
13947 "memory": beinfo[constants.BE_MAXMEM],
13949 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13951 "disks": [{constants.IDISK_SIZE: dsk.size,
13952 constants.IDISK_MODE: dsk.mode}
13953 for dsk in iinfo.disks],
13954 "disk_template": iinfo.disk_template,
13955 "hypervisor": iinfo.hypervisor,
13957 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13959 instance_data[iinfo.name] = pir
13961 return instance_data
13963 def _AddNewInstance(self):
13964 """Add new instance data to allocator structure.
13966 This in combination with _ComputeClusterData will create the
13967 correct structure needed as input for the allocator.
13969 The checks for the completeness of the opcode must have already been done.
13973 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13975 if self.disk_template in constants.DTS_INT_MIRROR:
13976 self.required_nodes = 2
13978 self.required_nodes = 1
13982 "disk_template": self.disk_template,
13985 "vcpus": self.vcpus,
13986 "memory": self.memory,
13987 "disks": self.disks,
13988 "disk_space_total": disk_space,
13990 "required_nodes": self.required_nodes,
13991 "hypervisor": self.hypervisor,
13996 def _AddRelocateInstance(self):
13997 """Add relocate instance data to allocator structure.
13999 This in combination with _ComputeClusterData will create the
14000 correct structure needed as input for the allocator.
14002 The checks for the completeness of the opcode must have already been done.
14006 instance = self.cfg.GetInstanceInfo(self.name)
14007 if instance is None:
14008 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14009 " IAllocator" % self.name)
14011 if instance.disk_template not in constants.DTS_MIRRORED:
14012 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14013 errors.ECODE_INVAL)
14015 if instance.disk_template in constants.DTS_INT_MIRROR and \
14016 len(instance.secondary_nodes) != 1:
14017 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14018 errors.ECODE_STATE)
14020 self.required_nodes = 1
14021 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14022 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14026 "disk_space_total": disk_space,
14027 "required_nodes": self.required_nodes,
14028 "relocate_from": self.relocate_from,
14032 def _AddNodeEvacuate(self):
14033 """Get data for node-evacuate requests.
14037 "instances": self.instances,
14038 "evac_mode": self.evac_mode,
14041 def _AddChangeGroup(self):
14042 """Get data for node-evacuate requests.
14046 "instances": self.instances,
14047 "target_groups": self.target_groups,
14050 def _BuildInputData(self, fn, keydata):
14051 """Build input data structures.
14054 self._ComputeClusterData()
14057 request["type"] = self.mode
14058 for keyname, keytype in keydata:
14059 if keyname not in request:
14060 raise errors.ProgrammerError("Request parameter %s is missing" %
14062 val = request[keyname]
14063 if not keytype(val):
14064 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14065 " validation, value %s, expected"
14066 " type %s" % (keyname, val, keytype))
14067 self.in_data["request"] = request
14069 self.in_text = serializer.Dump(self.in_data)
14071 _STRING_LIST = ht.TListOf(ht.TString)
14072 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14073 # pylint: disable=E1101
14074 # Class '...' has no 'OP_ID' member
14075 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14076 opcodes.OpInstanceMigrate.OP_ID,
14077 opcodes.OpInstanceReplaceDisks.OP_ID])
14081 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14082 ht.TItems([ht.TNonEmptyString,
14083 ht.TNonEmptyString,
14084 ht.TListOf(ht.TNonEmptyString),
14087 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14088 ht.TItems([ht.TNonEmptyString,
14091 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14092 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14095 constants.IALLOCATOR_MODE_ALLOC:
14098 ("name", ht.TString),
14099 ("memory", ht.TInt),
14100 ("disks", ht.TListOf(ht.TDict)),
14101 ("disk_template", ht.TString),
14102 ("os", ht.TString),
14103 ("tags", _STRING_LIST),
14104 ("nics", ht.TListOf(ht.TDict)),
14105 ("vcpus", ht.TInt),
14106 ("hypervisor", ht.TString),
14108 constants.IALLOCATOR_MODE_RELOC:
14109 (_AddRelocateInstance,
14110 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14112 constants.IALLOCATOR_MODE_NODE_EVAC:
14113 (_AddNodeEvacuate, [
14114 ("instances", _STRING_LIST),
14115 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14117 constants.IALLOCATOR_MODE_CHG_GROUP:
14118 (_AddChangeGroup, [
14119 ("instances", _STRING_LIST),
14120 ("target_groups", _STRING_LIST),
14124 def Run(self, name, validate=True, call_fn=None):
14125 """Run an instance allocator and return the results.
14128 if call_fn is None:
14129 call_fn = self.rpc.call_iallocator_runner
14131 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14132 result.Raise("Failure while running the iallocator script")
14134 self.out_text = result.payload
14136 self._ValidateResult()
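# A minimal usage sketch (with hypothetical variable names), mirroring the
# callers above such as LUGroupEvacuate.Exec and LUTestAllocator.Exec:
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instance_names, target_groups=group_uuids)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpExecError("iallocator failed: %s" % ial.info)
#   # ial.result now holds the validated allocator output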
14138 def _ValidateResult(self):
14139 """Process the allocator results.
14141 This will process the allocator output and, if successful, save the result
14142 in self.out_data and the other result attributes.
14146 rdict = serializer.Load(self.out_text)
14147 except Exception, err:
14148 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14150 if not isinstance(rdict, dict):
14151 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14153 # TODO: remove backwards compatibility in later versions
14154 if "nodes" in rdict and "result" not in rdict:
14155 rdict["result"] = rdict["nodes"]
14158 for key in "success", "info", "result":
14159 if key not in rdict:
14160 raise errors.OpExecError("Can't parse iallocator results:"
14161 " missing key '%s'" % key)
14162 setattr(self, key, rdict[key])
14164 if not self._result_check(self.result):
14165 raise errors.OpExecError("Iallocator returned invalid result,"
14166 " expected %s, got %s" %
14167 (self._result_check, self.result),
14168 errors.ECODE_INVAL)
14170 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14171 assert self.relocate_from is not None
14172 assert self.required_nodes == 1
14174 node2group = dict((name, ndata["group"])
14175 for (name, ndata) in self.in_data["nodes"].items())
14177 fn = compat.partial(self._NodesToGroups, node2group,
14178 self.in_data["nodegroups"])
14180 instance = self.cfg.GetInstanceInfo(self.name)
14181 request_groups = fn(self.relocate_from + [instance.primary_node])
14182 result_groups = fn(rdict["result"] + [instance.primary_node])
14184 if self.success and not set(result_groups).issubset(request_groups):
14185 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14186 " differ from original groups (%s)" %
14187 (utils.CommaJoin(result_groups),
14188 utils.CommaJoin(request_groups)))
14190 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14191 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14193 self.out_data = rdict
14196 def _NodesToGroups(node2group, groups, nodes):
14197 """Returns a list of unique group names for a list of nodes.
14199 @type node2group: dict
14200 @param node2group: Map from node name to group UUID
14202 @param groups: Group information
14204 @param nodes: Node names
14211 group_uuid = node2group[node]
14213 # Ignore unknown node
14217 group = groups[group_uuid]
14219 # Can't find group, let's use UUID
14220 group_name = group_uuid
14222 group_name = group["name"]
14224 result.add(group_name)
14226 return sorted(result)
14229 class LUTestAllocator(NoHooksLU):
14230 """Run allocator tests.
14232 This LU runs the allocator tests
14235 def CheckPrereq(self):
14236 """Check prerequisites.
14238 This checks the opcode parameters depending on the direction and mode of the test.
14241 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14242 for attr in ["memory", "disks", "disk_template",
14243 "os", "tags", "nics", "vcpus"]:
14244 if not hasattr(self.op, attr):
14245 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14246 attr, errors.ECODE_INVAL)
14247 iname = self.cfg.ExpandInstanceName(self.op.name)
14248 if iname is not None:
14249 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14250 iname, errors.ECODE_EXISTS)
14251 if not isinstance(self.op.nics, list):
14252 raise errors.OpPrereqError("Invalid parameter 'nics'",
14253 errors.ECODE_INVAL)
14254 if not isinstance(self.op.disks, list):
14255 raise errors.OpPrereqError("Invalid parameter 'disks'",
14256 errors.ECODE_INVAL)
14257 for row in self.op.disks:
14258 if (not isinstance(row, dict) or
14259 constants.IDISK_SIZE not in row or
14260 not isinstance(row[constants.IDISK_SIZE], int) or
14261 constants.IDISK_MODE not in row or
14262 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14263 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14264 " parameter", errors.ECODE_INVAL)
14265 if self.op.hypervisor is None:
14266 self.op.hypervisor = self.cfg.GetHypervisorType()
14267 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14268 fname = _ExpandInstanceName(self.cfg, self.op.name)
14269 self.op.name = fname
14270 self.relocate_from = \
14271 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14272 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14273 constants.IALLOCATOR_MODE_NODE_EVAC):
14274 if not self.op.instances:
14275 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14276 self.op.instances = _GetWantedInstances(self, self.op.instances)
14278 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14279 self.op.mode, errors.ECODE_INVAL)
14281 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14282 if self.op.allocator is None:
14283 raise errors.OpPrereqError("Missing allocator name",
14284 errors.ECODE_INVAL)
14285 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14286 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14287 self.op.direction, errors.ECODE_INVAL)
14289 def Exec(self, feedback_fn):
14290 """Run the allocator test.
14293 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14294 ial = IAllocator(self.cfg, self.rpc,
14297 memory=self.op.memory,
14298 disks=self.op.disks,
14299 disk_template=self.op.disk_template,
14303 vcpus=self.op.vcpus,
14304 hypervisor=self.op.hypervisor,
14306 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14307 ial = IAllocator(self.cfg, self.rpc,
14310 relocate_from=list(self.relocate_from),
14312 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14313 ial = IAllocator(self.cfg, self.rpc,
14315 instances=self.op.instances,
14316 target_groups=self.op.target_groups)
14317 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14318 ial = IAllocator(self.cfg, self.rpc,
14320 instances=self.op.instances,
14321 evac_mode=self.op.evac_mode)
14323 raise errors.ProgrammerError("Uncatched mode %s in"
14324 " LUTestAllocator.Exec", self.op.mode)
14326 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14327 result = ial.in_text
14329 ial.Run(self.op.allocator, validate=False)
14330 result = ial.out_text
14334 #: Query type implementations
14336 constants.QR_INSTANCE: _InstanceQuery,
14337 constants.QR_NODE: _NodeQuery,
14338 constants.QR_GROUP: _GroupQuery,
14339 constants.QR_OS: _OsQuery,
14342 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14345 def _GetQueryImplementation(name):
14346 """Returns the implemtnation for a query type.
14348 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14352 return _QUERY_IMPL[name]
14354 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14355 errors.ECODE_INVAL)