4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re
import OpenSSL

45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
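# Usage sketch (illustrative only, not part of this module): an LU's Exec
# could hand back follow-up jobs like this; OpTestDelay is used purely as a
# placeholder opcode and "submitted" as an extra result value:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)],
#             [opcodes.OpTestDelay(duration=2.0)]]
#     return ResultWithJobs(jobs, submitted=len(jobs))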
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left purely as a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names as values. Rules:
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
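# Usage sketch (illustrative only, not part of this module): an ExpandNames
# implementation that locks one instance exclusively and all nodes in shared
# mode; the instance name is a placeholder:
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: ["instance1.example.com"],
#       locking.LEVEL_NODE: locking.ALL_SET,
#       }
#     self.share_locks[locking.LEVEL_NODE] = 1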
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If there are no nodes, an empty
309 list should be returned (not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged, but any LU can override it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
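# Usage sketch (illustrative only, not part of this module): an LU that just
# wants to report post-hook failures, without changing its result, could
# override the method roughly as follows (assuming each entry in hook_results
# exposes fail_msg like other RPC results):
#
#   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
#     if phase == constants.HOOKS_PHASE_POST:
#       for (node_name, res) in hook_results.items():
#         if res.fail_msg:
#           feedback_fn("Post-hook failed on %s: %s" %
#                       (node_name, res.fail_msg))
#     return lu_result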
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
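# Usage sketch (illustrative only): the usual way an instance-level LU
# combines the two helpers above is to expand and lock the instance in
# ExpandNames, defer the node locks, and compute them in DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()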
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
477 raise NotImplementedError
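# Usage sketch (illustrative only, not part of this module): an LU built
# entirely from tasklets only needs to populate self.tasklets in ExpandNames,
# after which the base class drives CheckPrereq and Exec; the tasklet class
# used here is a placeholder:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.tasklets = [_SomeTasklet(self, self.op.instance_name)]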
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
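# Illustrative sketch of the semantics (not part of this module): with
# use_default=True, constants.VALUE_DEFAULT removes a key so the cluster-wide
# default applies again, while other values simply override the old ones:
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}
#   new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
#                                 "root_path": "/dev/vda1"})
#   # new == {"root_path": "/dev/vda1"}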
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
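# Usage sketch (illustrative only; the node names are placeholders): after
# narrowing down the set of nodes an operation really needs, an LU can drop
# the remaining node locks it acquired earlier:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=["node1.example.com", "node2.example.com"])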
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
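# Illustrative sketch of the result shape (names are placeholders): an
# instance "inst1.example.com" with the logical volume "xenvg/disk0" on
# "node1.example.com" yields
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}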
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of global parameters.
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node does not support the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _ExpandItemName(fn, name, kind):
1020 """Expand an item name.
1022 @param fn: the function to use for expansion
1023 @param name: requested item name
1024 @param kind: text description ('Node' or 'Instance')
1025 @return: the resolved (full) name
1026 @raise errors.OpPrereqError: if the item is not found
1029 full_name = fn(name)
1030 if full_name is None:
1031 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1036 def _ExpandNodeName(cfg, name):
1037 """Wrapper over L{_ExpandItemName} for nodes."""
1038 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1041 def _ExpandInstanceName(cfg, name):
1042 """Wrapper over L{_ExpandItemName} for instance."""
1043 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1046 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1047 minmem, maxmem, vcpus, nics, disk_template, disks,
1048 bep, hvp, hypervisor_name, tags):
1049 """Builds instance related env variables for hooks
1051 This builds the hook environment from individual variables.
1054 @param name: the name of the instance
1055 @type primary_node: string
1056 @param primary_node: the name of the instance's primary node
1057 @type secondary_nodes: list
1058 @param secondary_nodes: list of secondary nodes as strings
1059 @type os_type: string
1060 @param os_type: the name of the instance's OS
1061 @type status: string
1062 @param status: the desired status of the instance
1063 @type minmem: string
1064 @param minmem: the minimum memory size of the instance
1065 @type maxmem: string
1066 @param maxmem: the maximum memory size of the instance
1068 @param vcpus: the count of VCPUs the instance has
1070 @param nics: list of tuples (ip, mac, mode, link) representing
1071 the NICs the instance has
1072 @type disk_template: string
1073 @param disk_template: the disk template of the instance
1075 @param disks: the list of (size, mode) pairs
1077 @param bep: the backend parameters for the instance
1079 @param hvp: the hypervisor parameters for the instance
1080 @type hypervisor_name: string
1081 @param hypervisor_name: the hypervisor for the instance
1083 @param tags: list of instance tags as strings
1085 @return: the hook environment for this instance
1090 "INSTANCE_NAME": name,
1091 "INSTANCE_PRIMARY": primary_node,
1092 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1093 "INSTANCE_OS_TYPE": os_type,
1094 "INSTANCE_STATUS": status,
1095 "INSTANCE_MINMEM": minmem,
1096 "INSTANCE_MAXMEM": maxmem,
1097 # TODO(2.7) remove deprecated "memory" value
1098 "INSTANCE_MEMORY": maxmem,
1099 "INSTANCE_VCPUS": vcpus,
1100 "INSTANCE_DISK_TEMPLATE": disk_template,
1101 "INSTANCE_HYPERVISOR": hypervisor_name,
1104 nic_count = len(nics)
1105 for idx, (ip, mac, mode, link) in enumerate(nics):
1108 env["INSTANCE_NIC%d_IP" % idx] = ip
1109 env["INSTANCE_NIC%d_MAC" % idx] = mac
1110 env["INSTANCE_NIC%d_MODE" % idx] = mode
1111 env["INSTANCE_NIC%d_LINK" % idx] = link
1112 if mode == constants.NIC_MODE_BRIDGED:
1113 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1117 env["INSTANCE_NIC_COUNT"] = nic_count
1120 disk_count = len(disks)
1121 for idx, (size, mode) in enumerate(disks):
1122 env["INSTANCE_DISK%d_SIZE" % idx] = size
1123 env["INSTANCE_DISK%d_MODE" % idx] = mode
1127 env["INSTANCE_DISK_COUNT"] = disk_count
1132 env["INSTANCE_TAGS"] = " ".join(tags)
1134 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1135 for key, value in source.items():
1136 env["INSTANCE_%s_%s" % (kind, key)] = value
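# Illustrative sketch (not part of this module): for an instance with a
# single bridged NIC and one disk, the resulting environment contains entries
# such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT=1,
# INSTANCE_DISK0_SIZE and, for every backend/hypervisor parameter,
# INSTANCE_BE_<key> / INSTANCE_HV_<key>.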
1141 def _NICListToTuple(lu, nics):
1142 """Build a list of nic information tuples.
1144 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1145 value in LUInstanceQueryData.
1147 @type lu: L{LogicalUnit}
1148 @param lu: the logical unit on whose behalf we execute
1149 @type nics: list of L{objects.NIC}
1150 @param nics: list of nics to convert to hooks tuples
1154 cluster = lu.cfg.GetClusterInfo()
1158 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1159 mode = filled_params[constants.NIC_MODE]
1160 link = filled_params[constants.NIC_LINK]
1161 hooks_nics.append((ip, mac, mode, link))
1165 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1166 """Builds instance related env variables for hooks from an object.
1168 @type lu: L{LogicalUnit}
1169 @param lu: the logical unit on whose behalf we execute
1170 @type instance: L{objects.Instance}
1171 @param instance: the instance for which we should build the
1173 @type override: dict
1174 @param override: dictionary with key/values that will override
1177 @return: the hook environment dictionary
1180 cluster = lu.cfg.GetClusterInfo()
1181 bep = cluster.FillBE(instance)
1182 hvp = cluster.FillHV(instance)
1184 "name": instance.name,
1185 "primary_node": instance.primary_node,
1186 "secondary_nodes": instance.secondary_nodes,
1187 "os_type": instance.os,
1188 "status": instance.admin_state,
1189 "maxmem": bep[constants.BE_MAXMEM],
1190 "minmem": bep[constants.BE_MINMEM],
1191 "vcpus": bep[constants.BE_VCPUS],
1192 "nics": _NICListToTuple(lu, instance.nics),
1193 "disk_template": instance.disk_template,
1194 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1197 "hypervisor_name": instance.hypervisor,
1198 "tags": instance.tags,
1201 args.update(override)
1202 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1205 def _AdjustCandidatePool(lu, exceptions):
1206 """Adjust the candidate pool after node operations.
1209 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1211 lu.LogInfo("Promoted nodes to master candidate role: %s",
1212 utils.CommaJoin(node.name for node in mod_list))
1213 for name in mod_list:
1214 lu.context.ReaddNode(name)
1215 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1217 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1221 def _DecideSelfPromotion(lu, exceptions=None):
1222 """Decide whether I should promote myself as a master candidate.
1225 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1226 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1227 # the new node will increase mc_max with one, so:
1228 mc_should = min(mc_should + 1, cp_size)
1229 return mc_now < mc_should
1232 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1233 """Check that the brigdes needed by a list of nics exist.
1236 cluster = lu.cfg.GetClusterInfo()
1237 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1238 brlist = [params[constants.NIC_LINK] for params in paramslist
1239 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1241 result = lu.rpc.call_bridges_exist(target_node, brlist)
1242 result.Raise("Error checking bridges on destination node '%s'" %
1243 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1246 def _CheckInstanceBridgesExist(lu, instance, node=None):
1247 """Check that the brigdes needed by an instance exist.
1251 node = instance.primary_node
1252 _CheckNicsBridgesExist(lu, instance.nics, node)
1255 def _CheckOSVariant(os_obj, name):
1256 """Check whether an OS name conforms to the os variants specification.
1258 @type os_obj: L{objects.OS}
1259 @param os_obj: OS object to check
1261 @param name: OS name passed by the user, to check for validity
1264 variant = objects.OS.GetVariant(name)
1265 if not os_obj.supported_variants:
1267 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1268 " passed)" % (os_obj.name, variant),
1272 raise errors.OpPrereqError("OS name must include a variant",
1275 if variant not in os_obj.supported_variants:
1276 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1279 def _GetNodeInstancesInner(cfg, fn):
1280 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1283 def _GetNodeInstances(cfg, node_name):
1284 """Returns a list of all primary and secondary instances on a node.
1288 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1291 def _GetNodePrimaryInstances(cfg, node_name):
1292 """Returns primary instances on a node.
1295 return _GetNodeInstancesInner(cfg,
1296 lambda inst: node_name == inst.primary_node)
1299 def _GetNodeSecondaryInstances(cfg, node_name):
1300 """Returns secondary instances on a node.
1303 return _GetNodeInstancesInner(cfg,
1304 lambda inst: node_name in inst.secondary_nodes)
1307 def _GetStorageTypeArgs(cfg, storage_type):
1308 """Returns the arguments for a storage type.
1311 # Special case for file storage
1312 if storage_type == constants.ST_FILE:
1313 # storage.FileStorage wants a list of storage directories
1314 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1319 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1322 for dev in instance.disks:
1323 cfg.SetDiskID(dev, node_name)
1325 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1326 result.Raise("Failed to get disk status from node %s" % node_name,
1327 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1329 for idx, bdev_status in enumerate(result.payload):
1330 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1336 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1337 """Check the sanity of iallocator and node arguments and use the
1338 cluster-wide iallocator if appropriate.
1340 Check that at most one of (iallocator, node) is specified. If none is
1341 specified, then the LU's opcode's iallocator slot is filled with the
1342 cluster-wide default iallocator.
1344 @type iallocator_slot: string
1345 @param iallocator_slot: the name of the opcode iallocator slot
1346 @type node_slot: string
1347 @param node_slot: the name of the opcode target node slot
1350 node = getattr(lu.op, node_slot, None)
1351 iallocator = getattr(lu.op, iallocator_slot, None)
1353 if node is not None and iallocator is not None:
1354 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1356 elif node is None and iallocator is None:
1357 default_iallocator = lu.cfg.GetDefaultIAllocator()
1358 if default_iallocator:
1359 setattr(lu.op, iallocator_slot, default_iallocator)
1361 raise errors.OpPrereqError("No iallocator or node given and no"
1362 " cluster-wide default iallocator found;"
1363 " please specify either an iallocator or a"
1364 " node, or set a cluster-wide default"
1368 def _GetDefaultIAllocator(cfg, iallocator):
1369 """Decides on which iallocator to use.
1371 @type cfg: L{config.ConfigWriter}
1372 @param cfg: Cluster configuration object
1373 @type iallocator: string or None
1374 @param iallocator: Iallocator specified in opcode
1376 @return: Iallocator name
1380 # Use default iallocator
1381 iallocator = cfg.GetDefaultIAllocator()
1384 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1385 " opcode nor as a cluster-wide default",
1391 class LUClusterPostInit(LogicalUnit):
1392 """Logical unit for running hooks after cluster initialization.
1395 HPATH = "cluster-init"
1396 HTYPE = constants.HTYPE_CLUSTER
1398 def BuildHooksEnv(self):
1403 "OP_TARGET": self.cfg.GetClusterName(),
1406 def BuildHooksNodes(self):
1407 """Build hooks nodes.
1410 return ([], [self.cfg.GetMasterNode()])
1412 def Exec(self, feedback_fn):
1419 class LUClusterDestroy(LogicalUnit):
1420 """Logical unit for destroying the cluster.
1423 HPATH = "cluster-destroy"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1440 def CheckPrereq(self):
1441 """Check prerequisites.
1443 This checks whether the cluster is empty.
1445 Any errors are signaled by raising errors.OpPrereqError.
1448 master = self.cfg.GetMasterNode()
1450 nodelist = self.cfg.GetNodeList()
1451 if len(nodelist) != 1 or nodelist[0] != master:
1452 raise errors.OpPrereqError("There are still %d node(s) in"
1453 " this cluster." % (len(nodelist) - 1),
1455 instancelist = self.cfg.GetInstanceList()
1457 raise errors.OpPrereqError("There are still %d instance(s) in"
1458 " this cluster." % len(instancelist),
1461 def Exec(self, feedback_fn):
1462 """Destroys the cluster.
1465 master_params = self.cfg.GetMasterNetworkParameters()
1467 # Run post hooks on master node before it's removed
1468 _RunPostHook(self, master_params.name)
1470 ems = self.cfg.GetUseExternalMipScript()
1471 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1473 result.Raise("Could not disable the master role")
1475 return master_params.name
1478 def _VerifyCertificate(filename):
1479 """Verifies a certificate for L{LUClusterVerifyConfig}.
1481 @type filename: string
1482 @param filename: Path to PEM file
1486 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1487 utils.ReadFile(filename))
1488 except Exception, err: # pylint: disable=W0703
1489 return (LUClusterVerifyConfig.ETYPE_ERROR,
1490 "Failed to load X509 certificate %s: %s" % (filename, err))
1493 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1494 constants.SSL_CERT_EXPIRATION_ERROR)
1497 fnamemsg = "While verifying %s: %s" % (filename, msg)
1502 return (None, fnamemsg)
1503 elif errcode == utils.CERT_WARNING:
1504 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1505 elif errcode == utils.CERT_ERROR:
1506 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1508 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1511 def _GetAllHypervisorParameters(cluster, instances):
1512 """Compute the set of all hypervisor parameters.
1514 @type cluster: L{objects.Cluster}
1515 @param cluster: the cluster object
1516 @type instances: list of L{objects.Instance}
1517 @param instances: additional instances from which to obtain parameters
1518 @rtype: list of (origin, hypervisor, parameters)
1519 @return: a list with all parameters found, indicating the hypervisor they
1520 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1525 for hv_name in cluster.enabled_hypervisors:
1526 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1528 for os_name, os_hvp in cluster.os_hvp.items():
1529 for hv_name, hv_params in os_hvp.items():
1531 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1532 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1534 # TODO: collapse identical parameter values in a single one
1535 for instance in instances:
1536 if instance.hvparams:
1537 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1538 cluster.FillHV(instance)))
1543 class _VerifyErrors(object):
1544 """Mix-in for cluster/group verify LUs.
1546 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1547 self.op and self._feedback_fn to be available.)
1551 ETYPE_FIELD = "code"
1552 ETYPE_ERROR = "ERROR"
1553 ETYPE_WARNING = "WARNING"
1555 def _Error(self, ecode, item, msg, *args, **kwargs):
1556 """Format an error message.
1558 Based on the opcode's error_codes parameter, either format a
1559 parseable error code, or a simpler error string.
1561 This must be called only from Exec and functions called from Exec.
1564 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1565 itype, etxt, _ = ecode
1566 # first complete the msg
1569 # then format the whole message
1570 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1571 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1577 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1578 # and finally report it via the feedback_fn
1579 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1581 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1582 """Log an error message if the passed condition is True.
1586 or self.op.debug_simulate_errors) # pylint: disable=E1101
1588 # If the error code is in the list of ignored errors, demote the error to a warning.
1590 (_, etxt, _) = ecode
1591 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1592 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1595 self._Error(ecode, *args, **kwargs)
1597 # do not mark the operation as failed for WARN cases only
1598 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1599 self.bad = self.bad or cond
1602 class LUClusterVerify(NoHooksLU):
1603 """Submits all jobs necessary to verify the cluster.
1608 def ExpandNames(self):
1609 self.needed_locks = {}
1611 def Exec(self, feedback_fn):
1614 if self.op.group_name:
1615 groups = [self.op.group_name]
1616 depends_fn = lambda: None
1618 groups = self.cfg.GetNodeGroupList()
1620 # Verify global configuration
1622 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1625 # Always depend on global verification
1626 depends_fn = lambda: [(-len(jobs), [])]
1628 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1629 ignore_errors=self.op.ignore_errors,
1630 depends=depends_fn())]
1631 for group in groups)
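# Note: depends_fn yields a dependency relative to this submission; because
# jobs.extend() consumes the generator one element at a time, -len(jobs),
# evaluated as each group job is created, always points back at the
# OpClusterVerifyConfig job added first.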
1633 # Fix up all parameters
1634 for op in itertools.chain(*jobs): # pylint: disable=W0142
1635 op.debug_simulate_errors = self.op.debug_simulate_errors
1636 op.verbose = self.op.verbose
1637 op.error_codes = self.op.error_codes
1639 op.skip_checks = self.op.skip_checks
1640 except AttributeError:
1641 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1643 return ResultWithJobs(jobs)
1646 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1647 """Verifies the cluster config.
1652 def _VerifyHVP(self, hvp_data):
1653 """Verifies locally the syntax of the hypervisor parameters.
1656 for item, hv_name, hv_params in hvp_data:
1657 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1660 hv_class = hypervisor.GetHypervisor(hv_name)
1661 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1662 hv_class.CheckParameterSyntax(hv_params)
1663 except errors.GenericError, err:
1664 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1666 def ExpandNames(self):
1667 # Information can be safely retrieved as the BGL is acquired in exclusive
1669 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1670 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1671 self.all_node_info = self.cfg.GetAllNodesInfo()
1672 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1673 self.needed_locks = {}
1675 def Exec(self, feedback_fn):
1676 """Verify integrity of cluster, performing various test on nodes.
1680 self._feedback_fn = feedback_fn
1682 feedback_fn("* Verifying cluster config")
1684 for msg in self.cfg.VerifyConfig():
1685 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1687 feedback_fn("* Verifying cluster certificate files")
1689 for cert_filename in constants.ALL_CERT_FILES:
1690 (errcode, msg) = _VerifyCertificate(cert_filename)
1691 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1693 feedback_fn("* Verifying hypervisor parameters")
1695 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1696 self.all_inst_info.values()))
1698 feedback_fn("* Verifying all nodes belong to an existing group")
1700 # We do this verification here because, should this bogus circumstance
1701 # occur, it would never be caught by VerifyGroup, which only acts on
1702 # nodes/instances reachable from existing node groups.
1704 dangling_nodes = set(node.name for node in self.all_node_info.values()
1705 if node.group not in self.all_group_info)
1707 dangling_instances = {}
1708 no_node_instances = []
1710 for inst in self.all_inst_info.values():
1711 if inst.primary_node in dangling_nodes:
1712 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1713 elif inst.primary_node not in self.all_node_info:
1714 no_node_instances.append(inst.name)
1719 utils.CommaJoin(dangling_instances.get(node.name,
1721 for node in dangling_nodes]
1723 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1725 "the following nodes (and their instances) belong to a non"
1726 " existing group: %s", utils.CommaJoin(pretty_dangling))
1728 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1730 "the following instances have a non-existing primary-node:"
1731 " %s", utils.CommaJoin(no_node_instances))
1736 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1737 """Verifies the status of a node group.
1740 HPATH = "cluster-verify"
1741 HTYPE = constants.HTYPE_CLUSTER
1744 _HOOKS_INDENT_RE = re.compile("^", re.M)
1746 class NodeImage(object):
1747 """A class representing the logical and physical status of a node.
1750 @ivar name: the node name to which this object refers
1751 @ivar volumes: a structure as returned from
1752 L{ganeti.backend.GetVolumeList} (runtime)
1753 @ivar instances: a list of running instances (runtime)
1754 @ivar pinst: list of configured primary instances (config)
1755 @ivar sinst: list of configured secondary instances (config)
1756 @ivar sbp: dictionary of {primary-node: list of instances} for all
1757 instances for which this node is secondary (config)
1758 @ivar mfree: free memory, as reported by hypervisor (runtime)
1759 @ivar dfree: free disk, as reported by the node (runtime)
1760 @ivar offline: the offline status (config)
1761 @type rpc_fail: boolean
1762 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1763 not whether the individual keys were correct) (runtime)
1764 @type lvm_fail: boolean
1765 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1766 @type hyp_fail: boolean
1767 @ivar hyp_fail: whether the RPC call didn't return the instance list
1768 @type ghost: boolean
1769 @ivar ghost: whether this is a known node or not (config)
1770 @type os_fail: boolean
1771 @ivar os_fail: whether the RPC call didn't return valid OS data
1773 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1774 @type vm_capable: boolean
1775 @ivar vm_capable: whether the node can host instances
1778 def __init__(self, offline=False, name=None, vm_capable=True):
1787 self.offline = offline
1788 self.vm_capable = vm_capable
1789 self.rpc_fail = False
1790 self.lvm_fail = False
1791 self.hyp_fail = False
1793 self.os_fail = False
1796 def ExpandNames(self):
1797 # This raises errors.OpPrereqError on its own:
1798 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1800 # Get instances in node group; this is unsafe and needs verification later
1801 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1803 self.needed_locks = {
1804 locking.LEVEL_INSTANCE: inst_names,
1805 locking.LEVEL_NODEGROUP: [self.group_uuid],
1806 locking.LEVEL_NODE: [],
1809 self.share_locks = _ShareAll()
1811 def DeclareLocks(self, level):
1812 if level == locking.LEVEL_NODE:
1813 # Get members of node group; this is unsafe and needs verification later
1814 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1816 all_inst_info = self.cfg.GetAllInstancesInfo()
1818 # In Exec(), we warn about mirrored instances that have primary and
1819 # secondary living in separate node groups. To fully verify that
1820 # volumes for these instances are healthy, we will need to do an
1821 # extra call to their secondaries. We ensure here those nodes will
1823 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1824 # Important: access only the instances whose lock is owned
1825 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1826 nodes.update(all_inst_info[inst].secondary_nodes)
1828 self.needed_locks[locking.LEVEL_NODE] = nodes
1830 def CheckPrereq(self):
1831 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1832 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1834 group_nodes = set(self.group_info.members)
1835 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1838 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1840 unlocked_instances = \
1841 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1844 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1845 utils.CommaJoin(unlocked_nodes))
1847 if unlocked_instances:
1848 raise errors.OpPrereqError("Missing lock for instances: %s" %
1849 utils.CommaJoin(unlocked_instances))
1851 self.all_node_info = self.cfg.GetAllNodesInfo()
1852 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1854 self.my_node_names = utils.NiceSort(group_nodes)
1855 self.my_inst_names = utils.NiceSort(group_instances)
1857 self.my_node_info = dict((name, self.all_node_info[name])
1858 for name in self.my_node_names)
1860 self.my_inst_info = dict((name, self.all_inst_info[name])
1861 for name in self.my_inst_names)
1863 # We detect here the nodes that will need the extra RPC calls for verifying
1864 # split LV volumes; they should be locked.
1865 extra_lv_nodes = set()
1867 for inst in self.my_inst_info.values():
1868 if inst.disk_template in constants.DTS_INT_MIRROR:
1869 group = self.my_node_info[inst.primary_node].group
1870 for nname in inst.secondary_nodes:
1871 if self.all_node_info[nname].group != group:
1872 extra_lv_nodes.add(nname)
1874 unlocked_lv_nodes = \
1875 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1877 if unlocked_lv_nodes:
1878 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1879 utils.CommaJoin(unlocked_lv_nodes))
1880 self.extra_lv_nodes = list(extra_lv_nodes)
1882 def _VerifyNode(self, ninfo, nresult):
1883 """Perform some basic validation on data returned from a node.
1885 - check the result data structure is well formed and has all the
1887 - check ganeti version
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the results from the node
1893 @return: whether overall this call was successful (and we can expect
1894 reasonable values in the response)
1898 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1900 # main result, nresult should be a non-empty dict
1901 test = not nresult or not isinstance(nresult, dict)
1902 _ErrorIf(test, constants.CV_ENODERPC, node,
1903 "unable to verify node: no data returned")
1907 # compares ganeti version
1908 local_version = constants.PROTOCOL_VERSION
1909 remote_version = nresult.get("version", None)
1910 test = not (remote_version and
1911 isinstance(remote_version, (list, tuple)) and
1912 len(remote_version) == 2)
1913 _ErrorIf(test, constants.CV_ENODERPC, node,
1914 "connection to node returned invalid data")
1918 test = local_version != remote_version[0]
1919 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1920 "incompatible protocol versions: master %s,"
1921 " node %s", local_version, remote_version[0])
1925 # node seems compatible, we can actually try to look into its results
1927 # full package version
1928 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1929 constants.CV_ENODEVERSION, node,
1930 "software version mismatch: master %s, node %s",
1931 constants.RELEASE_VERSION, remote_version[1],
1932 code=self.ETYPE_WARNING)
1934 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1935 if ninfo.vm_capable and isinstance(hyp_result, dict):
1936 for hv_name, hv_result in hyp_result.iteritems():
1937 test = hv_result is not None
1938 _ErrorIf(test, constants.CV_ENODEHV, node,
1939 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1941 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1942 if ninfo.vm_capable and isinstance(hvp_result, list):
1943 for item, hv_name, hv_result in hvp_result:
1944 _ErrorIf(True, constants.CV_ENODEHV, node,
1945 "hypervisor %s parameter verify failure (source %s): %s",
1946 hv_name, item, hv_result)
1948 test = nresult.get(constants.NV_NODESETUP,
1949 ["Missing NODESETUP results"])
1950 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1955 def _VerifyNodeTime(self, ninfo, nresult,
1956 nvinfo_starttime, nvinfo_endtime):
1957 """Check the node time.
1959 @type ninfo: L{objects.Node}
1960 @param ninfo: the node to check
1961 @param nresult: the remote results for the node
1962 @param nvinfo_starttime: the start time of the RPC call
1963 @param nvinfo_endtime: the end time of the RPC call
1967 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1969 ntime = nresult.get(constants.NV_TIME, None)
1971 ntime_merged = utils.MergeTime(ntime)
1972 except (ValueError, TypeError):
1973 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1976 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1977 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1978 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1979 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1983 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1984 "Node time diverges by at least %s from master node time",
1987 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1988 """Check the node LVM results.
1990 @type ninfo: L{objects.Node}
1991 @param ninfo: the node to check
1992 @param nresult: the remote results for the node
1993 @param vg_name: the configured VG name
2000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2002 # checks vg existence and size > 20G
2003 vglist = nresult.get(constants.NV_VGLIST, None)
2005 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2007 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2008 constants.MIN_VG_SIZE)
2009 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2012 pvlist = nresult.get(constants.NV_PVLIST, None)
2013 test = pvlist is None
2014 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2016 # check that ':' is not present in PV names, since it's a
2017 # special character for lvcreate (denotes the range of PEs to
2018 # allocate on)
2019 for _, pvname, owner_vg in pvlist:
2020 test = ":" in pvname
2021 _ErrorIf(test, constants.CV_ENODELVM, node,
2022 "Invalid character ':' in PV '%s' of VG '%s'",
2025 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2026 """Check the node bridges.
2028 @type ninfo: L{objects.Node}
2029 @param ninfo: the node to check
2030 @param nresult: the remote results for the node
2031 @param bridges: the expected list of bridges
2038 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2040 missing = nresult.get(constants.NV_BRIDGES, None)
2041 test = not isinstance(missing, list)
2042 _ErrorIf(test, constants.CV_ENODENET, node,
2043 "did not return valid bridge information")
2045 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2046 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2048 def _VerifyNodeUserScripts(self, ninfo, nresult):
2049 """Check the results of user scripts presence and executability on the node
2051 @type ninfo: L{objects.Node}
2052 @param ninfo: the node to check
2053 @param nresult: the remote results for the node
2058 test = constants.NV_USERSCRIPTS not in nresult
2059 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2060 "did not return user scripts information")
2062 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2064 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2065 "user scripts not present or not executable: %s" %
2066 utils.CommaJoin(sorted(broken_scripts)))
2068 def _VerifyNodeNetwork(self, ninfo, nresult):
2069 """Check the node network connectivity results.
2071 @type ninfo: L{objects.Node}
2072 @param ninfo: the node to check
2073 @param nresult: the remote results for the node
2077 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2079 test = constants.NV_NODELIST not in nresult
2080 _ErrorIf(test, constants.CV_ENODESSH, node,
2081 "node hasn't returned node ssh connectivity data")
2083 if nresult[constants.NV_NODELIST]:
2084 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2085 _ErrorIf(True, constants.CV_ENODESSH, node,
2086 "ssh communication with node '%s': %s", a_node, a_msg)
2088 test = constants.NV_NODENETTEST not in nresult
2089 _ErrorIf(test, constants.CV_ENODENET, node,
2090 "node hasn't returned node tcp connectivity data")
2092 if nresult[constants.NV_NODENETTEST]:
2093 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2094 for anode in nlist:
2095 _ErrorIf(True, constants.CV_ENODENET, node,
2096 "tcp communication with node '%s': %s",
2097 anode, nresult[constants.NV_NODENETTEST][anode])
2099 test = constants.NV_MASTERIP not in nresult
2100 _ErrorIf(test, constants.CV_ENODENET, node,
2101 "node hasn't returned node master IP reachability data")
2103 if not nresult[constants.NV_MASTERIP]:
2104 if node == self.master_node:
2105 msg = "the master node cannot reach the master IP (not configured?)"
2107 msg = "cannot reach the master IP"
2108 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2110 def _VerifyInstance(self, instance, instanceconfig, node_image,
2112 """Verify an instance.
2114 This function checks to see if the required block devices are
2115 available on the instance's node.
2118 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2119 node_current = instanceconfig.primary_node
2121 node_vol_should = {}
2122 instanceconfig.MapLVsByNode(node_vol_should)
2124 for node in node_vol_should:
2125 n_img = node_image[node]
2126 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2127 # ignore missing volumes on offline or broken nodes
2129 for volume in node_vol_should[node]:
2130 test = volume not in n_img.volumes
2131 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2132 "volume %s missing on node %s", volume, node)
2134 if instanceconfig.admin_state == constants.ADMINST_UP:
2135 pri_img = node_image[node_current]
2136 test = instance not in pri_img.instances and not pri_img.offline
2137 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2138 "instance not running on its primary node %s",
2141 diskdata = [(nname, success, status, idx)
2142 for (nname, disks) in diskstatus.items()
2143 for idx, (success, status) in enumerate(disks)]
2145 for nname, success, bdev_status, idx in diskdata:
2146 # the 'ghost node' construction in Exec() ensures that we have a
2147 # node_image entry even for nodes outside this group
2148 snode = node_image[nname]
2149 bad_snode = snode.ghost or snode.offline
2150 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2151 not success and not bad_snode,
2152 constants.CV_EINSTANCEFAULTYDISK, instance,
2153 "couldn't retrieve status for disk/%s on %s: %s",
2154 idx, nname, bdev_status)
2155 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2156 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2157 constants.CV_EINSTANCEFAULTYDISK, instance,
2158 "disk/%s on %s is faulty", idx, nname)
2160 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2161 """Verify if there are any unknown volumes in the cluster.
2163 The .os, .swap and backup volumes are ignored. All other volumes are
2164 reported as unknown.
2166 @type reserved: L{ganeti.utils.FieldSet}
2167 @param reserved: a FieldSet of reserved volume names
2170 for node, n_img in node_image.items():
2171 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2172 # skip non-healthy nodes
2174 for volume in n_img.volumes:
2175 test = ((node not in node_vol_should or
2176 volume not in node_vol_should[node]) and
2177 not reserved.Matches(volume))
2178 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2179 "volume %s is unknown", volume)
2181 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2182 """Verify N+1 Memory Resilience.
2184 Check that if one single node dies we can still start all the
2185 instances it was primary for.
2188 cluster_info = self.cfg.GetClusterInfo()
2189 for node, n_img in node_image.items():
2190 # This code checks that every node which is now listed as
2191 # secondary has enough memory to host all instances it is
2192 # supposed to should a single other node in the cluster fail.
2193 # FIXME: not ready for failover to an arbitrary node
2194 # FIXME: does not support file-backed instances
2195 # WARNING: we currently take into account down instances as well
2196 # as up ones, considering that even if they're down someone
2197 # might want to start them even in the event of a node failure.
2199 # we're skipping offline nodes from the N+1 warning, since
2200 # most likely we don't have good memory information from them;
2201 # we already list instances living on such nodes, and that's
2204 #TODO(dynmem): use MINMEM for checking
2205 #TODO(dynmem): also consider ballooning out other instances
2206 for prinode, instances in n_img.sbp.items():
2207 needed_mem = 0
2208 for instance in instances:
2209 bep = cluster_info.FillBE(instance_cfg[instance])
2210 if bep[constants.BE_AUTO_BALANCE]:
2211 needed_mem += bep[constants.BE_MAXMEM]
2212 test = n_img.mfree < needed_mem
2213 self._ErrorIf(test, constants.CV_ENODEN1, node,
2214 "not enough memory to accomodate instance failovers"
2215 " should node %s fail (%dMiB needed, %dMiB available)",
2216 prinode, needed_mem, n_img.mfree)
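# Worked example of the N+1 check above (hypothetical numbers): if node B is
# secondary for two auto-balanced instances with BE_MAXMEM 2048 and 4096
# whose primary is node A, then n_img.sbp contains {"A": [inst1, inst2]} and
# node B needs at least 6144 MiB free to absorb a failover of node A;
# otherwise a CV_ENODEN1 error is reported.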
2218 @classmethod
2219 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2220 (files_all, files_opt, files_mc, files_vm)):
2221 """Verifies file checksums collected from all nodes.
2223 @param errorif: Callback for reporting errors
2224 @param nodeinfo: List of L{objects.Node} objects
2225 @param master_node: Name of master node
2226 @param all_nvinfo: RPC results
2229 # Define functions determining which nodes to consider for a file
2232 (files_mc, lambda node: (node.master_candidate or
2233 node.name == master_node)),
2234 (files_vm, lambda node: node.vm_capable),
2237 # Build mapping from filename to list of nodes which should have the file
2239 for (files, fn) in files2nodefn:
2241 filenodes = nodeinfo
2243 filenodes = filter(fn, nodeinfo)
2244 nodefiles.update((filename,
2245 frozenset(map(operator.attrgetter("name"), filenodes)))
2246 for filename in files)
2248 assert set(nodefiles) == (files_all | files_mc | files_vm)
2250 fileinfo = dict((filename, {}) for filename in nodefiles)
2251 ignore_nodes = set()
2253 for node in nodeinfo:
2255 ignore_nodes.add(node.name)
2258 nresult = all_nvinfo[node.name]
2260 if nresult.fail_msg or not nresult.payload:
2263 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2265 test = not (node_files and isinstance(node_files, dict))
2266 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2267 "Node did not return file checksum data")
2269 ignore_nodes.add(node.name)
2272 # Build per-checksum mapping from filename to nodes having it
2273 for (filename, checksum) in node_files.items():
2274 assert filename in nodefiles
2275 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2277 for (filename, checksums) in fileinfo.items():
2278 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2280 # Nodes having the file
2281 with_file = frozenset(node_name
2282 for nodes in fileinfo[filename].values()
2283 for node_name in nodes) - ignore_nodes
2285 expected_nodes = nodefiles[filename] - ignore_nodes
2287 # Nodes missing file
2288 missing_file = expected_nodes - with_file
2290 if filename in files_opt:
2292 errorif(missing_file and missing_file != expected_nodes,
2293 constants.CV_ECLUSTERFILECHECK, None,
2294 "File %s is optional, but it must exist on all or no"
2295 " nodes (not found on %s)",
2296 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2298 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2299 "File %s is missing from node(s) %s", filename,
2300 utils.CommaJoin(utils.NiceSort(missing_file)))
2302 # Warn if a node has a file it shouldn't
2303 unexpected = with_file - expected_nodes
2305 constants.CV_ECLUSTERFILECHECK, None,
2306 "File %s should not exist on node(s) %s",
2307 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2309 # See if there are multiple versions of the file
2310 test = len(checksums) > 1
2312 variants = ["variant %s on %s" %
2313 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2314 for (idx, (checksum, nodes)) in
2315 enumerate(sorted(checksums.items()))]
2319 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2320 "File %s found with %s different checksums (%s)",
2321 filename, len(checksums), "; ".join(variants))
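# Data shape assumed by the file checks above (hypothetical values):
#   fileinfo == {"/path/to/file": {"checksum-a": set(["node1", "node2"]),
#                                  "checksum-b": set(["node3"])}}
# i.e. filename -> checksum -> nodes reporting that checksum; more than one
# checksum per filename means the file differs between nodes.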
2323 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2325 """Verifies and the node DRBD status.
2327 @type ninfo: L{objects.Node}
2328 @param ninfo: the node to check
2329 @param nresult: the remote results for the node
2330 @param instanceinfo: the dict of instances
2331 @param drbd_helper: the configured DRBD usermode helper
2332 @param drbd_map: the DRBD map as returned by
2333 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2337 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2341 test = (helper_result is None)
2342 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2343 "no drbd usermode helper returned")
2345 status, payload = helper_result
2347 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2348 "drbd usermode helper check unsuccessful: %s", payload)
2349 test = status and (payload != drbd_helper)
2350 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2351 "wrong drbd usermode helper: %s", payload)
2353 # compute the DRBD minors
2355 for minor, instance in drbd_map[node].items():
2356 test = instance not in instanceinfo
2357 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2358 "ghost instance '%s' in temporary DRBD map", instance)
2359 # ghost instance should not be running, but otherwise we
2360 # don't give double warnings (both ghost instance and
2361 # unallocated minor in use)
2363 node_drbd[minor] = (instance, False)
2365 instance = instanceinfo[instance]
2366 node_drbd[minor] = (instance.name,
2367 instance.admin_state == constants.ADMINST_UP)
2369 # and now check them
2370 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2371 test = not isinstance(used_minors, (tuple, list))
2372 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2373 "cannot parse drbd status file: %s", str(used_minors))
2375 # we cannot check drbd status
2378 for minor, (iname, must_exist) in node_drbd.items():
2379 test = minor not in used_minors and must_exist
2380 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2381 "drbd minor %d of instance %s is not active", minor, iname)
2382 for minor in used_minors:
2383 test = minor not in node_drbd
2384 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2385 "unallocated drbd minor %d is in use", minor)
2387 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2388 """Builds the node OS structures.
2390 @type ninfo: L{objects.Node}
2391 @param ninfo: the node to check
2392 @param nresult: the remote results for the node
2393 @param nimg: the node image object
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 remote_os = nresult.get(constants.NV_OSLIST, None)
2400 test = (not isinstance(remote_os, list) or
2401 not compat.all(isinstance(v, list) and len(v) == 7
2402 for v in remote_os))
2404 _ErrorIf(test, constants.CV_ENODEOS, node,
2405 "node hasn't returned valid OS data")
2414 for (name, os_path, status, diagnose,
2415 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2417 if name not in os_dict:
2420 # parameters is a list of lists instead of list of tuples due to
2421 # JSON lacking a real tuple type, fix it:
2422 parameters = [tuple(v) for v in parameters]
2423 os_dict[name].append((os_path, status, diagnose,
2424 set(variants), set(parameters), set(api_ver)))
2426 nimg.oslist = os_dict
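# Resulting structure, sketched with hypothetical values:
#   nimg.oslist == {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                    set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions) tuples; more than one entry per name indicates duplicate OS
# definitions on the node.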
2428 def _VerifyNodeOS(self, ninfo, nimg, base):
2429 """Verifies the node OS list.
2431 @type ninfo: L{objects.Node}
2432 @param ninfo: the node to check
2433 @param nimg: the node image object
2434 @param base: the 'template' node we match against (e.g. from the master)
2438 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2442 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2443 for os_name, os_data in nimg.oslist.items():
2444 assert os_data, "Empty OS status for OS %s?!" % os_name
2445 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2446 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2447 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2448 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2449 "OS '%s' has multiple entries (first one shadows the rest): %s",
2450 os_name, utils.CommaJoin([v[0] for v in os_data]))
2451 # comparisons with the 'base' image
2452 test = os_name not in base.oslist
2453 _ErrorIf(test, constants.CV_ENODEOS, node,
2454 "Extra OS %s not present on reference node (%s)",
2458 assert base.oslist[os_name], "Base node has empty OS status?"
2459 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2461 # base OS is invalid, skipping
2463 for kind, a, b in [("API version", f_api, b_api),
2464 ("variants list", f_var, b_var),
2465 ("parameters", beautify_params(f_param),
2466 beautify_params(b_param))]:
2467 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2468 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2469 kind, os_name, base.name,
2470 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2472 # check any missing OSes
2473 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2474 _ErrorIf(missing, constants.CV_ENODEOS, node,
2475 "OSes present on reference node %s but missing on this node: %s",
2476 base.name, utils.CommaJoin(missing))
2478 def _VerifyOob(self, ninfo, nresult):
2479 """Verifies out of band functionality of a node.
2481 @type ninfo: L{objects.Node}
2482 @param ninfo: the node to check
2483 @param nresult: the remote results for the node
2487 # We just have to verify the paths on master and/or master candidates
2488 # as the oob helper is invoked on the master
2489 if ((ninfo.master_candidate or ninfo.master_capable) and
2490 constants.NV_OOB_PATHS in nresult):
2491 for path_result in nresult[constants.NV_OOB_PATHS]:
2492 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2494 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2495 """Verifies and updates the node volume data.
2497 This function will update a L{NodeImage}'s internal structures
2498 with data from the remote call.
2500 @type ninfo: L{objects.Node}
2501 @param ninfo: the node to check
2502 @param nresult: the remote results for the node
2503 @param nimg: the node image object
2504 @param vg_name: the configured VG name
2508 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2510 nimg.lvm_fail = True
2511 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2514 elif isinstance(lvdata, basestring):
2515 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2516 utils.SafeEncode(lvdata))
2517 elif not isinstance(lvdata, dict):
2518 _ErrorIf(True, constants.CV_ENODELVM, node,
2519 "rpc call to node failed (lvlist)")
2521 nimg.volumes = lvdata
2522 nimg.lvm_fail = False
2524 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2525 """Verifies and updates the node instance list.
2527 If the listing was successful, then updates this node's instance
2528 list. Otherwise, it marks the RPC call as failed for the instance
2531 @type ninfo: L{objects.Node}
2532 @param ninfo: the node to check
2533 @param nresult: the remote results for the node
2534 @param nimg: the node image object
2537 idata = nresult.get(constants.NV_INSTANCELIST, None)
2538 test = not isinstance(idata, list)
2539 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2540 "rpc call to node failed (instancelist): %s",
2541 utils.SafeEncode(str(idata)))
2543 nimg.hyp_fail = True
2545 nimg.instances = idata
2547 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2548 """Verifies and computes a node information map
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2553 @param nimg: the node image object
2554 @param vg_name: the configured VG name
2558 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2560 # try to read free memory (from the hypervisor)
2561 hv_info = nresult.get(constants.NV_HVINFO, None)
2562 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2563 _ErrorIf(test, constants.CV_ENODEHV, node,
2564 "rpc call to node failed (hvinfo)")
2567 nimg.mfree = int(hv_info["memory_free"])
2568 except (ValueError, TypeError):
2569 _ErrorIf(True, constants.CV_ENODERPC, node,
2570 "node returned invalid nodeinfo, check hypervisor")
2572 # FIXME: devise a free space model for file based instances as well
2573 if vg_name is not None:
2574 test = (constants.NV_VGLIST not in nresult or
2575 vg_name not in nresult[constants.NV_VGLIST])
2576 _ErrorIf(test, constants.CV_ENODELVM, node,
2577 "node didn't return data for the volume group '%s'"
2578 " - it is either missing or broken", vg_name)
2581 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2582 except (ValueError, TypeError):
2583 _ErrorIf(True, constants.CV_ENODERPC, node,
2584 "node returned invalid LVM info, check LVM status")
2586 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2587 """Gets per-disk status information for all instances.
2589 @type nodelist: list of strings
2590 @param nodelist: Node names
2591 @type node_image: dict of (name, L{objects.Node})
2592 @param node_image: Node objects
2593 @type instanceinfo: dict of (name, L{objects.Instance})
2594 @param instanceinfo: Instance objects
2595 @rtype: {instance: {node: [(success, payload)]}}
2596 @return: a dictionary of per-instance dictionaries with nodes as
2597 keys and disk information as values; the disk information is a
2598 list of tuples (success, payload)
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2604 node_disks_devonly = {}
2605 diskless_instances = set()
2606 diskless = constants.DT_DISKLESS
2608 for nname in nodelist:
2609 node_instances = list(itertools.chain(node_image[nname].pinst,
2610 node_image[nname].sinst))
2611 diskless_instances.update(inst for inst in node_instances
2612 if instanceinfo[inst].disk_template == diskless)
2613 disks = [(inst, disk)
2614 for inst in node_instances
2615 for disk in instanceinfo[inst].disks]
2618 # No need to collect data
2621 node_disks[nname] = disks
2623 # Creating copies as SetDiskID below will modify the objects and that can
2624 # lead to incorrect data returned from nodes
2625 devonly = [dev.Copy() for (_, dev) in disks]
2628 self.cfg.SetDiskID(dev, nname)
2630 node_disks_devonly[nname] = devonly
2632 assert len(node_disks) == len(node_disks_devonly)
2634 # Collect data from all nodes with disks
2635 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2638 assert len(result) == len(node_disks)
2642 for (nname, nres) in result.items():
2643 disks = node_disks[nname]
2646 # No data from this node
2647 data = len(disks) * [(False, "node offline")]
2650 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2651 "while getting disk information: %s", msg)
2653 # No data from this node
2654 data = len(disks) * [(False, msg)]
2657 for idx, i in enumerate(nres.payload):
2658 if isinstance(i, (tuple, list)) and len(i) == 2:
2661 logging.warning("Invalid result from node %s, entry %d: %s",
2663 data.append((False, "Invalid result from the remote node"))
2665 for ((inst, _), status) in zip(disks, data):
2666 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2668 # Add empty entries for diskless instances.
2669 for inst in diskless_instances:
2670 assert inst not in instdisk
2673 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2674 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2675 compat.all(isinstance(s, (tuple, list)) and
2676 len(s) == 2 for s in statuses)
2677 for inst, nnames in instdisk.items()
2678 for nname, statuses in nnames.items())
2679 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2683 @staticmethod
2684 def _SshNodeSelector(group_uuid, all_nodes):
2685 """Create endless iterators for all potential SSH check hosts.
2688 nodes = [node for node in all_nodes
2689 if (node.group != group_uuid and
2691 keyfunc = operator.attrgetter("group")
2693 return map(itertools.cycle,
2694 [sorted(map(operator.attrgetter("name"), names))
2695 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2696 keyfunc)])
2698 @classmethod
2699 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2700 """Choose which nodes should talk to which other nodes.
2702 We will make nodes contact all nodes in their group, and one node from
2703 every other group.
2705 @warning: This algorithm has a known issue if one node group is much
2706 smaller than others (e.g. just one node). In such a case all other
2707 nodes will talk to the single node.
2710 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2711 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2713 return (online_nodes,
2714 dict((name, sorted([i.next() for i in sel]))
2715 for name in online_nodes))
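# Hypothetical example of the selection above: verifying group G1 = {A, B}
# with one other group G2 = {C, D}, the result would be
#   (["A", "B"], {"A": ["C"], "B": ["D"]})
# i.e. each online node checks all nodes of its own group plus one node
# cycled from every other group.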
2717 def BuildHooksEnv(self):
2720 Cluster-Verify hooks are run in the post phase and their failure causes
2721 the output to be logged in the verify output and the verification to fail.
2725 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2728 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2729 for node in self.my_node_info.values())
2733 def BuildHooksNodes(self):
2734 """Build hooks nodes.
2737 return ([], self.my_node_names)
2739 def Exec(self, feedback_fn):
2740 """Verify integrity of the node group, performing various test on nodes.
2743 # This method has too many local variables. pylint: disable=R0914
2744 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2746 if not self.my_node_names:
2748 feedback_fn("* Empty node group, skipping verification")
2752 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 verbose = self.op.verbose
2754 self._feedback_fn = feedback_fn
2756 vg_name = self.cfg.GetVGName()
2757 drbd_helper = self.cfg.GetDRBDHelper()
2758 cluster = self.cfg.GetClusterInfo()
2759 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2760 hypervisors = cluster.enabled_hypervisors
2761 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2763 i_non_redundant = [] # Non redundant instances
2764 i_non_a_balanced = [] # Non auto-balanced instances
2765 i_offline = 0 # Count of offline instances
2766 n_offline = 0 # Count of offline nodes
2767 n_drained = 0 # Count of nodes being drained
2768 node_vol_should = {}
2770 # FIXME: verify OS list
2773 filemap = _ComputeAncillaryFiles(cluster, False)
2775 # do local checksums
2776 master_node = self.master_node = self.cfg.GetMasterNode()
2777 master_ip = self.cfg.GetMasterIP()
2779 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2782 if self.cfg.GetUseExternalMipScript():
2783 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2785 node_verify_param = {
2786 constants.NV_FILELIST:
2787 utils.UniqueSequence(filename
2788 for files in filemap
2789 for filename in files),
2790 constants.NV_NODELIST:
2791 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2792 self.all_node_info.values()),
2793 constants.NV_HYPERVISOR: hypervisors,
2794 constants.NV_HVPARAMS:
2795 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2796 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2797 for node in node_data_list
2798 if not node.offline],
2799 constants.NV_INSTANCELIST: hypervisors,
2800 constants.NV_VERSION: None,
2801 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2802 constants.NV_NODESETUP: None,
2803 constants.NV_TIME: None,
2804 constants.NV_MASTERIP: (master_node, master_ip),
2805 constants.NV_OSLIST: None,
2806 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2807 constants.NV_USERSCRIPTS: user_scripts,
2810 if vg_name is not None:
2811 node_verify_param[constants.NV_VGLIST] = None
2812 node_verify_param[constants.NV_LVLIST] = vg_name
2813 node_verify_param[constants.NV_PVLIST] = [vg_name]
2814 node_verify_param[constants.NV_DRBDLIST] = None
2817 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2820 # FIXME: this needs to be changed per node-group, not cluster-wide
2822 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2823 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2824 bridges.add(default_nicpp[constants.NIC_LINK])
2825 for instance in self.my_inst_info.values():
2826 for nic in instance.nics:
2827 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2828 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2829 bridges.add(full_nic[constants.NIC_LINK])
2832 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2834 # Build our expected cluster state
2835 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2837 vm_capable=node.vm_capable))
2838 for node in node_data_list)
2842 for node in self.all_node_info.values():
2843 path = _SupportsOob(self.cfg, node)
2844 if path and path not in oob_paths:
2845 oob_paths.append(path)
2848 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2850 for instance in self.my_inst_names:
2851 inst_config = self.my_inst_info[instance]
2853 for nname in inst_config.all_nodes:
2854 if nname not in node_image:
2855 gnode = self.NodeImage(name=nname)
2856 gnode.ghost = (nname not in self.all_node_info)
2857 node_image[nname] = gnode
2859 inst_config.MapLVsByNode(node_vol_should)
2861 pnode = inst_config.primary_node
2862 node_image[pnode].pinst.append(instance)
2864 for snode in inst_config.secondary_nodes:
2865 nimg = node_image[snode]
2866 nimg.sinst.append(instance)
2867 if pnode not in nimg.sbp:
2868 nimg.sbp[pnode] = []
2869 nimg.sbp[pnode].append(instance)
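# Sketch of the expected-state image built above (hypothetical names):
#   node_image["node2"].pinst == ["inst3"]           # primary instances
#   node_image["node2"].sinst == ["inst1", "inst2"]  # secondary instances
#   node_image["node2"].sbp   == {"node1": ["inst1", "inst2"]}
# sbp groups secondary instances by their primary node and is what the N+1
# memory check later sums over.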
2871 # At this point, we have the in-memory data structures complete,
2872 # except for the runtime information, which we'll gather next
2874 # Due to the way our RPC system works, exact response times cannot be
2875 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2876 # time before and after executing the request, we can at least have a time
2878 nvinfo_starttime = time.time()
2879 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2881 self.cfg.GetClusterName())
2882 nvinfo_endtime = time.time()
2884 if self.extra_lv_nodes and vg_name is not None:
2886 self.rpc.call_node_verify(self.extra_lv_nodes,
2887 {constants.NV_LVLIST: vg_name},
2888 self.cfg.GetClusterName())
2890 extra_lv_nvinfo = {}
2892 all_drbd_map = self.cfg.ComputeDRBDMap()
2894 feedback_fn("* Gathering disk information (%s nodes)" %
2895 len(self.my_node_names))
2896 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2899 feedback_fn("* Verifying configuration file consistency")
2901 # If not all nodes are being checked, we need to make sure the master node
2902 # and a non-checked vm_capable node are in the list.
2903 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2905 vf_nvinfo = all_nvinfo.copy()
2906 vf_node_info = list(self.my_node_info.values())
2907 additional_nodes = []
2908 if master_node not in self.my_node_info:
2909 additional_nodes.append(master_node)
2910 vf_node_info.append(self.all_node_info[master_node])
2911 # Add the first vm_capable node we find which is not included
2912 for node in absent_nodes:
2913 nodeinfo = self.all_node_info[node]
2914 if nodeinfo.vm_capable and not nodeinfo.offline:
2915 additional_nodes.append(node)
2916 vf_node_info.append(self.all_node_info[node])
2918 key = constants.NV_FILELIST
2919 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2920 {key: node_verify_param[key]},
2921 self.cfg.GetClusterName()))
2923 vf_nvinfo = all_nvinfo
2924 vf_node_info = self.my_node_info.values()
2926 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2928 feedback_fn("* Verifying node status")
2932 for node_i in node_data_list:
2934 nimg = node_image[node]
2938 feedback_fn("* Skipping offline node %s" % (node,))
2942 if node == master_node:
2944 elif node_i.master_candidate:
2945 ntype = "master candidate"
2946 elif node_i.drained:
2952 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2954 msg = all_nvinfo[node].fail_msg
2955 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2958 nimg.rpc_fail = True
2961 nresult = all_nvinfo[node].payload
2963 nimg.call_ok = self._VerifyNode(node_i, nresult)
2964 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2965 self._VerifyNodeNetwork(node_i, nresult)
2966 self._VerifyNodeUserScripts(node_i, nresult)
2967 self._VerifyOob(node_i, nresult)
2970 self._VerifyNodeLVM(node_i, nresult, vg_name)
2971 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2974 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2975 self._UpdateNodeInstances(node_i, nresult, nimg)
2976 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2977 self._UpdateNodeOS(node_i, nresult, nimg)
2979 if not nimg.os_fail:
2980 if refos_img is None:
2982 self._VerifyNodeOS(node_i, nimg, refos_img)
2983 self._VerifyNodeBridges(node_i, nresult, bridges)
2985 # Check that all instances running on the node have it as their primary. (This
2986 # can no longer be done from _VerifyInstance below, since some of the
2987 # wrong instances could be from other node groups.)
2988 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2990 for inst in non_primary_inst:
2991 # FIXME: investigate best way to handle offline insts
2992 if inst.admin_state == constants.ADMINST_OFFLINE:
2994 feedback_fn("* Skipping offline instance %s" % inst.name)
2997 test = inst in self.all_inst_info
2998 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2999 "instance should not run on node %s", node_i.name)
3000 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3001 "node is running unknown instance %s", inst)
3003 for node, result in extra_lv_nvinfo.items():
3004 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3005 node_image[node], vg_name)
3007 feedback_fn("* Verifying instance status")
3008 for instance in self.my_inst_names:
3010 feedback_fn("* Verifying instance %s" % instance)
3011 inst_config = self.my_inst_info[instance]
3012 self._VerifyInstance(instance, inst_config, node_image,
3014 inst_nodes_offline = []
3016 pnode = inst_config.primary_node
3017 pnode_img = node_image[pnode]
3018 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3019 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3020 " primary node failed", instance)
3022 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3024 constants.CV_EINSTANCEBADNODE, instance,
3025 "instance is marked as running and lives on offline node %s",
3026 inst_config.primary_node)
3028 # If the instance is non-redundant we cannot survive losing its primary
3029 # node, so we are not N+1 compliant. On the other hand we have no disk
3030 # templates with more than one secondary, so that situation is not well
3031 # supported either.
3032 # FIXME: does not support file-backed instances
3033 if not inst_config.secondary_nodes:
3034 i_non_redundant.append(instance)
3036 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3037 constants.CV_EINSTANCELAYOUT,
3038 instance, "instance has multiple secondary nodes: %s",
3039 utils.CommaJoin(inst_config.secondary_nodes),
3040 code=self.ETYPE_WARNING)
3042 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3043 pnode = inst_config.primary_node
3044 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3045 instance_groups = {}
3047 for node in instance_nodes:
3048 instance_groups.setdefault(self.all_node_info[node].group,
3052 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3053 # Sort so that we always list the primary node first.
3054 for group, nodes in sorted(instance_groups.items(),
3055 key=lambda (_, nodes): pnode in nodes,
3058 self._ErrorIf(len(instance_groups) > 1,
3059 constants.CV_EINSTANCESPLITGROUPS,
3060 instance, "instance has primary and secondary nodes in"
3061 " different groups: %s", utils.CommaJoin(pretty_list),
3062 code=self.ETYPE_WARNING)
3064 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3065 i_non_a_balanced.append(instance)
3067 for snode in inst_config.secondary_nodes:
3068 s_img = node_image[snode]
3069 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3070 snode, "instance %s, connection to secondary node failed",
3074 inst_nodes_offline.append(snode)
3076 # warn that the instance lives on offline nodes
3077 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3078 "instance has offline secondary node(s) %s",
3079 utils.CommaJoin(inst_nodes_offline))
3080 # ... or ghost/non-vm_capable nodes
3081 for node in inst_config.all_nodes:
3082 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3083 instance, "instance lives on ghost node %s", node)
3084 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3085 instance, "instance lives on non-vm_capable node %s", node)
3087 feedback_fn("* Verifying orphan volumes")
3088 reserved = utils.FieldSet(*cluster.reserved_lvs)
3090 # We will get spurious "unknown volume" warnings if any node of this group
3091 # is secondary for an instance whose primary is in another group. To avoid
3092 # them, we find these instances and add their volumes to node_vol_should.
3093 for inst in self.all_inst_info.values():
3094 for secondary in inst.secondary_nodes:
3095 if (secondary in self.my_node_info
3096 and inst.name not in self.my_inst_info):
3097 inst.MapLVsByNode(node_vol_should)
3100 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3102 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3103 feedback_fn("* Verifying N+1 Memory redundancy")
3104 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3106 feedback_fn("* Other Notes")
3108 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3109 % len(i_non_redundant))
3111 if i_non_a_balanced:
3112 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3113 % len(i_non_a_balanced))
3116 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3119 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3122 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3126 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3127 """Analyze the post-hooks' result
3129 This method analyses the hook result, handles it, and sends some
3130 nicely-formatted feedback back to the user.
3132 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3133 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3134 @param hooks_results: the results of the multi-node hooks rpc call
3135 @param feedback_fn: function used to send feedback back to the caller
3136 @param lu_result: previous Exec result
3137 @return: the new Exec result, based on the previous result
3141 # We only really run POST phase hooks, only for non-empty groups,
3142 # and are only interested in their results
3143 if not self.my_node_names:
3146 elif phase == constants.HOOKS_PHASE_POST:
3147 # Used to change hooks' output to proper indentation
3148 feedback_fn("* Hooks Results")
3149 assert hooks_results, "invalid result from hooks"
3151 for node_name in hooks_results:
3152 res = hooks_results[node_name]
3154 test = msg and not res.offline
3155 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3156 "Communication failure in hooks execution: %s", msg)
3157 if res.offline or msg:
3158 # No need to investigate payload if node is offline or gave
3161 for script, hkr, output in res.payload:
3162 test = hkr == constants.HKR_FAIL
3163 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3164 "Script %s failed, output:", script)
3166 output = self._HOOKS_INDENT_RE.sub(" ", output)
3167 feedback_fn("%s" % output)
3173 class LUClusterVerifyDisks(NoHooksLU):
3174 """Verifies the cluster disks status.
3179 def ExpandNames(self):
3180 self.share_locks = _ShareAll()
3181 self.needed_locks = {
3182 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3185 def Exec(self, feedback_fn):
3186 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3188 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3189 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3190 for group in group_names])
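# Hypothetical result for two owned node group locks g1 and g2: the returned
# ResultWithJobs wraps [[OpGroupVerifyDisks(group_name=g1)],
# [OpGroupVerifyDisks(group_name=g2)]], i.e. one single-opcode job per group.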
3193 class LUGroupVerifyDisks(NoHooksLU):
3194 """Verifies the status of all disks in a node group.
3199 def ExpandNames(self):
3200 # Raises errors.OpPrereqError on its own if group can't be found
3201 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3203 self.share_locks = _ShareAll()
3204 self.needed_locks = {
3205 locking.LEVEL_INSTANCE: [],
3206 locking.LEVEL_NODEGROUP: [],
3207 locking.LEVEL_NODE: [],
3210 def DeclareLocks(self, level):
3211 if level == locking.LEVEL_INSTANCE:
3212 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3214 # Lock instances optimistically, needs verification once node and group
3215 # locks have been acquired
3216 self.needed_locks[locking.LEVEL_INSTANCE] = \
3217 self.cfg.GetNodeGroupInstances(self.group_uuid)
3219 elif level == locking.LEVEL_NODEGROUP:
3220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3222 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3223 set([self.group_uuid] +
3224 # Lock all groups used by instances optimistically; this requires
3225 # going via the node before it's locked, requiring verification
3228 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3229 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3231 elif level == locking.LEVEL_NODE:
3232 # This will only lock the nodes in the group to be verified which contain
3233 # actual instances
3234 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3235 self._LockInstancesNodes()
3237 # Lock all nodes in group to be verified
3238 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3239 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3240 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3242 def CheckPrereq(self):
3243 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3244 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3245 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3247 assert self.group_uuid in owned_groups
3249 # Check if locked instances are still correct
3250 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3252 # Get instance information
3253 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3255 # Check if node groups for locked instances are still correct
3256 for (instance_name, inst) in self.instances.items():
3257 assert owned_nodes.issuperset(inst.all_nodes), \
3258 "Instance %s's nodes changed while we kept the lock" % instance_name
3260 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3263 assert self.group_uuid in inst_groups, \
3264 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3266 def Exec(self, feedback_fn):
3267 """Verify integrity of cluster disks.
3269 @rtype: tuple of three items
3270 @return: a tuple of (dict of node-to-node_error, list of instances
3271 which need activate-disks, dict of instance: (node, volume) for
3272 missing volumes
3276 res_instances = set()
3279 nv_dict = _MapInstanceDisksToNodes([inst
3280 for inst in self.instances.values()
3281 if inst.admin_state == constants.ADMINST_UP])
3284 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3285 set(self.cfg.GetVmCapableNodeList()))
3287 node_lvs = self.rpc.call_lv_list(nodes, [])
3289 for (node, node_res) in node_lvs.items():
3290 if node_res.offline:
3293 msg = node_res.fail_msg
3295 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3296 res_nodes[node] = msg
3299 for lv_name, (_, _, lv_online) in node_res.payload.items():
3300 inst = nv_dict.pop((node, lv_name), None)
3301 if not (lv_online or inst is None):
3302 res_instances.add(inst)
3304 # any leftover items in nv_dict are missing LVs, let's arrange the data
3306 for key, inst in nv_dict.iteritems():
3307 res_missing.setdefault(inst, []).append(list(key))
3309 return (res_nodes, list(res_instances), res_missing)
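# Hypothetical shape of the return value above:
#   ({"node3": "Error enumerating LVs: ..."},   # per-node errors
#    ["inst2"],                                 # instances needing activate-disks
#    {"inst5": [["node1", "vg/lv_data"]]})      # missing (node, volume) pairs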
3312 class LUClusterRepairDiskSizes(NoHooksLU):
3313 """Verifies the cluster disks sizes.
3318 def ExpandNames(self):
3319 if self.op.instances:
3320 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3321 self.needed_locks = {
3322 locking.LEVEL_NODE_RES: [],
3323 locking.LEVEL_INSTANCE: self.wanted_names,
3325 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3327 self.wanted_names = None
3328 self.needed_locks = {
3329 locking.LEVEL_NODE_RES: locking.ALL_SET,
3330 locking.LEVEL_INSTANCE: locking.ALL_SET,
3332 self.share_locks = {
3333 locking.LEVEL_NODE_RES: 1,
3334 locking.LEVEL_INSTANCE: 0,
3337 def DeclareLocks(self, level):
3338 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3339 self._LockInstancesNodes(primary_only=True, level=level)
3341 def CheckPrereq(self):
3342 """Check prerequisites.
3344 This only checks the optional instance list against the existing names.
3347 if self.wanted_names is None:
3348 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3350 self.wanted_instances = \
3351 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3353 def _EnsureChildSizes(self, disk):
3354 """Ensure children of the disk have the needed disk size.
3356 This is valid mainly for DRBD8 and fixes an issue where the
3357 children have a smaller disk size than the parent.
3359 @param disk: an L{ganeti.objects.Disk} object
3362 if disk.dev_type == constants.LD_DRBD8:
3363 assert disk.children, "Empty children for DRBD8?"
3364 fchild = disk.children[0]
3365 mismatch = fchild.size < disk.size
3367 self.LogInfo("Child disk has size %d, parent %d, fixing",
3368 fchild.size, disk.size)
3369 fchild.size = disk.size
3371 # and we recurse on this child only, not on the metadev
3372 return self._EnsureChildSizes(fchild) or mismatch
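# Hypothetical example of the fix above: a DRBD8 disk recorded with size
# 10240 whose data child is recorded as 10112 gets the child bumped to 10240,
# and the method returns True so the caller knows the configuration must be
# written back.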
3376 def Exec(self, feedback_fn):
3377 """Verify the size of cluster disks.
3380 # TODO: check child disks too
3381 # TODO: check differences in size between primary/secondary nodes
3383 for instance in self.wanted_instances:
3384 pnode = instance.primary_node
3385 if pnode not in per_node_disks:
3386 per_node_disks[pnode] = []
3387 for idx, disk in enumerate(instance.disks):
3388 per_node_disks[pnode].append((instance, idx, disk))
3390 assert not (frozenset(per_node_disks.keys()) -
3391 self.owned_locks(locking.LEVEL_NODE_RES)), \
3392 "Not owning correct locks"
3393 assert not self.owned_locks(locking.LEVEL_NODE)
3396 for node, dskl in per_node_disks.items():
3397 newl = [v[2].Copy() for v in dskl]
3398 for dsk in newl:
3399 self.cfg.SetDiskID(dsk, node)
3400 result = self.rpc.call_blockdev_getsize(node, newl)
3402 self.LogWarning("Failure in blockdev_getsize call to node"
3403 " %s, ignoring", node)
3405 if len(result.payload) != len(dskl):
3406 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3407 " result.payload=%s", node, len(dskl), result.payload)
3408 self.LogWarning("Invalid result from node %s, ignoring node results",
3411 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3413 self.LogWarning("Disk %d of instance %s did not return size"
3414 " information, ignoring", idx, instance.name)
3416 if not isinstance(size, (int, long)):
3417 self.LogWarning("Disk %d of instance %s did not return valid"
3418 " size information, ignoring", idx, instance.name)
3421 if size != disk.size:
3422 self.LogInfo("Disk %d of instance %s has mismatched size,"
3423 " correcting: recorded %d, actual %d", idx,
3424 instance.name, disk.size, size)
3426 self.cfg.Update(instance, feedback_fn)
3427 changed.append((instance.name, idx, size))
3428 if self._EnsureChildSizes(disk):
3429 self.cfg.Update(instance, feedback_fn)
3430 changed.append((instance.name, idx, disk.size))
3434 class LUClusterRename(LogicalUnit):
3435 """Rename the cluster.
3438 HPATH = "cluster-rename"
3439 HTYPE = constants.HTYPE_CLUSTER
3441 def BuildHooksEnv(self):
3446 "OP_TARGET": self.cfg.GetClusterName(),
3447 "NEW_NAME": self.op.name,
3450 def BuildHooksNodes(self):
3451 """Build hooks nodes.
3454 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3456 def CheckPrereq(self):
3457 """Verify that the passed name is a valid one.
3460 hostname = netutils.GetHostname(name=self.op.name,
3461 family=self.cfg.GetPrimaryIPFamily())
3463 new_name = hostname.name
3464 self.ip = new_ip = hostname.ip
3465 old_name = self.cfg.GetClusterName()
3466 old_ip = self.cfg.GetMasterIP()
3467 if new_name == old_name and new_ip == old_ip:
3468 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3469 " cluster has changed",
3471 if new_ip != old_ip:
3472 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3473 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3474 " reachable on the network" %
3475 new_ip, errors.ECODE_NOTUNIQUE)
3477 self.op.name = new_name
3479 def Exec(self, feedback_fn):
3480 """Rename the cluster.
3483 clustername = self.op.name
3486 # shutdown the master IP
3487 master_params = self.cfg.GetMasterNetworkParameters()
3488 ems = self.cfg.GetUseExternalMipScript()
3489 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3491 result.Raise("Could not disable the master role")
3494 cluster = self.cfg.GetClusterInfo()
3495 cluster.cluster_name = clustername
3496 cluster.master_ip = new_ip
3497 self.cfg.Update(cluster, feedback_fn)
3499 # update the known hosts file
3500 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3501 node_list = self.cfg.GetOnlineNodeList()
3503 node_list.remove(master_params.name)
3506 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3508 master_params.ip = new_ip
3509 result = self.rpc.call_node_activate_master_ip(master_params.name,
3511 msg = result.fail_msg
3513 self.LogWarning("Could not re-enable the master role on"
3514 " the master, please restart manually: %s", msg)
3519 def _ValidateNetmask(cfg, netmask):
3520 """Checks if a netmask is valid.
3522 @type cfg: L{config.ConfigWriter}
3523 @param cfg: The cluster configuration
3525 @param netmask: the netmask to be verified
3526 @raise errors.OpPrereqError: if the validation fails
3529 ip_family = cfg.GetPrimaryIPFamily()
3531 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3532 except errors.ProgrammerError:
3533 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3535 if not ipcls.ValidateNetmask(netmask):
3536 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3540 class LUClusterSetParams(LogicalUnit):
3541 """Change the parameters of the cluster.
3544 HPATH = "cluster-modify"
3545 HTYPE = constants.HTYPE_CLUSTER
3548 def CheckArguments(self):
3552 if self.op.uid_pool:
3553 uidpool.CheckUidPool(self.op.uid_pool)
3555 if self.op.add_uids:
3556 uidpool.CheckUidPool(self.op.add_uids)
3558 if self.op.remove_uids:
3559 uidpool.CheckUidPool(self.op.remove_uids)
3561 if self.op.master_netmask is not None:
3562 _ValidateNetmask(self.cfg, self.op.master_netmask)
3564 if self.op.diskparams:
3565 for dt_params in self.op.diskparams.values():
3566 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3568 def ExpandNames(self):
3569 # FIXME: in the future maybe other cluster params won't require checking on
3570 # all nodes to be modified.
3571 self.needed_locks = {
3572 locking.LEVEL_NODE: locking.ALL_SET,
3574 self.share_locks[locking.LEVEL_NODE] = 1
3576 def BuildHooksEnv(self):
3581 "OP_TARGET": self.cfg.GetClusterName(),
3582 "NEW_VG_NAME": self.op.vg_name,
3585 def BuildHooksNodes(self):
3586 """Build hooks nodes.
3589 mn = self.cfg.GetMasterNode()
3592 def CheckPrereq(self):
3593 """Check prerequisites.
3595 This checks whether the given params don't conflict and
3596 if the given volume group is valid.
3599 if self.op.vg_name is not None and not self.op.vg_name:
3600 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3601 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3602 " instances exist", errors.ECODE_INVAL)
3604 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3605 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3606 raise errors.OpPrereqError("Cannot disable drbd helper while"
3607 " drbd-based instances exist",
3610 node_list = self.owned_locks(locking.LEVEL_NODE)
3612 # if vg_name not None, checks given volume group on all nodes
3614 vglist = self.rpc.call_vg_list(node_list)
3615 for node in node_list:
3616 msg = vglist[node].fail_msg
3618 # ignoring down node
3619 self.LogWarning("Error while gathering data on node %s"
3620 " (ignoring node): %s", node, msg)
3622 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3624 constants.MIN_VG_SIZE)
3626 raise errors.OpPrereqError("Error on node '%s': %s" %
3627 (node, vgstatus), errors.ECODE_ENVIRON)
3629 if self.op.drbd_helper:
3630 # checks given drbd helper on all nodes
3631 helpers = self.rpc.call_drbd_helper(node_list)
3632 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3634 self.LogInfo("Not checking drbd helper on offline node %s", node)
3636 msg = helpers[node].fail_msg
3638 raise errors.OpPrereqError("Error checking drbd helper on node"
3639 " '%s': %s" % (node, msg),
3640 errors.ECODE_ENVIRON)
3641 node_helper = helpers[node].payload
3642 if node_helper != self.op.drbd_helper:
3643 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3644 (node, node_helper), errors.ECODE_ENVIRON)
3646 self.cluster = cluster = self.cfg.GetClusterInfo()
3647 # validate params changes
3648 if self.op.beparams:
3649 objects.UpgradeBeParams(self.op.beparams)
3650 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3651 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3653 if self.op.ndparams:
3654 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3655 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3657 # TODO: we need a more general way to handle resetting
3658 # cluster-level parameters to default values
3659 if self.new_ndparams["oob_program"] == "":
3660 self.new_ndparams["oob_program"] = \
3661 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3663 if self.op.hv_state:
3664 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3665 self.cluster.hv_state_static)
3666 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3667 for hv, values in new_hv_state.items())
3669 if self.op.disk_state:
3670 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3671 self.cluster.disk_state_static)
3672 self.new_disk_state = \
3673 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3674 for name, values in svalues.items()))
3675 for storage, svalues in new_disk_state.items())
3679 for key, value in self.op.ipolicy.items():
3680 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3681 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3683 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3684 self.new_ipolicy = ipolicy
3686 if self.op.nicparams:
3687 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3688 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3689 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3692 # check all instances for consistency
3693 for instance in self.cfg.GetAllInstancesInfo().values():
3694 for nic_idx, nic in enumerate(instance.nics):
3695 params_copy = copy.deepcopy(nic.nicparams)
3696 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3698 # check parameter syntax
3700 objects.NIC.CheckParameterSyntax(params_filled)
3701 except errors.ConfigurationError, err:
3702 nic_errors.append("Instance %s, nic/%d: %s" %
3703 (instance.name, nic_idx, err))
3705 # if we're moving instances to routed, check that they have an ip
3706 target_mode = params_filled[constants.NIC_MODE]
3707 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3708 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3709 " address" % (instance.name, nic_idx))
3711 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3712 "\n".join(nic_errors))
3714 # hypervisor list/parameters
3715 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3716 if self.op.hvparams:
3717 for hv_name, hv_dict in self.op.hvparams.items():
3718 if hv_name not in self.new_hvparams:
3719 self.new_hvparams[hv_name] = hv_dict
3721 self.new_hvparams[hv_name].update(hv_dict)
3723 # disk template parameters
3724 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3725 if self.op.diskparams:
3726 for dt_name, dt_params in self.op.diskparams.items():
3727 if dt_name not in self.new_diskparams:
3728 self.new_diskparams[dt_name] = dt_params
3730 self.new_diskparams[dt_name].update(dt_params)
3732 # os hypervisor parameters
3733 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3735 for os_name, hvs in self.op.os_hvp.items():
3736 if os_name not in self.new_os_hvp:
3737 self.new_os_hvp[os_name] = hvs
3739 for hv_name, hv_dict in hvs.items():
3740 if hv_name not in self.new_os_hvp[os_name]:
3741 self.new_os_hvp[os_name][hv_name] = hv_dict
else:
3743 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3746 self.new_osp = objects.FillDict(cluster.osparams, {})
3747 if self.op.osparams:
3748 for os_name, osp in self.op.osparams.items():
3749 if os_name not in self.new_osp:
3750 self.new_osp[os_name] = {}
3752 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
use_none=True)
3755 if not self.new_osp[os_name]:
3756 # we removed all parameters
3757 del self.new_osp[os_name]
else:
3759 # check the parameter validity (remote check)
3760 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3761 os_name, self.new_osp[os_name])
3763 # changes to the hypervisor list
3764 if self.op.enabled_hypervisors is not None:
3765 self.hv_list = self.op.enabled_hypervisors
3766 for hv in self.hv_list:
3767 # if the hypervisor doesn't already exist in the cluster
3768 # hvparams, we initialize it to empty, and then (in both
3769 # cases) we make sure to fill the defaults, as we might not
3770 # have a complete defaults list if the hypervisor wasn't
# enabled before
3772 if hv not in new_hvp:
new_hvp[hv] = {}
3774 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3775 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
else:
3777 self.hv_list = cluster.enabled_hypervisors
3779 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3780 # either the enabled list has changed, or the parameters have, validate
3781 for hv_name, hv_params in self.new_hvparams.items():
3782 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3783 (self.op.enabled_hypervisors and
3784 hv_name in self.op.enabled_hypervisors)):
3785 # either this is a new hypervisor, or its parameters have changed
3786 hv_class = hypervisor.GetHypervisor(hv_name)
3787 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3788 hv_class.CheckParameterSyntax(hv_params)
3789 _CheckHVParams(self, node_list, hv_name, hv_params)
if self.op.os_hvp:
3792 # no need to check any newly-enabled hypervisors, since the
3793 # defaults have already been checked in the above code-block
3794 for os_name, os_hvp in self.new_os_hvp.items():
3795 for hv_name, hv_params in os_hvp.items():
3796 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3797 # we need to fill in the new os_hvp on top of the actual hv_p
3798 cluster_defaults = self.new_hvparams.get(hv_name, {})
3799 new_osp = objects.FillDict(cluster_defaults, hv_params)
3800 hv_class = hypervisor.GetHypervisor(hv_name)
3801 hv_class.CheckParameterSyntax(new_osp)
3802 _CheckHVParams(self, node_list, hv_name, new_osp)
3804 if self.op.default_iallocator:
3805 alloc_script = utils.FindFile(self.op.default_iallocator,
3806 constants.IALLOCATOR_SEARCH_PATH,
os.path.isfile)
3808 if alloc_script is None:
3809 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3810 " specified" % self.op.default_iallocator,
errors.ECODE_INVAL)
3813 def Exec(self, feedback_fn):
3814 """Change the parameters of the cluster.
3817 if self.op.vg_name is not None:
3818 new_volume = self.op.vg_name
if not new_volume:
new_volume = None
3821 if new_volume != self.cfg.GetVGName():
3822 self.cfg.SetVGName(new_volume)
else:
3824 feedback_fn("Cluster LVM configuration already in desired"
3825 " state, not changing")
3826 if self.op.drbd_helper is not None:
3827 new_helper = self.op.drbd_helper
if not new_helper:
new_helper = None
3830 if new_helper != self.cfg.GetDRBDHelper():
3831 self.cfg.SetDRBDHelper(new_helper)
else:
3833 feedback_fn("Cluster DRBD helper already in desired state,"
" not changing")
3835 if self.op.hvparams:
3836 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
3838 self.cluster.os_hvp = self.new_os_hvp
3839 if self.op.enabled_hypervisors is not None:
3840 self.cluster.hvparams = self.new_hvparams
3841 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3842 if self.op.beparams:
3843 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3844 if self.op.nicparams:
3845 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
if self.op.ipolicy:
3847 self.cluster.ipolicy = self.new_ipolicy
3848 if self.op.osparams:
3849 self.cluster.osparams = self.new_osp
3850 if self.op.ndparams:
3851 self.cluster.ndparams = self.new_ndparams
3852 if self.op.diskparams:
3853 self.cluster.diskparams = self.new_diskparams
3854 if self.op.hv_state:
3855 self.cluster.hv_state_static = self.new_hv_state
3856 if self.op.disk_state:
3857 self.cluster.disk_state_static = self.new_disk_state
3859 if self.op.candidate_pool_size is not None:
3860 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3861 # we need to update the pool size here, otherwise the save will fail
3862 _AdjustCandidatePool(self, [])
3864 if self.op.maintain_node_health is not None:
3865 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3866 feedback_fn("Note: CONFD was disabled at build time, node health"
3867 " maintenance is not useful (still enabling it)")
3868 self.cluster.maintain_node_health = self.op.maintain_node_health
3870 if self.op.prealloc_wipe_disks is not None:
3871 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3873 if self.op.add_uids is not None:
3874 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3876 if self.op.remove_uids is not None:
3877 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3879 if self.op.uid_pool is not None:
3880 self.cluster.uid_pool = self.op.uid_pool
3882 if self.op.default_iallocator is not None:
3883 self.cluster.default_iallocator = self.op.default_iallocator
3885 if self.op.reserved_lvs is not None:
3886 self.cluster.reserved_lvs = self.op.reserved_lvs
3888 if self.op.use_external_mip_script is not None:
3889 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3891 def helper_os(aname, mods, desc):
3893 lst = getattr(self.cluster, aname)
3894 for key, val in mods:
3895 if key == constants.DDM_ADD:
if val in lst:
3897 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
else:
lst.append(val)
3900 elif key == constants.DDM_REMOVE:
if val in lst:
lst.remove(val)
else:
3904 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
else:
3906 raise errors.ProgrammerError("Invalid modification '%s'" % key)
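# Hedged usage sketch for helper_os (illustrative only, not from the original
# source): "mods" is a list of (action, os_name) pairs carried by the opcode,
# so a call could look like
#   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image")], "hidden")
# where "debian-image" is a made-up OS name used purely as an example.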
3908 if self.op.hidden_os:
3909 helper_os("hidden_os", self.op.hidden_os, "hidden")
3911 if self.op.blacklisted_os:
3912 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3914 if self.op.master_netdev:
3915 master_params = self.cfg.GetMasterNetworkParameters()
3916 ems = self.cfg.GetUseExternalMipScript()
3917 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3918 self.cluster.master_netdev)
3919 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
master_params, ems)
3921 result.Raise("Could not disable the master ip")
3922 feedback_fn("Changing master_netdev from %s to %s" %
3923 (master_params.netdev, self.op.master_netdev))
3924 self.cluster.master_netdev = self.op.master_netdev
3926 if self.op.master_netmask:
3927 master_params = self.cfg.GetMasterNetworkParameters()
3928 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3929 result = self.rpc.call_node_change_master_netmask(master_params.name,
3930 master_params.netmask,
3931 self.op.master_netmask,
master_params.ip,
3933 master_params.netdev)
if result.fail_msg:
3935 msg = "Could not change the master IP netmask: %s" % result.fail_msg
feedback_fn(msg)
3938 self.cluster.master_netmask = self.op.master_netmask
3940 self.cfg.Update(self.cluster, feedback_fn)
3942 if self.op.master_netdev:
3943 master_params = self.cfg.GetMasterNetworkParameters()
3944 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3945 self.op.master_netdev)
3946 ems = self.cfg.GetUseExternalMipScript()
3947 result = self.rpc.call_node_activate_master_ip(master_params.name,
master_params, ems)
if result.fail_msg:
3950 self.LogWarning("Could not re-enable the master ip on"
3951 " the master, please restart manually: %s",
result.fail_msg)
3955 def _UploadHelper(lu, nodes, fname):
3956 """Helper for uploading a file and showing warnings.
3959 if os.path.exists(fname):
3960 result = lu.rpc.call_upload_file(nodes, fname)
3961 for to_node, to_result in result.items():
3962 msg = to_result.fail_msg
if msg:
3964 msg = ("Copy of file %s to node %s failed: %s" %
3965 (fname, to_node, msg))
3966 lu.proc.LogWarning(msg)
3969 def _ComputeAncillaryFiles(cluster, redist):
3970 """Compute files external to Ganeti which need to be consistent.
3972 @type redist: boolean
3973 @param redist: Whether to include files which need to be redistributed
3976 # Compute files for all nodes
files_all = set([
3978 constants.SSH_KNOWN_HOSTS_FILE,
3979 constants.CONFD_HMAC_KEY,
3980 constants.CLUSTER_DOMAIN_SECRET_FILE,
3981 constants.SPICE_CERT_FILE,
3982 constants.SPICE_CACERT_FILE,
3983 constants.RAPI_USERS_FILE,
])
if not redist:
3987 files_all.update(constants.ALL_CERT_FILES)
3988 files_all.update(ssconf.SimpleStore().GetFileList())
else:
3990 # we need to ship at least the RAPI certificate
3991 files_all.add(constants.RAPI_CERT_FILE)
3993 if cluster.modify_etc_hosts:
3994 files_all.add(constants.ETC_HOSTS)
3996 # Files which are optional, these must:
3997 # - be present in one other category as well
3998 # - either exist or not exist on all nodes of that category (mc, vm all)
files_opt = set([
4000 constants.RAPI_USERS_FILE,
])
4003 # Files which should only be on master candidates
files_mc = set()
if not redist:
4007 files_mc.add(constants.CLUSTER_CONF_FILE)
4009 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
# replication
4011 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4013 # Files which should only be on VM-capable nodes
4014 files_vm = set(filename
4015 for hv_name in cluster.enabled_hypervisors
4016 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4018 files_opt |= set(filename
4019 for hv_name in cluster.enabled_hypervisors
4020 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4022 # Filenames in each category must be unique
4023 all_files_set = files_all | files_mc | files_vm
4024 assert (len(all_files_set) ==
4025 sum(map(len, [files_all, files_mc, files_vm]))), \
4026 "Found file listed in more than one file list"
4028 # Optional files must be present in one other category
4029 assert all_files_set.issuperset(files_opt), \
4030 "Optional file not in a different required list"
4032 return (files_all, files_opt, files_mc, files_vm)
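# Illustrative note: callers are expected to unpack the four categories in
# order, as _RedistributeAncillaryFiles below does, e.g.
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
# The variable names here simply mirror that caller and are not additional API.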
4035 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4036 """Distribute additional files which are part of the cluster configuration.
4038 ConfigWriter takes care of distributing the config and ssconf files, but
4039 there are more files which should be distributed to all nodes. This function
4040 makes sure those are copied.
4042 @param lu: calling logical unit
4043 @param additional_nodes: list of nodes not in the config to distribute to
4044 @type additional_vm: boolean
4045 @param additional_vm: whether the additional nodes are vm-capable or not
4048 # Gather target nodes
4049 cluster = lu.cfg.GetClusterInfo()
4050 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4052 online_nodes = lu.cfg.GetOnlineNodeList()
4053 vm_nodes = lu.cfg.GetVmCapableNodeList()
4055 if additional_nodes is not None:
4056 online_nodes.extend(additional_nodes)
if additional_vm:
4058 vm_nodes.extend(additional_nodes)
4060 # Never distribute to master node
4061 for nodelist in [online_nodes, vm_nodes]:
4062 if master_info.name in nodelist:
4063 nodelist.remove(master_info.name)
4066 (files_all, _, files_mc, files_vm) = \
4067 _ComputeAncillaryFiles(cluster, True)
4069 # Never re-distribute configuration file from here
4070 assert not (constants.CLUSTER_CONF_FILE in files_all or
4071 constants.CLUSTER_CONF_FILE in files_vm)
4072 assert not files_mc, "Master candidates not handled in this function"
filemap = [
4075 (online_nodes, files_all),
4076 (vm_nodes, files_vm),
]
4080 for (node_list, files) in filemap:
for fname in files:
4082 _UploadHelper(lu, node_list, fname)
4085 class LUClusterRedistConf(NoHooksLU):
4086 """Force the redistribution of cluster configuration.
4088 This is a very simple LU.
4093 def ExpandNames(self):
4094 self.needed_locks = {
4095 locking.LEVEL_NODE: locking.ALL_SET,
}
4097 self.share_locks[locking.LEVEL_NODE] = 1
4099 def Exec(self, feedback_fn):
4100 """Redistribute the configuration.
4103 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4104 _RedistributeAncillaryFiles(self)
4107 class LUClusterActivateMasterIp(NoHooksLU):
4108 """Activate the master IP on the master node.
4111 def Exec(self, feedback_fn):
4112 """Activate the master IP.
4115 master_params = self.cfg.GetMasterNetworkParameters()
4116 ems = self.cfg.GetUseExternalMipScript()
4117 result = self.rpc.call_node_activate_master_ip(master_params.name,
master_params, ems)
4119 result.Raise("Could not activate the master IP")
4122 class LUClusterDeactivateMasterIp(NoHooksLU):
4123 """Deactivate the master IP on the master node.
4126 def Exec(self, feedback_fn):
4127 """Deactivate the master IP.
4130 master_params = self.cfg.GetMasterNetworkParameters()
4131 ems = self.cfg.GetUseExternalMipScript()
4132 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
master_params, ems)
4134 result.Raise("Could not deactivate the master IP")
4137 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4138 """Sleep and poll for an instance's disk to sync.
4141 if not instance.disks or disks is not None and not disks:
return True
4144 disks = _ExpandCheckDisks(instance, disks)
4147 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4149 node = instance.primary_node
for dev in disks:
4152 lu.cfg.SetDiskID(dev, node)
4154 # TODO: Convert to utils.Retry
4157 degr_retries = 10 # in seconds, as we sleep 1 second each time
while True:
max_time = 0
done = True
4161 cumul_degraded = False
4162 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4163 msg = rstats.fail_msg
if msg:
4165 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4168 raise errors.RemoteError("Can't contact node %s for mirror data,"
4169 " aborting." % node)
4172 rstats = rstats.payload
4174 for i, mstat in enumerate(rstats):
4176 lu.LogWarning("Can't compute data for node %s/%s",
4177 node, disks[i].iv_name)
4180 cumul_degraded = (cumul_degraded or
4181 (mstat.is_degraded and mstat.sync_percent is None))
4182 if mstat.sync_percent is not None:
done = False
4184 if mstat.estimated_time is not None:
4185 rem_time = ("%s remaining (estimated)" %
4186 utils.FormatSeconds(mstat.estimated_time))
4187 max_time = mstat.estimated_time
else:
4189 rem_time = "no time estimate"
4190 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4191 (disks[i].iv_name, mstat.sync_percent, rem_time))
4193 # if we're done but degraded, let's do a few small retries, to
4194 # make sure we see a stable and not transient situation; therefore
4195 # we force restart of the loop
4196 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4197 logging.info("Degraded disks found, %d retries left", degr_retries)
degr_retries -= 1
time.sleep(1)
continue
if done or oneshot:
break
4205 time.sleep(min(60, max_time))
if done:
4208 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4209 return not cumul_degraded
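# Hedged usage sketch (illustrative, not taken verbatim from this module):
# callers typically treat a False return value as a degraded/aborted sync,
# along the lines of
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     lu.LogWarning("Disk syncing has not returned a good status")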
4212 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4213 """Check that mirrors are not degraded.
4215 The ldisk parameter, if True, will change the test from the
4216 is_degraded attribute (which represents overall non-ok status for
4217 the device(s)) to the ldisk (representing the local storage status).
4220 lu.cfg.SetDiskID(dev, node)
result = True
4224 if on_primary or dev.AssembleOnSecondary():
4225 rstats = lu.rpc.call_blockdev_find(node, dev)
4226 msg = rstats.fail_msg
if msg:
4228 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
result = False
4230 elif not rstats.payload:
4231 lu.LogWarning("Can't find disk on node %s", node)
result = False
else:
if ldisk:
4235 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
else:
4237 result = result and not rstats.payload.is_degraded
if dev.children:
4240 for child in dev.children:
4241 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
return result
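# Hedged usage sketch (illustrative only): disk-replacement style checks would
# pass ldisk=True to look only at the local storage status, e.g.
#   if not _CheckDiskConsistency(lu, dev, node_name, False, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded on %s" % (dev.iv_name,
#                                                             node_name))
# where node_name stands for whichever node is being inspected.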
4246 class LUOobCommand(NoHooksLU):
4247 """Logical unit for OOB handling.
4251 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4253 def ExpandNames(self):
4254 """Gather locks we need.
4257 if self.op.node_names:
4258 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4259 lock_names = self.op.node_names
else:
4261 lock_names = locking.ALL_SET
4263 self.needed_locks = {
4264 locking.LEVEL_NODE: lock_names,
4267 def CheckPrereq(self):
4268 """Check prerequisites.
4271 - the node exists in the configuration
4274 Any errors are signaled by raising errors.OpPrereqError.
4278 self.master_node = self.cfg.GetMasterNode()
4280 assert self.op.power_delay >= 0.0
4282 if self.op.node_names:
4283 if (self.op.command in self._SKIP_MASTER and
4284 self.master_node in self.op.node_names):
4285 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4286 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4288 if master_oob_handler:
4289 additional_text = ("run '%s %s %s' if you want to operate on the"
4290 " master regardless") % (master_oob_handler,
4294 additional_text = "it does not support out-of-band operations"
4296 raise errors.OpPrereqError(("Operating on the master node %s is not"
4297 " allowed for %s; %s") %
4298 (self.master_node, self.op.command,
4299 additional_text), errors.ECODE_INVAL)
4301 self.op.node_names = self.cfg.GetNodeList()
4302 if self.op.command in self._SKIP_MASTER:
4303 self.op.node_names.remove(self.master_node)
4305 if self.op.command in self._SKIP_MASTER:
4306 assert self.master_node not in self.op.node_names
4308 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
if node is None:
4310 raise errors.OpPrereqError("Node %s not found" % node_name,
errors.ECODE_NOENT)
4313 self.nodes.append(node)
4315 if (not self.op.ignore_status and
4316 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4317 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4318 " not marked offline") % node_name,
4321 def Exec(self, feedback_fn):
4322 """Execute OOB and return result if we expect any.
4325 master_node = self.master_node
ret = []
4328 for idx, node in enumerate(utils.NiceSort(self.nodes,
4329 key=lambda node: node.name)):
4330 node_entry = [(constants.RS_NORMAL, node.name)]
4331 ret.append(node_entry)
4333 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
4336 node_entry.append((constants.RS_UNAVAIL, None))
continue
4339 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4340 self.op.command, oob_program, node.name)
4341 result = self.rpc.call_run_oob(master_node, oob_program,
4342 self.op.command, node.name,
4346 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4347 node.name, result.fail_msg)
4348 node_entry.append((constants.RS_NODATA, None))
else:
try:
4351 self._CheckPayload(result)
4352 except errors.OpExecError, err:
4353 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4355 node_entry.append((constants.RS_NODATA, None))
4357 if self.op.command == constants.OOB_HEALTH:
4358 # For health we should log important events
4359 for item, status in result.payload:
4360 if status in [constants.OOB_STATUS_WARNING,
4361 constants.OOB_STATUS_CRITICAL]:
4362 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4363 item, node.name, status)
4365 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
4367 elif self.op.command == constants.OOB_POWER_OFF:
4368 node.powered = False
4369 elif self.op.command == constants.OOB_POWER_STATUS:
4370 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4371 if powered != node.powered:
4372 logging.warning(("Recorded power state (%s) of node '%s' does not"
4373 " match actual power state (%s)"), node.powered,
4376 # For configuration changing commands we should update the node
4377 if self.op.command in (constants.OOB_POWER_ON,
4378 constants.OOB_POWER_OFF):
4379 self.cfg.Update(node, feedback_fn)
4381 node_entry.append((constants.RS_NORMAL, result.payload))
4383 if (self.op.command == constants.OOB_POWER_ON and
4384 idx < len(self.nodes) - 1):
4385 time.sleep(self.op.power_delay)
return ret
4389 def _CheckPayload(self, result):
4390 """Checks if the payload is valid.
4392 @param result: RPC result
4393 @raises errors.OpExecError: If payload is not valid
errs = []
4397 if self.op.command == constants.OOB_HEALTH:
4398 if not isinstance(result.payload, list):
4399 errs.append("command 'health' is expected to return a list but got %s" %
4400 type(result.payload))
else:
4402 for item, status in result.payload:
4403 if status not in constants.OOB_STATUSES:
4404 errs.append("health item '%s' has invalid status '%s'" %
4407 if self.op.command == constants.OOB_POWER_STATUS:
4408 if not isinstance(result.payload, dict):
4409 errs.append("power-status is expected to return a dict but got %s" %
4410 type(result.payload))
4412 if self.op.command in [
4413 constants.OOB_POWER_ON,
4414 constants.OOB_POWER_OFF,
4415 constants.OOB_POWER_CYCLE,
4417 if result.payload is not None:
4418 errs.append("%s is expected to not return payload but got '%s'" %
4419 (self.op.command, result.payload))
if errs:
4422 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4423 utils.CommaJoin(errs))
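# Illustrative summary of the payload shapes validated above (values are
# examples only):
#   health:       a list of (item, status) pairs, status in constants.OOB_STATUSES
#   power-status: a dict such as {constants.OOB_POWER_STATUS_POWERED: True}
#   power-on/off/cycle: no payload (None)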
4426 class _OsQuery(_QueryBase):
4427 FIELDS = query.OS_FIELDS
4429 def ExpandNames(self, lu):
4430 # Lock all nodes in shared mode
4431 # Temporary removal of locks, should be reverted later
4432 # TODO: reintroduce locks when they are lighter-weight
4433 lu.needed_locks = {}
4434 #self.share_locks[locking.LEVEL_NODE] = 1
4435 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4437 # The following variables interact with _QueryBase._GetNames
if self.names:
4439 self.wanted = self.names
else:
4441 self.wanted = locking.ALL_SET
4443 self.do_locking = self.use_locking
4445 def DeclareLocks(self, lu, level):
pass
@staticmethod
4449 def _DiagnoseByOS(rlist):
4450 """Remaps a per-node return list into a per-os per-node dictionary
4452 @param rlist: a map with node names as keys and OS objects as values
4455 @return: a dictionary with osnames as keys and as value another
4456 map, with nodes as keys and tuples of (path, status, diagnose,
4457 variants, parameters, api_versions) as values, eg::
4459 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4460 (/srv/..., False, "invalid api")],
4461 "node2": [(/srv/..., True, "", [], [])]}
4466 # we build here the list of nodes that didn't fail the RPC (at RPC
4467 # level), so that nodes with a non-responding node daemon don't
4468 # make all OSes invalid
4469 good_nodes = [node_name for node_name in rlist
4470 if not rlist[node_name].fail_msg]
4471 for node_name, nr in rlist.items():
4472 if nr.fail_msg or not nr.payload:
continue
4474 for (name, path, status, diagnose, variants,
4475 params, api_versions) in nr.payload:
4476 if name not in all_os:
4477 # build a list of nodes for this os containing empty lists
4478 # for each node in node_list
all_os[name] = {}
4480 for nname in good_nodes:
4481 all_os[name][nname] = []
4482 # convert params from [name, help] to (name, help)
4483 params = [tuple(v) for v in params]
4484 all_os[name][node_name].append((path, status, diagnose,
4485 variants, params, api_versions))
return all_os
4488 def _GetQueryData(self, lu):
4489 """Computes the list of nodes and their attributes.
4492 # Locking is not used
4493 assert not (compat.any(lu.glm.is_owned(level)
4494 for level in locking.LEVELS
4495 if level != locking.LEVEL_CLUSTER) or
4496 self.do_locking or self.use_locking)
4498 valid_nodes = [node.name
4499 for node in lu.cfg.GetAllNodesInfo().values()
4500 if not node.offline and node.vm_capable]
4501 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4502 cluster = lu.cfg.GetClusterInfo()
data = {}
4506 for (os_name, os_data) in pol.items():
4507 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4508 hidden=(os_name in cluster.hidden_os),
4509 blacklisted=(os_name in cluster.blacklisted_os))
variants = set()
parameters = set()
4513 api_versions = set()
4515 for idx, osl in enumerate(os_data.values()):
4516 info.valid = bool(info.valid and osl and osl[0][1])
4520 (node_variants, node_params, node_api) = osl[0][3:6]
4523 variants.update(node_variants)
4524 parameters.update(node_params)
4525 api_versions.update(node_api)
4527 # Filter out inconsistent values
4528 variants.intersection_update(node_variants)
4529 parameters.intersection_update(node_params)
4530 api_versions.intersection_update(node_api)
4532 info.variants = list(variants)
4533 info.parameters = list(parameters)
4534 info.api_versions = list(api_versions)
4536 data[os_name] = info
4538 # Prepare data in requested order
4539 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
if name in data]
4543 class LUOsDiagnose(NoHooksLU):
4544 """Logical unit for OS diagnose/query.
4550 def _BuildFilter(fields, names):
4551 """Builds a filter for querying OSes.
4554 name_filter = qlang.MakeSimpleFilter("name", names)
4556 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4557 # respective field is not requested
4558 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4559 for fname in ["hidden", "blacklisted"]
4560 if fname not in fields]
4561 if "valid" not in fields:
4562 status_filter.append([qlang.OP_TRUE, "valid"])
if status_filter:
4565 status_filter.insert(0, qlang.OP_AND)
else:
4567 status_filter = None
4569 if name_filter and status_filter:
4570 return [qlang.OP_AND, name_filter, status_filter]
elif name_filter:
return name_filter
else:
4574 return status_filter
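# Hedged example of the filter built above, assuming no names are given (so
# qlang.MakeSimpleFilter yields a false value) and only "name" is requested:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]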
4576 def CheckArguments(self):
4577 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4578 self.op.output_fields, False)
4580 def ExpandNames(self):
4581 self.oq.ExpandNames(self)
4583 def Exec(self, feedback_fn):
4584 return self.oq.OldStyleQuery(self)
4587 class LUNodeRemove(LogicalUnit):
4588 """Logical unit for removing a node.
4591 HPATH = "node-remove"
4592 HTYPE = constants.HTYPE_NODE
4594 def BuildHooksEnv(self):
4597 This doesn't run on the target node in the pre phase as a failed
4598 node would then be impossible to remove.
4602 "OP_TARGET": self.op.node_name,
4603 "NODE_NAME": self.op.node_name,
4606 def BuildHooksNodes(self):
4607 """Build hooks nodes.
4610 all_nodes = self.cfg.GetNodeList()
try:
4612 all_nodes.remove(self.op.node_name)
except ValueError:
4614 logging.warning("Node '%s', which is about to be removed, was not found"
4615 " in the list of all nodes", self.op.node_name)
4616 return (all_nodes, all_nodes)
4618 def CheckPrereq(self):
4619 """Check prerequisites.
4622 - the node exists in the configuration
4623 - it does not have primary or secondary instances
4624 - it's not the master
4626 Any errors are signaled by raising errors.OpPrereqError.
4629 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4630 node = self.cfg.GetNodeInfo(self.op.node_name)
4631 assert node is not None
4633 masternode = self.cfg.GetMasterNode()
4634 if node.name == masternode:
4635 raise errors.OpPrereqError("Node is the master node, failover to another"
4636 " node is required", errors.ECODE_INVAL)
4638 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4639 if node.name in instance.all_nodes:
4640 raise errors.OpPrereqError("Instance %s is still running on the node,"
4641 " please remove first" % instance_name,
4643 self.op.node_name = node.name
4646 def Exec(self, feedback_fn):
4647 """Removes the node from the cluster.
4651 logging.info("Stopping the node daemon and removing configs from node %s",
4654 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4656 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4659 # Promote nodes to master candidate as needed
4660 _AdjustCandidatePool(self, exceptions=[node.name])
4661 self.context.RemoveNode(node.name)
4663 # Run post hooks on the node before it's removed
4664 _RunPostHook(self, node.name)
4666 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4667 msg = result.fail_msg
if msg:
4669 self.LogWarning("Errors encountered on the remote node while leaving"
4670 " the cluster: %s", msg)
4672 # Remove node from our /etc/hosts
4673 if self.cfg.GetClusterInfo().modify_etc_hosts:
4674 master_node = self.cfg.GetMasterNode()
4675 result = self.rpc.call_etc_hosts_modify(master_node,
4676 constants.ETC_HOSTS_REMOVE,
4678 result.Raise("Can't update hosts file with new host data")
4679 _RedistributeAncillaryFiles(self)
4682 class _NodeQuery(_QueryBase):
4683 FIELDS = query.NODE_FIELDS
4685 def ExpandNames(self, lu):
4686 lu.needed_locks = {}
4687 lu.share_locks = _ShareAll()
if self.names:
4690 self.wanted = _GetWantedNodes(lu, self.names)
else:
4692 self.wanted = locking.ALL_SET
4694 self.do_locking = (self.use_locking and
4695 query.NQ_LIVE in self.requested_data)
4698 # If any non-static field is requested we need to lock the nodes
4699 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4701 def DeclareLocks(self, lu, level):
4704 def _GetQueryData(self, lu):
4705 """Computes the list of nodes and their attributes.
4708 all_info = lu.cfg.GetAllNodesInfo()
4710 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4712 # Gather data as requested
4713 if query.NQ_LIVE in self.requested_data:
4714 # filter out non-vm_capable nodes
4715 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4717 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4718 [lu.cfg.GetHypervisorType()])
4719 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4720 for (name, nresult) in node_data.items()
4721 if not nresult.fail_msg and nresult.payload)
else:
live_data = None
4725 if query.NQ_INST in self.requested_data:
4726 node_to_primary = dict([(name, set()) for name in nodenames])
4727 node_to_secondary = dict([(name, set()) for name in nodenames])
4729 inst_data = lu.cfg.GetAllInstancesInfo()
4731 for inst in inst_data.values():
4732 if inst.primary_node in node_to_primary:
4733 node_to_primary[inst.primary_node].add(inst.name)
4734 for secnode in inst.secondary_nodes:
4735 if secnode in node_to_secondary:
4736 node_to_secondary[secnode].add(inst.name)
4738 node_to_primary = None
4739 node_to_secondary = None
4741 if query.NQ_OOB in self.requested_data:
4742 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4743 for name, node in all_info.iteritems())
4747 if query.NQ_GROUP in self.requested_data:
4748 groups = lu.cfg.GetAllNodeGroupsInfo()
4752 return query.NodeQueryData([all_info[name] for name in nodenames],
4753 live_data, lu.cfg.GetMasterNode(),
4754 node_to_primary, node_to_secondary, groups,
4755 oob_support, lu.cfg.GetClusterInfo())
4758 class LUNodeQuery(NoHooksLU):
4759 """Logical unit for querying nodes.
4762 # pylint: disable=W0142
4765 def CheckArguments(self):
4766 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4767 self.op.output_fields, self.op.use_locking)
4769 def ExpandNames(self):
4770 self.nq.ExpandNames(self)
4772 def DeclareLocks(self, level):
4773 self.nq.DeclareLocks(self, level)
4775 def Exec(self, feedback_fn):
4776 return self.nq.OldStyleQuery(self)
4779 class LUNodeQueryvols(NoHooksLU):
4780 """Logical unit for getting volumes on node(s).
4784 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4785 _FIELDS_STATIC = utils.FieldSet("node")
4787 def CheckArguments(self):
4788 _CheckOutputFields(static=self._FIELDS_STATIC,
4789 dynamic=self._FIELDS_DYNAMIC,
4790 selected=self.op.output_fields)
4792 def ExpandNames(self):
4793 self.share_locks = _ShareAll()
4794 self.needed_locks = {}
4796 if not self.op.nodes:
4797 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4799 self.needed_locks[locking.LEVEL_NODE] = \
4800 _GetWantedNodes(self, self.op.nodes)
4802 def Exec(self, feedback_fn):
4803 """Computes the list of nodes and their attributes.
4806 nodenames = self.owned_locks(locking.LEVEL_NODE)
4807 volumes = self.rpc.call_node_volumes(nodenames)
4809 ilist = self.cfg.GetAllInstancesInfo()
4810 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4813 for node in nodenames:
4814 nresult = volumes[node]
4817 msg = nresult.fail_msg
if msg:
4819 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
continue
4822 node_vols = sorted(nresult.payload,
4823 key=operator.itemgetter("dev"))
4825 for vol in node_vols:
4827 for field in self.op.output_fields:
4830 elif field == "phys":
4834 elif field == "name":
4836 elif field == "size":
4837 val = int(float(vol["size"]))
4838 elif field == "instance":
4839 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
else:
4841 raise errors.ParameterError(field)
4842 node_output.append(str(val))
4844 output.append(node_output)
return output
4849 class LUNodeQueryStorage(NoHooksLU):
4850 """Logical unit for getting information on storage units on node(s).
4853 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4856 def CheckArguments(self):
4857 _CheckOutputFields(static=self._FIELDS_STATIC,
4858 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4859 selected=self.op.output_fields)
4861 def ExpandNames(self):
4862 self.share_locks = _ShareAll()
4863 self.needed_locks = {}
if self.op.nodes:
4866 self.needed_locks[locking.LEVEL_NODE] = \
4867 _GetWantedNodes(self, self.op.nodes)
else:
4869 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4871 def Exec(self, feedback_fn):
4872 """Computes the list of nodes and their attributes.
4875 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4877 # Always get name to sort by
4878 if constants.SF_NAME in self.op.output_fields:
4879 fields = self.op.output_fields[:]
4881 fields = [constants.SF_NAME] + self.op.output_fields
4883 # Never ask for node or type as it's only known to the LU
4884 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4885 while extra in fields:
4886 fields.remove(extra)
4888 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4889 name_idx = field_idx[constants.SF_NAME]
4891 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4892 data = self.rpc.call_storage_list(self.nodes,
4893 self.op.storage_type, st_args,
4894 self.op.name, fields)
4898 for node in utils.NiceSort(self.nodes):
4899 nresult = data[node]
4903 msg = nresult.fail_msg
if msg:
4905 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
continue
4908 rows = dict([(row[name_idx], row) for row in nresult.payload])
4910 for name in utils.NiceSort(rows.keys()):
4915 for field in self.op.output_fields:
4916 if field == constants.SF_NODE:
4918 elif field == constants.SF_TYPE:
4919 val = self.op.storage_type
4920 elif field in field_idx:
4921 val = row[field_idx[field]]
4923 raise errors.ParameterError(field)
4932 class _InstanceQuery(_QueryBase):
4933 FIELDS = query.INSTANCE_FIELDS
4935 def ExpandNames(self, lu):
4936 lu.needed_locks = {}
4937 lu.share_locks = _ShareAll()
if self.names:
4940 self.wanted = _GetWantedInstances(lu, self.names)
else:
4942 self.wanted = locking.ALL_SET
4944 self.do_locking = (self.use_locking and
4945 query.IQ_LIVE in self.requested_data)
4947 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4948 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4949 lu.needed_locks[locking.LEVEL_NODE] = []
4950 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4952 self.do_grouplocks = (self.do_locking and
4953 query.IQ_NODES in self.requested_data)
4955 def DeclareLocks(self, lu, level):
4957 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4958 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4960 # Lock all groups used by instances optimistically; this requires going
4961 # via the node before it's locked, requiring verification later on
4962 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4964 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4965 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4966 elif level == locking.LEVEL_NODE:
4967 lu._LockInstancesNodes() # pylint: disable=W0212
4970 def _CheckGroupLocks(lu):
4971 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4972 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4974 # Check if node groups for locked instances are still correct
4975 for instance_name in owned_instances:
4976 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4978 def _GetQueryData(self, lu):
4979 """Computes the list of instances and their attributes.
4982 if self.do_grouplocks:
4983 self._CheckGroupLocks(lu)
4985 cluster = lu.cfg.GetClusterInfo()
4986 all_info = lu.cfg.GetAllInstancesInfo()
4988 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4990 instance_list = [all_info[name] for name in instance_names]
4991 nodes = frozenset(itertools.chain(*(inst.all_nodes
4992 for inst in instance_list)))
4993 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4996 wrongnode_inst = set()
4998 # Gather data as requested
4999 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5001 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5003 result = node_data[name]
5005 # offline nodes will be in both lists
5006 assert result.fail_msg
5007 offline_nodes.append(name)
if result.fail_msg:
5009 bad_nodes.append(name)
5010 elif result.payload:
5011 for inst in result.payload:
5012 if inst in all_info:
5013 if all_info[inst].primary_node == name:
5014 live_data.update(result.payload)
5016 wrongnode_inst.add(inst)
5018 # orphan instance; we don't list it here as we don't
5019 # handle this case yet in the output of instance listing
5020 logging.warning("Orphan instance '%s' found on node %s",
5022 # else no instance is alive
5026 if query.IQ_DISKUSAGE in self.requested_data:
5027 disk_usage = dict((inst.name,
5028 _ComputeDiskSize(inst.disk_template,
5029 [{constants.IDISK_SIZE: disk.size}
5030 for disk in inst.disks]))
5031 for inst in instance_list)
else:
disk_usage = None
5035 if query.IQ_CONSOLE in self.requested_data:
consinfo = {}
5037 for inst in instance_list:
5038 if inst.name in live_data:
5039 # Instance is running
5040 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5042 consinfo[inst.name] = None
5043 assert set(consinfo.keys()) == set(instance_names)
5047 if query.IQ_NODES in self.requested_data:
5048 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5050 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5051 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5052 for uuid in set(map(operator.attrgetter("group"),
5058 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5059 disk_usage, offline_nodes, bad_nodes,
5060 live_data, wrongnode_inst, consinfo,
nodes, groups)
5064 class LUQuery(NoHooksLU):
5065 """Query for resources/items of a certain kind.
5068 # pylint: disable=W0142
5071 def CheckArguments(self):
5072 qcls = _GetQueryImplementation(self.op.what)
5074 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5076 def ExpandNames(self):
5077 self.impl.ExpandNames(self)
5079 def DeclareLocks(self, level):
5080 self.impl.DeclareLocks(self, level)
5082 def Exec(self, feedback_fn):
5083 return self.impl.NewStyleQuery(self)
5086 class LUQueryFields(NoHooksLU):
5087 """Query for resources/items of a certain kind.
5090 # pylint: disable=W0142
5093 def CheckArguments(self):
5094 self.qcls = _GetQueryImplementation(self.op.what)
5096 def ExpandNames(self):
5097 self.needed_locks = {}
5099 def Exec(self, feedback_fn):
5100 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5103 class LUNodeModifyStorage(NoHooksLU):
5104 """Logical unit for modifying a storage volume on a node.
5109 def CheckArguments(self):
5110 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5112 storage_type = self.op.storage_type
try:
5115 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
except KeyError:
5117 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5118 " modified" % storage_type,
5121 diff = set(self.op.changes.keys()) - modifiable
5123 raise errors.OpPrereqError("The following fields can not be modified for"
5124 " storage units of type '%s': %r" %
5125 (storage_type, list(diff)),
5128 def ExpandNames(self):
5129 self.needed_locks = {
5130 locking.LEVEL_NODE: self.op.node_name,
5133 def Exec(self, feedback_fn):
5134 """Computes the list of nodes and their attributes.
5137 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5138 result = self.rpc.call_storage_modify(self.op.node_name,
5139 self.op.storage_type, st_args,
5140 self.op.name, self.op.changes)
5141 result.Raise("Failed to modify storage unit '%s' on %s" %
5142 (self.op.name, self.op.node_name))
5145 class LUNodeAdd(LogicalUnit):
5146 """Logical unit for adding node to the cluster.
5150 HTYPE = constants.HTYPE_NODE
5151 _NFLAGS = ["master_capable", "vm_capable"]
5153 def CheckArguments(self):
5154 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5155 # validate/normalize the node name
5156 self.hostname = netutils.GetHostname(name=self.op.node_name,
5157 family=self.primary_ip_family)
5158 self.op.node_name = self.hostname.name
5160 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5161 raise errors.OpPrereqError("Cannot readd the master node",
5164 if self.op.readd and self.op.group:
5165 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5166 " being readded", errors.ECODE_INVAL)
5168 def BuildHooksEnv(self):
5171 This will run on all nodes before, and on all nodes + the new node after.
5175 "OP_TARGET": self.op.node_name,
5176 "NODE_NAME": self.op.node_name,
5177 "NODE_PIP": self.op.primary_ip,
5178 "NODE_SIP": self.op.secondary_ip,
5179 "MASTER_CAPABLE": str(self.op.master_capable),
5180 "VM_CAPABLE": str(self.op.vm_capable),
5183 def BuildHooksNodes(self):
5184 """Build hooks nodes.
5187 # Exclude added node
5188 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5189 post_nodes = pre_nodes + [self.op.node_name, ]
5191 return (pre_nodes, post_nodes)
5193 def CheckPrereq(self):
5194 """Check prerequisites.
5197 - the new node is not already in the config
5199 - its parameters (single/dual homed) matches the cluster
5201 Any errors are signaled by raising errors.OpPrereqError.
5205 hostname = self.hostname
5206 node = hostname.name
5207 primary_ip = self.op.primary_ip = hostname.ip
5208 if self.op.secondary_ip is None:
5209 if self.primary_ip_family == netutils.IP6Address.family:
5210 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5211 " IPv4 address must be given as secondary",
5213 self.op.secondary_ip = primary_ip
5215 secondary_ip = self.op.secondary_ip
5216 if not netutils.IP4Address.IsValid(secondary_ip):
5217 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5218 " address" % secondary_ip, errors.ECODE_INVAL)
5220 node_list = cfg.GetNodeList()
5221 if not self.op.readd and node in node_list:
5222 raise errors.OpPrereqError("Node %s is already in the configuration" %
5223 node, errors.ECODE_EXISTS)
5224 elif self.op.readd and node not in node_list:
5225 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5228 self.changed_primary_ip = False
5230 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5231 if self.op.readd and node == existing_node_name:
5232 if existing_node.secondary_ip != secondary_ip:
5233 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5234 " address configuration as before",
5236 if existing_node.primary_ip != primary_ip:
5237 self.changed_primary_ip = True
5241 if (existing_node.primary_ip == primary_ip or
5242 existing_node.secondary_ip == primary_ip or
5243 existing_node.primary_ip == secondary_ip or
5244 existing_node.secondary_ip == secondary_ip):
5245 raise errors.OpPrereqError("New node ip address(es) conflict with"
5246 " existing node %s" % existing_node.name,
5247 errors.ECODE_NOTUNIQUE)
5249 # After this 'if' block, None is no longer a valid value for the
5250 # _capable op attributes
5252 old_node = self.cfg.GetNodeInfo(node)
5253 assert old_node is not None, "Can't retrieve locked node %s" % node
5254 for attr in self._NFLAGS:
5255 if getattr(self.op, attr) is None:
5256 setattr(self.op, attr, getattr(old_node, attr))
5258 for attr in self._NFLAGS:
5259 if getattr(self.op, attr) is None:
5260 setattr(self.op, attr, True)
5262 if self.op.readd and not self.op.vm_capable:
5263 pri, sec = cfg.GetNodeInstances(node)
5265 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5266 " flag set to false, but it already holds"
5267 " instances" % node,
5270 # check that the type of the node (single versus dual homed) is the
5271 # same as for the master
5272 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5273 master_singlehomed = myself.secondary_ip == myself.primary_ip
5274 newbie_singlehomed = secondary_ip == primary_ip
5275 if master_singlehomed != newbie_singlehomed:
5276 if master_singlehomed:
5277 raise errors.OpPrereqError("The master has no secondary ip but the"
5278 " new node has one",
5281 raise errors.OpPrereqError("The master has a secondary ip but the"
5282 " new node doesn't have one",
5285 # checks reachability
5286 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5287 raise errors.OpPrereqError("Node not reachable by ping",
5288 errors.ECODE_ENVIRON)
5290 if not newbie_singlehomed:
5291 # check reachability from my secondary ip to newbie's secondary ip
5292 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5293 source=myself.secondary_ip):
5294 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5295 " based ping to node daemon port",
5296 errors.ECODE_ENVIRON)
5303 if self.op.master_capable:
5304 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5306 self.master_candidate = False
5309 self.new_node = old_node
5311 node_group = cfg.LookupNodeGroup(self.op.group)
5312 self.new_node = objects.Node(name=node,
5313 primary_ip=primary_ip,
5314 secondary_ip=secondary_ip,
5315 master_candidate=self.master_candidate,
5316 offline=False, drained=False,
5319 if self.op.ndparams:
5320 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5322 def Exec(self, feedback_fn):
5323 """Adds the new node to the cluster.
5326 new_node = self.new_node
5327 node = new_node.name
5329 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5332 # We are adding a new node, so we assume it's powered
5333 new_node.powered = True
5335 # for re-adds, reset the offline/drained/master-candidate flags;
5336 # we need to reset here, otherwise offline would prevent RPC calls
5337 # later in the procedure; this also means that if the re-add
5338 # fails, we are left with a non-offlined, broken node
if self.op.readd:
5340 new_node.drained = new_node.offline = False # pylint: disable=W0201
5341 self.LogInfo("Readding a node, the offline/drained flags were reset")
5342 # if we demote the node, we do cleanup later in the procedure
5343 new_node.master_candidate = self.master_candidate
5344 if self.changed_primary_ip:
5345 new_node.primary_ip = self.op.primary_ip
5347 # copy the master/vm_capable flags
5348 for attr in self._NFLAGS:
5349 setattr(new_node, attr, getattr(self.op, attr))
5351 # notify the user about any possible mc promotion
5352 if new_node.master_candidate:
5353 self.LogInfo("Node will be a master candidate")
5355 if self.op.ndparams:
5356 new_node.ndparams = self.op.ndparams
5358 new_node.ndparams = {}
5360 # check connectivity
5361 result = self.rpc.call_version([node])[node]
5362 result.Raise("Can't get version information from node %s" % node)
5363 if constants.PROTOCOL_VERSION == result.payload:
5364 logging.info("Communication to node %s fine, sw version %s match",
5365 node, result.payload)
else:
5367 raise errors.OpExecError("Version mismatch master version %s,"
5368 " node version %s" %
5369 (constants.PROTOCOL_VERSION, result.payload))
5371 # Add node to our /etc/hosts, and add key to known_hosts
5372 if self.cfg.GetClusterInfo().modify_etc_hosts:
5373 master_node = self.cfg.GetMasterNode()
5374 result = self.rpc.call_etc_hosts_modify(master_node,
5375 constants.ETC_HOSTS_ADD,
5378 result.Raise("Can't update hosts file with new host data")
5380 if new_node.secondary_ip != new_node.primary_ip:
5381 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5384 node_verify_list = [self.cfg.GetMasterNode()]
5385 node_verify_param = {
5386 constants.NV_NODELIST: ([node], {}),
5387 # TODO: do a node-net-test as well?
5390 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5391 self.cfg.GetClusterName())
5392 for verifier in node_verify_list:
5393 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5394 nl_payload = result[verifier].payload[constants.NV_NODELIST]
if nl_payload:
5396 for failed in nl_payload:
5397 feedback_fn("ssh/hostname verification failed"
5398 " (checking from %s): %s" %
5399 (verifier, nl_payload[failed]))
5400 raise errors.OpExecError("ssh/hostname verification failed")
5403 _RedistributeAncillaryFiles(self)
5404 self.context.ReaddNode(new_node)
5405 # make sure we redistribute the config
5406 self.cfg.Update(new_node, feedback_fn)
5407 # and make sure the new node will not have old files around
5408 if not new_node.master_candidate:
5409 result = self.rpc.call_node_demote_from_mc(new_node.name)
5410 msg = result.fail_msg
5412 self.LogWarning("Node failed to demote itself from master"
5413 " candidate status: %s" % msg)
5415 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5416 additional_vm=self.op.vm_capable)
5417 self.context.AddNode(new_node, self.proc.GetECId())
5420 class LUNodeSetParams(LogicalUnit):
5421 """Modifies the parameters of a node.
5423 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5424 to the node role (as _ROLE_*)
5425 @cvar _R2F: a dictionary from node role to tuples of flags
5426 @cvar _FLAGS: a list of attribute names corresponding to the flags
5429 HPATH = "node-modify"
5430 HTYPE = constants.HTYPE_NODE
5432 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
_F2R = {
5434 (True, False, False): _ROLE_CANDIDATE,
5435 (False, True, False): _ROLE_DRAINED,
5436 (False, False, True): _ROLE_OFFLINE,
5437 (False, False, False): _ROLE_REGULAR,
}
5439 _R2F = dict((v, k) for k, v in _F2R.items())
5440 _FLAGS = ["master_candidate", "drained", "offline"]
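# Illustrative reading of the tables above: a node with master_candidate=False,
# drained=False, offline=True maps via _F2R[(False, False, True)] to
# _ROLE_OFFLINE, and _R2F[_ROLE_OFFLINE] gives back the (False, False, True)
# flag tuple in _FLAGS order.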
5442 def CheckArguments(self):
5443 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5445 self.op.master_capable, self.op.vm_capable,
5446 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5448 if all_mods.count(None) == len(all_mods):
5449 raise errors.OpPrereqError("Please pass at least one modification",
5451 if all_mods.count(True) > 1:
5452 raise errors.OpPrereqError("Can't set the node into more than one"
5453 " state at the same time",
5456 # Boolean value that tells us whether we might be demoting from MC
5457 self.might_demote = (self.op.master_candidate == False or
5458 self.op.offline == True or
5459 self.op.drained == True or
5460 self.op.master_capable == False)
5462 if self.op.secondary_ip:
5463 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5464 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5465 " address" % self.op.secondary_ip,
5468 self.lock_all = self.op.auto_promote and self.might_demote
5469 self.lock_instances = self.op.secondary_ip is not None
5471 def _InstanceFilter(self, instance):
5472 """Filter for getting affected instances.
5475 return (instance.disk_template in constants.DTS_INT_MIRROR and
5476 self.op.node_name in instance.all_nodes)
5478 def ExpandNames(self):
if self.lock_all:
5480 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
else:
5482 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5484 # Since modifying a node can have severe effects on currently running
5485 # operations the resource lock is at least acquired in shared mode
5486 self.needed_locks[locking.LEVEL_NODE_RES] = \
5487 self.needed_locks[locking.LEVEL_NODE]
5489 # Get node resource and instance locks in shared mode; they are not used
5490 # for anything but read-only access
5491 self.share_locks[locking.LEVEL_NODE_RES] = 1
5492 self.share_locks[locking.LEVEL_INSTANCE] = 1
5494 if self.lock_instances:
5495 self.needed_locks[locking.LEVEL_INSTANCE] = \
5496 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5498 def BuildHooksEnv(self):
5501 This runs on the master node.
5505 "OP_TARGET": self.op.node_name,
5506 "MASTER_CANDIDATE": str(self.op.master_candidate),
5507 "OFFLINE": str(self.op.offline),
5508 "DRAINED": str(self.op.drained),
5509 "MASTER_CAPABLE": str(self.op.master_capable),
5510 "VM_CAPABLE": str(self.op.vm_capable),
5513 def BuildHooksNodes(self):
5514 """Build hooks nodes.
5517 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5520 def CheckPrereq(self):
5521 """Check prerequisites.
5523 This only checks the instance list against the existing names.
5526 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5528 if self.lock_instances:
5529 affected_instances = \
5530 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5532 # Verify instance locks
5533 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5534 wanted_instances = frozenset(affected_instances.keys())
5535 if wanted_instances - owned_instances:
5536 raise errors.OpPrereqError("Instances affected by changing node %s's"
5537 " secondary IP address have changed since"
5538 " locks were acquired, wanted '%s', have"
5539 " '%s'; retry the operation" %
5541 utils.CommaJoin(wanted_instances),
5542 utils.CommaJoin(owned_instances)),
5545 affected_instances = None
5547 if (self.op.master_candidate is not None or
5548 self.op.drained is not None or
5549 self.op.offline is not None):
5550 # we can't change the master's node flags
5551 if self.op.node_name == self.cfg.GetMasterNode():
5552 raise errors.OpPrereqError("The master role can be changed"
5553 " only via master-failover",
5556 if self.op.master_candidate and not node.master_capable:
5557 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5558 " it a master candidate" % node.name,
5561 if self.op.vm_capable == False:
5562 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5564 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5565 " the vm_capable flag" % node.name,
5568 if node.master_candidate and self.might_demote and not self.lock_all:
5569 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5570 # check if after removing the current node, we're missing master
5572 (mc_remaining, mc_should, _) = \
5573 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5574 if mc_remaining < mc_should:
5575 raise errors.OpPrereqError("Not enough master candidates, please"
5576 " pass auto promote option to allow"
5577 " promotion", errors.ECODE_STATE)
5579 self.old_flags = old_flags = (node.master_candidate,
5580 node.drained, node.offline)
5581 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5582 self.old_role = old_role = self._F2R[old_flags]
5584 # Check for ineffective changes
5585 for attr in self._FLAGS:
5586 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5587 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5588 setattr(self.op, attr, None)
5590 # Past this point, any flag change to False means a transition
5591 # away from the respective state, as only real changes are kept
5593 # TODO: We might query the real power state if it supports OOB
5594 if _SupportsOob(self.cfg, node):
5595 if self.op.offline is False and not (node.powered or
5596 self.op.powered == True):
5597 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5598 " offline status can be reset") %
5600 elif self.op.powered is not None:
5601 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5602 " as it does not support out-of-band"
5603 " handling") % self.op.node_name)
5605 # If we're being deofflined/drained, we'll MC ourself if needed
5606 if (self.op.drained == False or self.op.offline == False or
5607 (self.op.master_capable and not node.master_capable)):
5608 if _DecideSelfPromotion(self):
5609 self.op.master_candidate = True
5610 self.LogInfo("Auto-promoting node to master candidate")
5612 # If we're no longer master capable, we'll demote ourselves from MC
5613 if self.op.master_capable == False and node.master_candidate:
5614 self.LogInfo("Demoting from master candidate")
5615 self.op.master_candidate = False
5618 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5619 if self.op.master_candidate:
5620 new_role = self._ROLE_CANDIDATE
5621 elif self.op.drained:
5622 new_role = self._ROLE_DRAINED
5623 elif self.op.offline:
5624 new_role = self._ROLE_OFFLINE
5625 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5626 # False is still in new flags, which means we're un-setting (the
5628 new_role = self._ROLE_REGULAR
5629 else: # no new flags, nothing, keep old role
5632 self.new_role = new_role
5634 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5635 # Trying to transition out of offline status
5636 # TODO: Use standard RPC runner, but make sure it works when the node is
5637 # still marked offline
5638 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5640 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5641 " to report its version: %s" %
5642 (node.name, result.fail_msg),
5645 self.LogWarning("Transitioning node from offline to online state"
5646 " without using re-add. Please make sure the node"
5649 if self.op.secondary_ip:
5650 # Ok even without locking, because this can't be changed by any LU
5651 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5652 master_singlehomed = master.secondary_ip == master.primary_ip
5653 if master_singlehomed and self.op.secondary_ip:
5654 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5655 " homed cluster", errors.ECODE_INVAL)
5657 assert not (frozenset(affected_instances) -
5658 self.owned_locks(locking.LEVEL_INSTANCE))
5661 if affected_instances:
5662 raise errors.OpPrereqError("Cannot change secondary IP address:"
5663 " offline node has instances (%s)"
5664 " configured to use it" %
5665 utils.CommaJoin(affected_instances.keys()))
5667 # On online nodes, check that no instances are running, and that
5668 # the node has the new ip and we can reach it.
5669 for instance in affected_instances.values():
5670 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5671 msg="cannot change secondary ip")
5673 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5674 if master.name != node.name:
5675 # check reachability from master secondary ip to new secondary ip
5676 if not netutils.TcpPing(self.op.secondary_ip,
5677 constants.DEFAULT_NODED_PORT,
5678 source=master.secondary_ip):
5679 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5680 " based ping to node daemon port",
5681 errors.ECODE_ENVIRON)
5683 if self.op.ndparams:
5684 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5685 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5686 self.new_ndparams = new_ndparams
5688 if self.op.hv_state:
5689 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5690 self.node.hv_state_static)
5692 if self.op.disk_state:
5693 self.new_disk_state = \
5694 _MergeAndVerifyDiskState(self.op.disk_state,
5695 self.node.disk_state_static)
5697 def Exec(self, feedback_fn):
5702 old_role = self.old_role
5703 new_role = self.new_role
5707 if self.op.ndparams:
5708 node.ndparams = self.new_ndparams
5710 if self.op.powered is not None:
5711 node.powered = self.op.powered
5713 if self.op.hv_state:
5714 node.hv_state_static = self.new_hv_state
5716 if self.op.disk_state:
5717 node.disk_state_static = self.new_disk_state
5719 for attr in ["master_capable", "vm_capable"]:
5720 val = getattr(self.op, attr)
5722 setattr(node, attr, val)
5723 result.append((attr, str(val)))
5725 if new_role != old_role:
5726 # Tell the node to demote itself, if no longer MC and not offline
5727 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5728 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5730 self.LogWarning("Node failed to demote itself: %s", msg)
5732 new_flags = self._R2F[new_role]
5733 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5735 result.append((desc, str(nf)))
5736 (node.master_candidate, node.drained, node.offline) = new_flags
5738 # we locked all nodes, we adjust the CP before updating this node
5740 _AdjustCandidatePool(self, [node.name])
5742 if self.op.secondary_ip:
5743 node.secondary_ip = self.op.secondary_ip
5744 result.append(("secondary_ip", self.op.secondary_ip))
5746 # this will trigger configuration file update, if needed
5747 self.cfg.Update(node, feedback_fn)
5749 # this will trigger job queue propagation or cleanup if the mc
5751 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5752 self.context.ReaddNode(node)
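
# Illustrative sketch (not part of this module): how a node's flag tuple
# (master_candidate, drained, offline) maps to a single role, and how the
# "exactly one of old/new role is master candidate" test used in Exec above
# decides whether the job queue needs to be re-propagated.  The _sketch_*
# names and the role strings are hypothetical stand-ins for the LU's
# _F2R/_ROLE_* constants.
def _sketch_flags_to_role(master_candidate, drained, offline):
  """Map the three node flags to one symbolic role (sketch only)."""
  flags_to_role = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  return flags_to_role[(master_candidate, drained, offline)]


def _sketch_needs_readd(old_role, new_role):
  """True iff exactly one of the two roles is the master-candidate role."""
  return [old_role, new_role].count("candidate") == 1


assert _sketch_flags_to_role(True, False, False) == "candidate"
assert _sketch_needs_readd("candidate", "regular")
assert not _sketch_needs_readd("candidate", "candidate")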
5757 class LUNodePowercycle(NoHooksLU):
5758 """Powercycles a node.
5763 def CheckArguments(self):
5764 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5765 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5766 raise errors.OpPrereqError("The node is the master and the force"
5767 " parameter was not set",
5770 def ExpandNames(self):
5771 """Locking for PowercycleNode.
5773 This is a last-resort option and shouldn't block on other
5774 jobs. Therefore, we grab no locks.
5777 self.needed_locks = {}
5779 def Exec(self, feedback_fn):
5783 result = self.rpc.call_node_powercycle(self.op.node_name,
5784 self.cfg.GetHypervisorType())
5785 result.Raise("Failed to schedule the reboot")
5786 return result.payload
5789 class LUClusterQuery(NoHooksLU):
5790 """Query cluster configuration.
5795 def ExpandNames(self):
5796 self.needed_locks = {}
5798 def Exec(self, feedback_fn):
5799 """Return cluster config.
5802 cluster = self.cfg.GetClusterInfo()
5805 # Filter just for enabled hypervisors
5806 for os_name, hv_dict in cluster.os_hvp.items():
5807 os_hvp[os_name] = {}
5808 for hv_name, hv_params in hv_dict.items():
5809 if hv_name in cluster.enabled_hypervisors:
5810 os_hvp[os_name][hv_name] = hv_params
5812 # Convert ip_family to ip_version
5813 primary_ip_version = constants.IP4_VERSION
5814 if cluster.primary_ip_family == netutils.IP6Address.family:
5815 primary_ip_version = constants.IP6_VERSION
5818 "software_version": constants.RELEASE_VERSION,
5819 "protocol_version": constants.PROTOCOL_VERSION,
5820 "config_version": constants.CONFIG_VERSION,
5821 "os_api_version": max(constants.OS_API_VERSIONS),
5822 "export_version": constants.EXPORT_VERSION,
5823 "architecture": (platform.architecture()[0], platform.machine()),
5824 "name": cluster.cluster_name,
5825 "master": cluster.master_node,
5826 "default_hypervisor": cluster.primary_hypervisor,
5827 "enabled_hypervisors": cluster.enabled_hypervisors,
5828 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5829 for hypervisor_name in cluster.enabled_hypervisors]),
5831 "beparams": cluster.beparams,
5832 "osparams": cluster.osparams,
5833 "nicparams": cluster.nicparams,
5834 "ndparams": cluster.ndparams,
5835 "candidate_pool_size": cluster.candidate_pool_size,
5836 "master_netdev": cluster.master_netdev,
5837 "master_netmask": cluster.master_netmask,
5838 "use_external_mip_script": cluster.use_external_mip_script,
5839 "volume_group_name": cluster.volume_group_name,
5840 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5841 "file_storage_dir": cluster.file_storage_dir,
5842 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5843 "maintain_node_health": cluster.maintain_node_health,
5844 "ctime": cluster.ctime,
5845 "mtime": cluster.mtime,
5846 "uuid": cluster.uuid,
5847 "tags": list(cluster.GetTags()),
5848 "uid_pool": cluster.uid_pool,
5849 "default_iallocator": cluster.default_iallocator,
5850 "reserved_lvs": cluster.reserved_lvs,
5851 "primary_ip_version": primary_ip_version,
5852 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5853 "hidden_os": cluster.hidden_os,
5854 "blacklisted_os": cluster.blacklisted_os,
5860 class LUClusterConfigQuery(NoHooksLU):
5861 """Return configuration values.
5865 _FIELDS_DYNAMIC = utils.FieldSet()
5866 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5867 "watcher_pause", "volume_group_name")
5869 def CheckArguments(self):
5870 _CheckOutputFields(static=self._FIELDS_STATIC,
5871 dynamic=self._FIELDS_DYNAMIC,
5872 selected=self.op.output_fields)
5874 def ExpandNames(self):
5875 self.needed_locks = {}
5877 def Exec(self, feedback_fn):
5878 """Dump a representation of the cluster config to the standard output.
5882 for field in self.op.output_fields:
5883 if field == "cluster_name":
5884 entry = self.cfg.GetClusterName()
5885 elif field == "master_node":
5886 entry = self.cfg.GetMasterNode()
5887 elif field == "drain_flag":
5888 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5889 elif field == "watcher_pause":
5890 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5891 elif field == "volume_group_name":
5892 entry = self.cfg.GetVGName()
5894 raise errors.ParameterError(field)
5895 values.append(entry)
5899 class LUInstanceActivateDisks(NoHooksLU):
5900 """Bring up an instance's disks.
5905 def ExpandNames(self):
5906 self._ExpandAndLockInstance()
5907 self.needed_locks[locking.LEVEL_NODE] = []
5908 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5910 def DeclareLocks(self, level):
5911 if level == locking.LEVEL_NODE:
5912 self._LockInstancesNodes()
5914 def CheckPrereq(self):
5915 """Check prerequisites.
5917 This checks that the instance is in the cluster.
5920 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5921 assert self.instance is not None, \
5922 "Cannot retrieve locked instance %s" % self.op.instance_name
5923 _CheckNodeOnline(self, self.instance.primary_node)
5925 def Exec(self, feedback_fn):
5926 """Activate the disks.
5929 disks_ok, disks_info = \
5930 _AssembleInstanceDisks(self, self.instance,
5931 ignore_size=self.op.ignore_size)
5933 raise errors.OpExecError("Cannot activate block devices")
5938 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5940 """Prepare the block devices for an instance.
5942 This sets up the block devices on all nodes.
5944 @type lu: L{LogicalUnit}
5945 @param lu: the logical unit on whose behalf we execute
5946 @type instance: L{objects.Instance}
5947 @param instance: the instance for whose disks we assemble
5948 @type disks: list of L{objects.Disk} or None
5949 @param disks: which disks to assemble (or all, if None)
5950 @type ignore_secondaries: boolean
5951 @param ignore_secondaries: if true, errors on secondary nodes
5952 won't result in an error return from the function
5953 @type ignore_size: boolean
5954 @param ignore_size: if true, the current known size of the disk
5955 will not be used during the disk activation, useful for cases
5956 when the size is wrong
5957 @return: False if the operation failed, otherwise a list of
5958 (host, instance_visible_name, node_visible_name)
5959 with the mapping from node devices to instance devices
5964 iname = instance.name
5965 disks = _ExpandCheckDisks(instance, disks)
5967   # With the two-pass mechanism we try to reduce the window of
5968   # opportunity for the race condition of switching DRBD to primary
5969   # before the handshake has occurred, but we do not eliminate it
5969   # (a standalone sketch of this two-pass ordering follows the function)
5971 # The proper fix would be to wait (with some limits) until the
5972 # connection has been made and drbd transitions from WFConnection
5973 # into any other network-connected state (Connected, SyncTarget,
5976 # 1st pass, assemble on all nodes in secondary mode
5977 for idx, inst_disk in enumerate(disks):
5978 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5980 node_disk = node_disk.Copy()
5981 node_disk.UnsetSize()
5982 lu.cfg.SetDiskID(node_disk, node)
5983 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5984 msg = result.fail_msg
5986 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5987 " (is_primary=False, pass=1): %s",
5988 inst_disk.iv_name, node, msg)
5989 if not ignore_secondaries:
5992 # FIXME: race condition on drbd migration to primary
5994 # 2nd pass, do only the primary node
5995 for idx, inst_disk in enumerate(disks):
5998 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5999 if node != instance.primary_node:
6002 node_disk = node_disk.Copy()
6003 node_disk.UnsetSize()
6004 lu.cfg.SetDiskID(node_disk, node)
6005 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6006 msg = result.fail_msg
6008 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6009 " (is_primary=True, pass=2): %s",
6010 inst_disk.iv_name, node, msg)
6013 dev_path = result.payload
6015 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6017 # leave the disks configured for the primary node
6018 # this is a workaround that would be fixed better by
6019 # improving the logical/physical id handling
6021 lu.cfg.SetDiskID(disk, instance.primary_node)
6023 return disks_ok, device_info
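
# Illustrative sketch of the two-pass assembly order implemented above, with
# the blockdev RPC replaced by a stub callable; the _sketch_* names are
# hypothetical.  Pass 1 brings every node up in secondary mode so the DRBD
# peers can connect; pass 2 promotes only the primary node, which narrows
# (but does not close) the race window described in the comments above.
def _sketch_two_pass_assemble(disk_nodes, primary, assemble_fn):
  """assemble_fn(node, as_primary) returns an error message or None."""
  errors_seen = []
  # 1st pass: all nodes, secondary mode
  for node in disk_nodes:
    err = assemble_fn(node, False)
    if err:
      errors_seen.append((node, err))
  # 2nd pass: only the primary node, primary mode
  err = assemble_fn(primary, True)
  if err:
    errors_seen.append((primary, err))
  return errors_seen


# with an always-successful stub no errors are collected
assert _sketch_two_pass_assemble(["node1", "node2"], "node1",
                                 lambda node, as_primary: None) == []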
6026 def _StartInstanceDisks(lu, instance, force):
6027 """Start the disks of an instance.
6030 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6031 ignore_secondaries=force)
6033 _ShutdownInstanceDisks(lu, instance)
6034 if force is not None and not force:
6035 lu.proc.LogWarning("", hint="If the message above refers to a"
6037 " you can retry the operation using '--force'.")
6038 raise errors.OpExecError("Disk consistency error")
6041 class LUInstanceDeactivateDisks(NoHooksLU):
6042 """Shutdown an instance's disks.
6047 def ExpandNames(self):
6048 self._ExpandAndLockInstance()
6049 self.needed_locks[locking.LEVEL_NODE] = []
6050 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6052 def DeclareLocks(self, level):
6053 if level == locking.LEVEL_NODE:
6054 self._LockInstancesNodes()
6056 def CheckPrereq(self):
6057 """Check prerequisites.
6059 This checks that the instance is in the cluster.
6062 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6063 assert self.instance is not None, \
6064 "Cannot retrieve locked instance %s" % self.op.instance_name
6066 def Exec(self, feedback_fn):
6067 """Deactivate the disks
6070 instance = self.instance
6072 _ShutdownInstanceDisks(self, instance)
6074 _SafeShutdownInstanceDisks(self, instance)
6077 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6078 """Shutdown block devices of an instance.
6080 This function checks if an instance is running, before calling
6081 _ShutdownInstanceDisks.
6084 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6085 _ShutdownInstanceDisks(lu, instance, disks=disks)
6088 def _ExpandCheckDisks(instance, disks):
6089 """Return the instance disks selected by the disks list
6091 @type disks: list of L{objects.Disk} or None
6092 @param disks: selected disks
6093 @rtype: list of L{objects.Disk}
6094 @return: selected instance disks to act on
6098 return instance.disks
6100 if not set(disks).issubset(instance.disks):
6101 raise errors.ProgrammerError("Can only act on disks belonging to the"
6106 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6107 """Shutdown block devices of an instance.
6109 This does the shutdown on all nodes of the instance.
6111 If the ignore_primary is false, errors on the primary node are
6116 disks = _ExpandCheckDisks(instance, disks)
6119 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6120 lu.cfg.SetDiskID(top_disk, node)
6121 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6122 msg = result.fail_msg
6124 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6125 disk.iv_name, node, msg)
6126 if ((node == instance.primary_node and not ignore_primary) or
6127 (node != instance.primary_node and not result.offline)):
6132 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6133 """Checks if a node has enough free memory.
6135 This function checks if a given node has the needed amount of free
6136 memory. In case the node has less memory or we cannot get the
6137 information from the node, this function raises an OpPrereqError
6140 @type lu: C{LogicalUnit}
6141 @param lu: a logical unit from which we get configuration data
6143 @param node: the node to check
6144 @type reason: C{str}
6145 @param reason: string to use in the error message
6146 @type requested: C{int}
6147 @param requested: the amount of memory in MiB to check for
6148 @type hypervisor_name: C{str}
6149 @param hypervisor_name: the hypervisor to ask for memory stats
6150 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6151 we cannot check the node
6154 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6155 nodeinfo[node].Raise("Can't get data from node %s" % node,
6156 prereq=True, ecode=errors.ECODE_ENVIRON)
6157 (_, _, (hv_info, )) = nodeinfo[node].payload
6159 free_mem = hv_info.get("memory_free", None)
6160 if not isinstance(free_mem, int):
6161 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6162 " was '%s'" % (node, free_mem),
6163 errors.ECODE_ENVIRON)
6164 if requested > free_mem:
6165 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6166 " needed %s MiB, available %s MiB" %
6167 (node, reason, requested, free_mem),
6171 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6172 """Checks if nodes have enough free disk space in the all VGs.
6174 This function checks if all given nodes have the needed amount of
6175 free disk. In case any node has less disk space or we cannot get the
6176 information from the node, this function raises an OpPrereqError
6179 @type lu: C{LogicalUnit}
6180 @param lu: a logical unit from which we get configuration data
6181 @type nodenames: C{list}
6182 @param nodenames: the list of node names to check
6183 @type req_sizes: C{dict}
6184 @param req_sizes: the hash of vg and corresponding amount of disk in
6186 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6187 or we cannot check the node
6190 for vg, req_size in req_sizes.items():
6191 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
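
# Illustrative sketch (hypothetical helper, plain data instead of the node
# RPC): the free-memory, per-VG free-disk and physical-CPU pre-checks in this
# file all share the same shape -- query the nodes, treat a missing or
# non-integer value as an environment problem, and treat an insufficient
# value as a state problem.
def _sketch_check_resource(available_by_node, requested, what):
  """available_by_node: dict mapping node name to the reported value."""
  problems = []
  for node, value in sorted(available_by_node.items()):
    if not isinstance(value, int):
      problems.append("%s: cannot determine %s (got %r)" % (node, what, value))
    elif value < requested:
      problems.append("%s: not enough %s: need %d, have %d" %
                      (node, what, requested, value))
  return problems


_problems = _sketch_check_resource({"node1": 4096, "node2": None}, 2048,
                                   "free memory (MiB)")
assert len(_problems) == 1 and _problems[0].startswith("node2")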
6194 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6195 """Checks if nodes have enough free disk space in the specified VG.
6197 This function checks if all given nodes have the needed amount of
6198 free disk. In case any node has less disk space or we cannot get the
6199 information from the node, this function raises an OpPrereqError
6202 @type lu: C{LogicalUnit}
6203 @param lu: a logical unit from which we get configuration data
6204 @type nodenames: C{list}
6205 @param nodenames: the list of node names to check
6207 @param vg: the volume group to check
6208 @type requested: C{int}
6209 @param requested: the amount of disk in MiB to check for
6210 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6211 or we cannot check the node
6214 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6215 for node in nodenames:
6216 info = nodeinfo[node]
6217 info.Raise("Cannot get current information from node %s" % node,
6218 prereq=True, ecode=errors.ECODE_ENVIRON)
6219 (_, (vg_info, ), _) = info.payload
6220 vg_free = vg_info.get("vg_free", None)
6221 if not isinstance(vg_free, int):
6222 raise errors.OpPrereqError("Can't compute free disk space on node"
6223 " %s for vg %s, result was '%s'" %
6224 (node, vg, vg_free), errors.ECODE_ENVIRON)
6225 if requested > vg_free:
6226 raise errors.OpPrereqError("Not enough disk space on target node %s"
6227 " vg %s: required %d MiB, available %d MiB" %
6228 (node, vg, requested, vg_free),
6232 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6233 """Checks if nodes have enough physical CPUs
6235 This function checks if all given nodes have the needed number of
6236 physical CPUs. In case any node has fewer CPUs or we cannot get the
6237 information from the node, this function raises an OpPrereqError
6240 @type lu: C{LogicalUnit}
6241 @param lu: a logical unit from which we get configuration data
6242 @type nodenames: C{list}
6243 @param nodenames: the list of node names to check
6244 @type requested: C{int}
6245 @param requested: the minimum acceptable number of physical CPUs
6246 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6247 or we cannot check the node
6250 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6251 for node in nodenames:
6252 info = nodeinfo[node]
6253 info.Raise("Cannot get current information from node %s" % node,
6254 prereq=True, ecode=errors.ECODE_ENVIRON)
6255 (_, _, (hv_info, )) = info.payload
6256 num_cpus = hv_info.get("cpu_total", None)
6257 if not isinstance(num_cpus, int):
6258 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6259 " on node %s, result was '%s'" %
6260 (node, num_cpus), errors.ECODE_ENVIRON)
6261 if requested > num_cpus:
6262 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6263 "required" % (node, num_cpus, requested),
6267 class LUInstanceStartup(LogicalUnit):
6268 """Starts an instance.
6271 HPATH = "instance-start"
6272 HTYPE = constants.HTYPE_INSTANCE
6275 def CheckArguments(self):
6277 if self.op.beparams:
6278 # fill the beparams dict
6279 objects.UpgradeBeParams(self.op.beparams)
6280 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6282 def ExpandNames(self):
6283 self._ExpandAndLockInstance()
6285 def BuildHooksEnv(self):
6288 This runs on master, primary and secondary nodes of the instance.
6292 "FORCE": self.op.force,
6295 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6299 def BuildHooksNodes(self):
6300 """Build hooks nodes.
6303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6306 def CheckPrereq(self):
6307 """Check prerequisites.
6309 This checks that the instance is in the cluster.
6312 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6313 assert self.instance is not None, \
6314 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 if self.op.hvparams:
6318 # check hypervisor parameter syntax (locally)
6319 cluster = self.cfg.GetClusterInfo()
6320 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6321 filled_hvp = cluster.FillHV(instance)
6322 filled_hvp.update(self.op.hvparams)
6323 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6324 hv_type.CheckParameterSyntax(filled_hvp)
6325 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6327 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6329 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6331 if self.primary_offline and self.op.ignore_offline_nodes:
6332 self.proc.LogWarning("Ignoring offline primary node")
6334 if self.op.hvparams or self.op.beparams:
6335 self.proc.LogWarning("Overridden parameters are ignored")
6337 _CheckNodeOnline(self, instance.primary_node)
6339 bep = self.cfg.GetClusterInfo().FillBE(instance)
6341 # check bridges existence
6342 _CheckInstanceBridgesExist(self, instance)
6344 remote_info = self.rpc.call_instance_info(instance.primary_node,
6346 instance.hypervisor)
6347 remote_info.Raise("Error checking node %s" % instance.primary_node,
6348 prereq=True, ecode=errors.ECODE_ENVIRON)
6349 if not remote_info.payload: # not running already
6350 _CheckNodeFreeMemory(self, instance.primary_node,
6351 "starting instance %s" % instance.name,
6352 bep[constants.BE_MAXMEM], instance.hypervisor)
6354 def Exec(self, feedback_fn):
6355 """Start the instance.
6358 instance = self.instance
6359 force = self.op.force
6361 if not self.op.no_remember:
6362 self.cfg.MarkInstanceUp(instance.name)
6364 if self.primary_offline:
6365 assert self.op.ignore_offline_nodes
6366 self.proc.LogInfo("Primary node offline, marked instance as started")
6368 node_current = instance.primary_node
6370 _StartInstanceDisks(self, instance, force)
6373 self.rpc.call_instance_start(node_current,
6374 (instance, self.op.hvparams,
6376 self.op.startup_paused)
6377 msg = result.fail_msg
6379 _ShutdownInstanceDisks(self, instance)
6380 raise errors.OpExecError("Could not start instance: %s" % msg)
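
# Illustrative sketch (plain dicts, hypothetical parameter names): how the
# one-off hvparams/beparams overrides accepted by LUInstanceStartup are
# merged for validation in CheckPrereq -- the cluster-filled defaults come
# first and the per-start overrides are laid on top, so only the merged
# result has to pass the syntax check.
def _sketch_merge_overrides(filled_defaults, overrides):
  merged = dict(filled_defaults)  # never mutate the configuration's copy
  merged.update(overrides or {})
  return merged


_filled = {"kernel_path": "/boot/vmlinuz", "serial_console": True}
assert _sketch_merge_overrides(_filled, {"serial_console": False}) == \
    {"kernel_path": "/boot/vmlinuz", "serial_console": False}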
6383 class LUInstanceReboot(LogicalUnit):
6384 """Reboot an instance.
6387 HPATH = "instance-reboot"
6388 HTYPE = constants.HTYPE_INSTANCE
6391 def ExpandNames(self):
6392 self._ExpandAndLockInstance()
6394 def BuildHooksEnv(self):
6397 This runs on master, primary and secondary nodes of the instance.
6401 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6402 "REBOOT_TYPE": self.op.reboot_type,
6403 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6406 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6410 def BuildHooksNodes(self):
6411 """Build hooks nodes.
6414 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6417 def CheckPrereq(self):
6418 """Check prerequisites.
6420 This checks that the instance is in the cluster.
6423 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6424 assert self.instance is not None, \
6425 "Cannot retrieve locked instance %s" % self.op.instance_name
6426 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6427 _CheckNodeOnline(self, instance.primary_node)
6429 # check bridges existence
6430 _CheckInstanceBridgesExist(self, instance)
6432 def Exec(self, feedback_fn):
6433 """Reboot the instance.
6436 instance = self.instance
6437 ignore_secondaries = self.op.ignore_secondaries
6438 reboot_type = self.op.reboot_type
6440 remote_info = self.rpc.call_instance_info(instance.primary_node,
6442 instance.hypervisor)
6443 remote_info.Raise("Error checking node %s" % instance.primary_node)
6444 instance_running = bool(remote_info.payload)
6446 node_current = instance.primary_node
6448 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6449 constants.INSTANCE_REBOOT_HARD]:
6450 for disk in instance.disks:
6451 self.cfg.SetDiskID(disk, node_current)
6452 result = self.rpc.call_instance_reboot(node_current, instance,
6454 self.op.shutdown_timeout)
6455 result.Raise("Could not reboot instance")
6457 if instance_running:
6458 result = self.rpc.call_instance_shutdown(node_current, instance,
6459 self.op.shutdown_timeout)
6460 result.Raise("Could not shutdown instance for full reboot")
6461 _ShutdownInstanceDisks(self, instance)
6463 self.LogInfo("Instance %s was already stopped, starting now",
6465 _StartInstanceDisks(self, instance, ignore_secondaries)
6466 result = self.rpc.call_instance_start(node_current,
6467 (instance, None, None), False)
6468 msg = result.fail_msg
6470 _ShutdownInstanceDisks(self, instance)
6471 raise errors.OpExecError("Could not start instance for"
6472 " full reboot: %s" % msg)
6474 self.cfg.MarkInstanceUp(instance.name)
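
# Illustrative sketch of the decision taken in Exec above; the reboot-type
# strings are hypothetical stand-ins for the INSTANCE_REBOOT_* constants.
# Soft/hard reboots of a running instance are delegated to the node daemon,
# while a full reboot (or rebooting a stopped instance) is a stop/start
# cycle driven from the master, including disk deactivation/activation.
def _sketch_reboot_actions(running, reboot_type):
  if running and reboot_type in ("soft", "hard"):
    return ["node-side reboot"]
  actions = []
  if running:
    actions.append("shutdown instance and deactivate disks")
  actions.extend(["activate disks", "start instance"])
  return actions


assert _sketch_reboot_actions(True, "hard") == ["node-side reboot"]
assert _sketch_reboot_actions(False, "full") == ["activate disks",
                                                 "start instance"]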
6477 class LUInstanceShutdown(LogicalUnit):
6478 """Shutdown an instance.
6481 HPATH = "instance-stop"
6482 HTYPE = constants.HTYPE_INSTANCE
6485 def ExpandNames(self):
6486 self._ExpandAndLockInstance()
6488 def BuildHooksEnv(self):
6491 This runs on master, primary and secondary nodes of the instance.
6494 env = _BuildInstanceHookEnvByObject(self, self.instance)
6495 env["TIMEOUT"] = self.op.timeout
6498 def BuildHooksNodes(self):
6499 """Build hooks nodes.
6502 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6505 def CheckPrereq(self):
6506 """Check prerequisites.
6508 This checks that the instance is in the cluster.
6511 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6512 assert self.instance is not None, \
6513 "Cannot retrieve locked instance %s" % self.op.instance_name
6515 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6517 self.primary_offline = \
6518 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6520 if self.primary_offline and self.op.ignore_offline_nodes:
6521 self.proc.LogWarning("Ignoring offline primary node")
6523 _CheckNodeOnline(self, self.instance.primary_node)
6525 def Exec(self, feedback_fn):
6526 """Shutdown the instance.
6529 instance = self.instance
6530 node_current = instance.primary_node
6531 timeout = self.op.timeout
6533 if not self.op.no_remember:
6534 self.cfg.MarkInstanceDown(instance.name)
6536 if self.primary_offline:
6537 assert self.op.ignore_offline_nodes
6538 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6540 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6541 msg = result.fail_msg
6543 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6545 _ShutdownInstanceDisks(self, instance)
6548 class LUInstanceReinstall(LogicalUnit):
6549 """Reinstall an instance.
6552 HPATH = "instance-reinstall"
6553 HTYPE = constants.HTYPE_INSTANCE
6556 def ExpandNames(self):
6557 self._ExpandAndLockInstance()
6559 def BuildHooksEnv(self):
6562 This runs on master, primary and secondary nodes of the instance.
6565 return _BuildInstanceHookEnvByObject(self, self.instance)
6567 def BuildHooksNodes(self):
6568 """Build hooks nodes.
6571 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574 def CheckPrereq(self):
6575 """Check prerequisites.
6577 This checks that the instance is in the cluster and is not running.
6580 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6581 assert instance is not None, \
6582 "Cannot retrieve locked instance %s" % self.op.instance_name
6583 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6584 " offline, cannot reinstall")
6585 for node in instance.secondary_nodes:
6586 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6587 " cannot reinstall")
6589 if instance.disk_template == constants.DT_DISKLESS:
6590 raise errors.OpPrereqError("Instance '%s' has no disks" %
6591 self.op.instance_name,
6593 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6595 if self.op.os_type is not None:
6597 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6598 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6599 instance_os = self.op.os_type
6601 instance_os = instance.os
6603 nodelist = list(instance.all_nodes)
6605 if self.op.osparams:
6606 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6607 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6608 self.os_inst = i_osdict # the new dict (without defaults)
6612 self.instance = instance
6614 def Exec(self, feedback_fn):
6615 """Reinstall the instance.
6618 inst = self.instance
6620 if self.op.os_type is not None:
6621 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6622 inst.os = self.op.os_type
6623 # Write to configuration
6624 self.cfg.Update(inst, feedback_fn)
6626 _StartInstanceDisks(self, inst, None)
6628 feedback_fn("Running the instance OS create scripts...")
6629 # FIXME: pass debug option from opcode to backend
6630 result = self.rpc.call_instance_os_add(inst.primary_node,
6631 (inst, self.os_inst), True,
6632 self.op.debug_level)
6633 result.Raise("Could not install OS for instance %s on node %s" %
6634 (inst.name, inst.primary_node))
6636 _ShutdownInstanceDisks(self, inst)
6639 class LUInstanceRecreateDisks(LogicalUnit):
6640 """Recreate an instance's missing disks.
6643 HPATH = "instance-recreate-disks"
6644 HTYPE = constants.HTYPE_INSTANCE
6647 def CheckArguments(self):
6648 # normalise the disk list
6649 self.op.disks = sorted(frozenset(self.op.disks))
6651 def ExpandNames(self):
6652 self._ExpandAndLockInstance()
6653 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6655 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6656 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6658 self.needed_locks[locking.LEVEL_NODE] = []
6660 def DeclareLocks(self, level):
6661 if level == locking.LEVEL_NODE:
6662 # if we replace the nodes, we only need to lock the old primary,
6663 # otherwise we need to lock all nodes for disk re-creation
6664 primary_only = bool(self.op.nodes)
6665 self._LockInstancesNodes(primary_only=primary_only)
6666 elif level == locking.LEVEL_NODE_RES:
6668 self.needed_locks[locking.LEVEL_NODE_RES] = \
6669 self.needed_locks[locking.LEVEL_NODE][:]
6671 def BuildHooksEnv(self):
6674 This runs on master, primary and secondary nodes of the instance.
6677 return _BuildInstanceHookEnvByObject(self, self.instance)
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6686 def CheckPrereq(self):
6687 """Check prerequisites.
6689 This checks that the instance is in the cluster and is not running.
6692 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6693 assert instance is not None, \
6694 "Cannot retrieve locked instance %s" % self.op.instance_name
6696 if len(self.op.nodes) != len(instance.all_nodes):
6697 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6698 " %d replacement nodes were specified" %
6699 (instance.name, len(instance.all_nodes),
6700 len(self.op.nodes)),
6702 assert instance.disk_template != constants.DT_DRBD8 or \
6703 len(self.op.nodes) == 2
6704 assert instance.disk_template != constants.DT_PLAIN or \
6705 len(self.op.nodes) == 1
6706 primary_node = self.op.nodes[0]
6708 primary_node = instance.primary_node
6709 _CheckNodeOnline(self, primary_node)
6711 if instance.disk_template == constants.DT_DISKLESS:
6712 raise errors.OpPrereqError("Instance '%s' has no disks" %
6713 self.op.instance_name, errors.ECODE_INVAL)
6714 # if we replace nodes *and* the old primary is offline, we don't
6716 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6717 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6718 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6719 if not (self.op.nodes and old_pnode.offline):
6720 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6721 msg="cannot recreate disks")
6723 if not self.op.disks:
6724 self.op.disks = range(len(instance.disks))
6726 for idx in self.op.disks:
6727 if idx >= len(instance.disks):
6728 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6730 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6731 raise errors.OpPrereqError("Can't recreate disks partially and"
6732 " change the nodes at the same time",
6734 self.instance = instance
6736 def Exec(self, feedback_fn):
6737 """Recreate the disks.
6740 instance = self.instance
6742 assert (self.owned_locks(locking.LEVEL_NODE) ==
6743 self.owned_locks(locking.LEVEL_NODE_RES))
6746 mods = [] # keeps track of needed logical_id changes
6748 for idx, disk in enumerate(instance.disks):
6749 if idx not in self.op.disks: # disk idx has not been passed in
6752 # update secondaries for disks, if needed
6754 if disk.dev_type == constants.LD_DRBD8:
6755 # need to update the nodes and minors
6756 assert len(self.op.nodes) == 2
6757 assert len(disk.logical_id) == 6 # otherwise disk internals
6759 (_, _, old_port, _, _, old_secret) = disk.logical_id
6760 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6761 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6762 new_minors[0], new_minors[1], old_secret)
6763 assert len(disk.logical_id) == len(new_id)
6764 mods.append((idx, new_id))
6766 # now that we have passed all asserts above, we can apply the mods
6767 # in a single run (to avoid partial changes)
6768 for idx, new_id in mods:
6769 instance.disks[idx].logical_id = new_id
6771 # change primary node, if needed
6773 instance.primary_node = self.op.nodes[0]
6774 self.LogWarning("Changing the instance's nodes, you will have to"
6775 " remove any disks left on the older nodes manually")
6778 self.cfg.Update(instance, feedback_fn)
6780 _CreateDisks(self, instance, to_skip=to_skip)
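
# Illustrative sketch of the logical_id rewrite performed above when the
# instance is recreated on a new node pair: the DRBD port and shared secret
# are kept, while the node names and the per-node minors are replaced.  The
# _sketch_ helper and the sample values are hypothetical.
def _sketch_new_drbd_logical_id(old_id, new_nodes, new_minors):
  (_, _, old_port, _, _, old_secret) = old_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)


assert _sketch_new_drbd_logical_id(("nodeA", "nodeB", 11000, 0, 1, "secret"),
                                   ["nodeC", "nodeD"], [2, 3]) == \
    ("nodeC", "nodeD", 11000, 2, 3, "secret")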
6783 class LUInstanceRename(LogicalUnit):
6784 """Rename an instance.
6787 HPATH = "instance-rename"
6788 HTYPE = constants.HTYPE_INSTANCE
6790 def CheckArguments(self):
6794 if self.op.ip_check and not self.op.name_check:
6795 # TODO: make the ip check more flexible and not depend on the name check
6796 raise errors.OpPrereqError("IP address check requires a name check",
6799 def BuildHooksEnv(self):
6802 This runs on master, primary and secondary nodes of the instance.
6805 env = _BuildInstanceHookEnvByObject(self, self.instance)
6806 env["INSTANCE_NEW_NAME"] = self.op.new_name
6809 def BuildHooksNodes(self):
6810 """Build hooks nodes.
6813 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6816 def CheckPrereq(self):
6817 """Check prerequisites.
6819 This checks that the instance is in the cluster and is not running.
6822 self.op.instance_name = _ExpandInstanceName(self.cfg,
6823 self.op.instance_name)
6824 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6825 assert instance is not None
6826 _CheckNodeOnline(self, instance.primary_node)
6827 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6828 msg="cannot rename")
6829 self.instance = instance
6831 new_name = self.op.new_name
6832 if self.op.name_check:
6833 hostname = netutils.GetHostname(name=new_name)
6834 if hostname.name != new_name:
6835 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6837 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6838 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6839 " same as given hostname '%s'") %
6840 (hostname.name, self.op.new_name),
6842 new_name = self.op.new_name = hostname.name
6843 if (self.op.ip_check and
6844 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6845 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6846 (hostname.ip, new_name),
6847 errors.ECODE_NOTUNIQUE)
6849 instance_list = self.cfg.GetInstanceList()
6850 if new_name in instance_list and new_name != instance.name:
6851 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6852 new_name, errors.ECODE_EXISTS)
6854 def Exec(self, feedback_fn):
6855 """Rename the instance.
6858 inst = self.instance
6859 old_name = inst.name
6861 rename_file_storage = False
6862 if (inst.disk_template in constants.DTS_FILEBASED and
6863 self.op.new_name != inst.name):
6864 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6865 rename_file_storage = True
6867 self.cfg.RenameInstance(inst.name, self.op.new_name)
6868 # Change the instance lock. This is definitely safe while we hold the BGL.
6869 # Otherwise the new lock would have to be added in acquired mode.
6871 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6872 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6874 # re-read the instance from the configuration after rename
6875 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6877 if rename_file_storage:
6878 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6879 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6880 old_file_storage_dir,
6881 new_file_storage_dir)
6882 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6883 " (but the instance has been renamed in Ganeti)" %
6884 (inst.primary_node, old_file_storage_dir,
6885 new_file_storage_dir))
6887 _StartInstanceDisks(self, inst, None)
6889 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6890 old_name, self.op.debug_level)
6891 msg = result.fail_msg
6893 msg = ("Could not run OS rename script for instance %s on node %s"
6894 " (but the instance has been renamed in Ganeti): %s" %
6895 (inst.name, inst.primary_node, msg))
6896 self.proc.LogWarning(msg)
6898 _ShutdownInstanceDisks(self, inst)
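
# Illustrative sketch (hypothetical paths): for file-based disk templates the
# rename above also moves the storage directory; the old and new directories
# are derived from the first disk's logical_id path before and after the
# configuration rename, exactly as os.path.dirname is used in Exec.
import os.path  # already imported by this module; repeated so the sketch stands alone

def _sketch_storage_dir(disk_path):
  return os.path.dirname(disk_path)


assert _sketch_storage_dir("/srv/ganeti/file-storage/inst1/disk0") == \
    "/srv/ganeti/file-storage/inst1"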
6903 class LUInstanceRemove(LogicalUnit):
6904 """Remove an instance.
6907 HPATH = "instance-remove"
6908 HTYPE = constants.HTYPE_INSTANCE
6911 def ExpandNames(self):
6912 self._ExpandAndLockInstance()
6913 self.needed_locks[locking.LEVEL_NODE] = []
6914 self.needed_locks[locking.LEVEL_NODE_RES] = []
6915 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6917 def DeclareLocks(self, level):
6918 if level == locking.LEVEL_NODE:
6919 self._LockInstancesNodes()
6920 elif level == locking.LEVEL_NODE_RES:
6922 self.needed_locks[locking.LEVEL_NODE_RES] = \
6923 self.needed_locks[locking.LEVEL_NODE][:]
6925 def BuildHooksEnv(self):
6928 This runs on master, primary and secondary nodes of the instance.
6931 env = _BuildInstanceHookEnvByObject(self, self.instance)
6932 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6935 def BuildHooksNodes(self):
6936 """Build hooks nodes.
6939 nl = [self.cfg.GetMasterNode()]
6940 nl_post = list(self.instance.all_nodes) + nl
6941 return (nl, nl_post)
6943 def CheckPrereq(self):
6944 """Check prerequisites.
6946 This checks that the instance is in the cluster.
6949 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6950 assert self.instance is not None, \
6951 "Cannot retrieve locked instance %s" % self.op.instance_name
6953 def Exec(self, feedback_fn):
6954 """Remove the instance.
6957 instance = self.instance
6958 logging.info("Shutting down instance %s on node %s",
6959 instance.name, instance.primary_node)
6961 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6962 self.op.shutdown_timeout)
6963 msg = result.fail_msg
6965 if self.op.ignore_failures:
6966 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6968 raise errors.OpExecError("Could not shutdown instance %s on"
6970 (instance.name, instance.primary_node, msg))
6972 assert (self.owned_locks(locking.LEVEL_NODE) ==
6973 self.owned_locks(locking.LEVEL_NODE_RES))
6974 assert not (set(instance.all_nodes) -
6975 self.owned_locks(locking.LEVEL_NODE)), \
6976 "Not owning correct locks"
6978 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6981 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6982 """Utility function to remove an instance.
6985 logging.info("Removing block devices for instance %s", instance.name)
6987 if not _RemoveDisks(lu, instance):
6988 if not ignore_failures:
6989 raise errors.OpExecError("Can't remove instance's disks")
6990 feedback_fn("Warning: can't remove instance's disks")
6992 logging.info("Removing instance %s out of cluster config", instance.name)
6994 lu.cfg.RemoveInstance(instance.name)
6996 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6997 "Instance lock removal conflict"
6999 # Remove lock for the instance
7000 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7003 class LUInstanceQuery(NoHooksLU):
7004 """Logical unit for querying instances.
7007 # pylint: disable=W0142
7010 def CheckArguments(self):
7011 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7012 self.op.output_fields, self.op.use_locking)
7014 def ExpandNames(self):
7015 self.iq.ExpandNames(self)
7017 def DeclareLocks(self, level):
7018 self.iq.DeclareLocks(self, level)
7020 def Exec(self, feedback_fn):
7021 return self.iq.OldStyleQuery(self)
7024 class LUInstanceFailover(LogicalUnit):
7025 """Failover an instance.
7028 HPATH = "instance-failover"
7029 HTYPE = constants.HTYPE_INSTANCE
7032 def CheckArguments(self):
7033 """Check the arguments.
7036 self.iallocator = getattr(self.op, "iallocator", None)
7037 self.target_node = getattr(self.op, "target_node", None)
7039 def ExpandNames(self):
7040 self._ExpandAndLockInstance()
7042 if self.op.target_node is not None:
7043 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7045 self.needed_locks[locking.LEVEL_NODE] = []
7046 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7048 ignore_consistency = self.op.ignore_consistency
7049 shutdown_timeout = self.op.shutdown_timeout
7050 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7053 ignore_consistency=ignore_consistency,
7054 shutdown_timeout=shutdown_timeout)
7055 self.tasklets = [self._migrater]
7057 def DeclareLocks(self, level):
7058 if level == locking.LEVEL_NODE:
7059 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7060 if instance.disk_template in constants.DTS_EXT_MIRROR:
7061 if self.op.target_node is None:
7062 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7064 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7065 self.op.target_node]
7066 del self.recalculate_locks[locking.LEVEL_NODE]
7068 self._LockInstancesNodes()
7070 def BuildHooksEnv(self):
7073 This runs on master, primary and secondary nodes of the instance.
7076 instance = self._migrater.instance
7077 source_node = instance.primary_node
7078 target_node = self.op.target_node
7080 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7081 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7082 "OLD_PRIMARY": source_node,
7083 "NEW_PRIMARY": target_node,
7086 if instance.disk_template in constants.DTS_INT_MIRROR:
7087 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7088 env["NEW_SECONDARY"] = source_node
7090 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7092 env.update(_BuildInstanceHookEnvByObject(self, instance))
7096 def BuildHooksNodes(self):
7097 """Build hooks nodes.
7100 instance = self._migrater.instance
7101 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7102 return (nl, nl + [instance.primary_node])
7105 class LUInstanceMigrate(LogicalUnit):
7106 """Migrate an instance.
7108 This is migration without shutting down, compared to the failover,
7109 which is done with shutdown.
7112 HPATH = "instance-migrate"
7113 HTYPE = constants.HTYPE_INSTANCE
7116 def ExpandNames(self):
7117 self._ExpandAndLockInstance()
7119 if self.op.target_node is not None:
7120 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7122 self.needed_locks[locking.LEVEL_NODE] = []
7123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7125 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7126 cleanup=self.op.cleanup,
7128 fallback=self.op.allow_failover)
7129 self.tasklets = [self._migrater]
7131 def DeclareLocks(self, level):
7132 if level == locking.LEVEL_NODE:
7133 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7134 if instance.disk_template in constants.DTS_EXT_MIRROR:
7135 if self.op.target_node is None:
7136 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7138 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7139 self.op.target_node]
7140 del self.recalculate_locks[locking.LEVEL_NODE]
7142 self._LockInstancesNodes()
7144 def BuildHooksEnv(self):
7147 This runs on master, primary and secondary nodes of the instance.
7150 instance = self._migrater.instance
7151 source_node = instance.primary_node
7152 target_node = self.op.target_node
7153 env = _BuildInstanceHookEnvByObject(self, instance)
7155 "MIGRATE_LIVE": self._migrater.live,
7156 "MIGRATE_CLEANUP": self.op.cleanup,
7157 "OLD_PRIMARY": source_node,
7158 "NEW_PRIMARY": target_node,
7161 if instance.disk_template in constants.DTS_INT_MIRROR:
7162 env["OLD_SECONDARY"] = target_node
7163 env["NEW_SECONDARY"] = source_node
7165 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7169 def BuildHooksNodes(self):
7170 """Build hooks nodes.
7173 instance = self._migrater.instance
7174 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7175 return (nl, nl + [instance.primary_node])
7178 class LUInstanceMove(LogicalUnit):
7179 """Move an instance by data-copying.
7182 HPATH = "instance-move"
7183 HTYPE = constants.HTYPE_INSTANCE
7186 def ExpandNames(self):
7187 self._ExpandAndLockInstance()
7188 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7189 self.op.target_node = target_node
7190 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7191 self.needed_locks[locking.LEVEL_NODE_RES] = []
7192 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7194 def DeclareLocks(self, level):
7195 if level == locking.LEVEL_NODE:
7196 self._LockInstancesNodes(primary_only=True)
7197 elif level == locking.LEVEL_NODE_RES:
7199 self.needed_locks[locking.LEVEL_NODE_RES] = \
7200 self.needed_locks[locking.LEVEL_NODE][:]
7202 def BuildHooksEnv(self):
7205 This runs on master, primary and secondary nodes of the instance.
7209 "TARGET_NODE": self.op.target_node,
7210 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7212 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7215 def BuildHooksNodes(self):
7216 """Build hooks nodes.
7220 self.cfg.GetMasterNode(),
7221 self.instance.primary_node,
7222 self.op.target_node,
7226 def CheckPrereq(self):
7227 """Check prerequisites.
7229 This checks that the instance is in the cluster.
7232 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7233 assert self.instance is not None, \
7234 "Cannot retrieve locked instance %s" % self.op.instance_name
7236 node = self.cfg.GetNodeInfo(self.op.target_node)
7237 assert node is not None, \
7238 "Cannot retrieve locked node %s" % self.op.target_node
7240 self.target_node = target_node = node.name
7242 if target_node == instance.primary_node:
7243 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7244 (instance.name, target_node),
7247 bep = self.cfg.GetClusterInfo().FillBE(instance)
7249 for idx, dsk in enumerate(instance.disks):
7250 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7251 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7252 " cannot copy" % idx, errors.ECODE_STATE)
7254 _CheckNodeOnline(self, target_node)
7255 _CheckNodeNotDrained(self, target_node)
7256 _CheckNodeVmCapable(self, target_node)
7258 if instance.admin_state == constants.ADMINST_UP:
7259 # check memory requirements on the secondary node
7260 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7261 instance.name, bep[constants.BE_MAXMEM],
7262 instance.hypervisor)
7264 self.LogInfo("Not checking memory on the secondary node as"
7265 " instance will not be started")
7267   # check bridge existence
7268 _CheckInstanceBridgesExist(self, instance, node=target_node)
7270 def Exec(self, feedback_fn):
7271 """Move an instance.
7273 The move is done by shutting it down on its present node, copying
7274 the data over (slow) and starting it on the new node.
7277 instance = self.instance
7279 source_node = instance.primary_node
7280 target_node = self.target_node
7282 self.LogInfo("Shutting down instance %s on source node %s",
7283 instance.name, source_node)
7285 assert (self.owned_locks(locking.LEVEL_NODE) ==
7286 self.owned_locks(locking.LEVEL_NODE_RES))
7288 result = self.rpc.call_instance_shutdown(source_node, instance,
7289 self.op.shutdown_timeout)
7290 msg = result.fail_msg
7292 if self.op.ignore_consistency:
7293 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7294 " Proceeding anyway. Please make sure node"
7295 " %s is down. Error details: %s",
7296 instance.name, source_node, source_node, msg)
7298 raise errors.OpExecError("Could not shutdown instance %s on"
7300 (instance.name, source_node, msg))
7302 # create the target disks
7304 _CreateDisks(self, instance, target_node=target_node)
7305 except errors.OpExecError:
7306 self.LogWarning("Device creation failed, reverting...")
7308 _RemoveDisks(self, instance, target_node=target_node)
7310 self.cfg.ReleaseDRBDMinors(instance.name)
7313 cluster_name = self.cfg.GetClusterInfo().cluster_name
7316 # activate, get path, copy the data over
7317 for idx, disk in enumerate(instance.disks):
7318 self.LogInfo("Copying data for disk %d", idx)
7319 result = self.rpc.call_blockdev_assemble(target_node, disk,
7320 instance.name, True, idx)
7322 self.LogWarning("Can't assemble newly created disk %d: %s",
7323 idx, result.fail_msg)
7324 errs.append(result.fail_msg)
7326 dev_path = result.payload
7327 result = self.rpc.call_blockdev_export(source_node, disk,
7328 target_node, dev_path,
7331 self.LogWarning("Can't copy data over for disk %d: %s",
7332 idx, result.fail_msg)
7333 errs.append(result.fail_msg)
7337 self.LogWarning("Some disks failed to copy, aborting")
7339 _RemoveDisks(self, instance, target_node=target_node)
7341 self.cfg.ReleaseDRBDMinors(instance.name)
7342 raise errors.OpExecError("Errors during disk copy: %s" %
7345 instance.primary_node = target_node
7346 self.cfg.Update(instance, feedback_fn)
7348 self.LogInfo("Removing the disks on the original node")
7349 _RemoveDisks(self, instance, target_node=source_node)
7351 # Only start the instance if it's marked as up
7352 if instance.admin_state == constants.ADMINST_UP:
7353 self.LogInfo("Starting instance %s on node %s",
7354 instance.name, target_node)
7356 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7357 ignore_secondaries=True)
7359 _ShutdownInstanceDisks(self, instance)
7360 raise errors.OpExecError("Can't activate the instance's disks")
7362 result = self.rpc.call_instance_start(target_node,
7363 (instance, None, None), False)
7364 msg = result.fail_msg
7366 _ShutdownInstanceDisks(self, instance)
7367 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7368 (instance.name, target_node, msg))
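
# Illustrative sketch (stub callables, hypothetical names) of the copy-based
# move performed in Exec above: create disks on the target, copy each disk
# while collecting errors, and roll the new disks back if any copy failed.
def _sketch_copy_disks(disks, copy_fn, rollback_fn):
  """copy_fn(disk) returns an error message or None."""
  errs = []
  for disk in disks:
    err = copy_fn(disk)
    if err:
      errs.append(err)
  if errs:
    rollback_fn()
    raise RuntimeError("Errors during disk copy: %s" % "; ".join(errs))


_log = []
_sketch_copy_disks(["disk0", "disk1"], lambda disk: None,
                   lambda: _log.append("removed target disks"))
assert _log == []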
7371 class LUNodeMigrate(LogicalUnit):
7372 """Migrate all instances from a node.
7375 HPATH = "node-migrate"
7376 HTYPE = constants.HTYPE_NODE
7379 def CheckArguments(self):
7382 def ExpandNames(self):
7383 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7385 self.share_locks = _ShareAll()
7386 self.needed_locks = {
7387 locking.LEVEL_NODE: [self.op.node_name],
7390 def BuildHooksEnv(self):
7393 This runs on the master, the primary and all the secondaries.
7397 "NODE_NAME": self.op.node_name,
7400 def BuildHooksNodes(self):
7401 """Build hooks nodes.
7404 nl = [self.cfg.GetMasterNode()]
7407 def CheckPrereq(self):
7410 def Exec(self, feedback_fn):
7411 # Prepare jobs for migration instances
7413 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7416 iallocator=self.op.iallocator,
7417 target_node=self.op.target_node)]
7418 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7421 # TODO: Run iallocator in this opcode and pass correct placement options to
7422 # OpInstanceMigrate. Since other jobs can modify the cluster between
7423 # running the iallocator and the actual migration, a good consistency model
7424 # will have to be found.
7426 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7427 frozenset([self.op.node_name]))
7429 return ResultWithJobs(jobs)
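
# Illustrative sketch (plain dictionaries instead of real opcode objects) of
# the job list shape returned above: one job per primary instance of the
# node, each job containing a single migration opcode, so every instance is
# migrated by an independent job.
def _sketch_migration_jobs(primary_instances, target_node):
  return [[{"OP_ID": "OP_INSTANCE_MIGRATE",
            "instance_name": name,
            "target_node": target_node}]
          for name in primary_instances]


_jobs = _sketch_migration_jobs(["inst1", "inst2"], None)
assert len(_jobs) == 2 and len(_jobs[0]) == 1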
7432 class TLMigrateInstance(Tasklet):
7433 """Tasklet class for instance migration.
7436 @ivar live: whether the migration will be done live or non-live;
7437 this variable is initialized only after CheckPrereq has run
7438 @type cleanup: boolean
7439 @ivar cleanup: Whether we clean up from a failed migration
7440 @type iallocator: string
7441 @ivar iallocator: The iallocator used to determine target_node
7442 @type target_node: string
7443 @ivar target_node: If given, the target_node to reallocate the instance to
7444 @type failover: boolean
7445 @ivar failover: Whether operation results in failover or migration
7446 @type fallback: boolean
7447 @ivar fallback: Whether fallback to failover is allowed if migration not
7449 @type ignore_consistency: boolean
7450 @ivar ignore_consistency: Whether we should ignore consistency between source
7452 @type shutdown_timeout: int
7453 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7458 _MIGRATION_POLL_INTERVAL = 1 # seconds
7459 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7461 def __init__(self, lu, instance_name, cleanup=False,
7462 failover=False, fallback=False,
7463 ignore_consistency=False,
7464 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7465 """Initializes this class.
7468 Tasklet.__init__(self, lu)
7471 self.instance_name = instance_name
7472 self.cleanup = cleanup
7473 self.live = False # will be overridden later
7474 self.failover = failover
7475 self.fallback = fallback
7476 self.ignore_consistency = ignore_consistency
7477 self.shutdown_timeout = shutdown_timeout
7479 def CheckPrereq(self):
7480 """Check prerequisites.
7482 This checks that the instance is in the cluster.
7485 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7486 instance = self.cfg.GetInstanceInfo(instance_name)
7487 assert instance is not None
7488 self.instance = instance
7490 if (not self.cleanup and
7491 not instance.admin_state == constants.ADMINST_UP and
7492 not self.failover and self.fallback):
7493 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7494 " switching to failover")
7495 self.failover = True
7497 if instance.disk_template not in constants.DTS_MIRRORED:
7502 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7503 " %s" % (instance.disk_template, text),
7506 if instance.disk_template in constants.DTS_EXT_MIRROR:
7507 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7509 if self.lu.op.iallocator:
7510 self._RunAllocator()
7512     # We set self.target_node as it is required by
7514 self.target_node = self.lu.op.target_node
7516 # self.target_node is already populated, either directly or by the
7518 target_node = self.target_node
7519 if self.target_node == instance.primary_node:
7520 raise errors.OpPrereqError("Cannot migrate instance %s"
7521 " to its primary (%s)" %
7522 (instance.name, instance.primary_node))
7524 if len(self.lu.tasklets) == 1:
7525 # It is safe to release locks only when we're the only tasklet
7527 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7528 keep=[instance.primary_node, self.target_node])
7531 secondary_nodes = instance.secondary_nodes
7532 if not secondary_nodes:
7533 raise errors.ConfigurationError("No secondary node but using"
7534 " %s disk template" %
7535 instance.disk_template)
7536 target_node = secondary_nodes[0]
7537 if self.lu.op.iallocator or (self.lu.op.target_node and
7538 self.lu.op.target_node != target_node):
7540 text = "failed over"
7543 raise errors.OpPrereqError("Instances with disk template %s cannot"
7544 " be %s to arbitrary nodes"
7545 " (neither an iallocator nor a target"
7546 " node can be passed)" %
7547 (instance.disk_template, text),
7550 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7552 # check memory requirements on the secondary node
7553 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7554 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7555 instance.name, i_be[constants.BE_MAXMEM],
7556 instance.hypervisor)
7558 self.lu.LogInfo("Not checking memory on the secondary node as"
7559 " instance will not be started")
7561 # check if failover must be forced instead of migration
7562 if (not self.cleanup and not self.failover and
7563 i_be[constants.BE_ALWAYS_FAILOVER]):
7565 self.lu.LogInfo("Instance configured to always failover; fallback"
7567 self.failover = True
7569 raise errors.OpPrereqError("This instance has been configured to"
7570 " always failover, please allow failover",
7573 # check bridge existence
7574 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7576 if not self.cleanup:
7577 _CheckNodeNotDrained(self.lu, target_node)
7578 if not self.failover:
7579 result = self.rpc.call_instance_migratable(instance.primary_node,
7581 if result.fail_msg and self.fallback:
7582 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7584 self.failover = True
7586 result.Raise("Can't migrate, please use failover",
7587 prereq=True, ecode=errors.ECODE_STATE)
7589 assert not (self.failover and self.cleanup)
7591 if not self.failover:
7592 if self.lu.op.live is not None and self.lu.op.mode is not None:
7593 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7594 " parameters are accepted",
7596 if self.lu.op.live is not None:
7598 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7600 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7601 # reset the 'live' parameter to None so that repeated
7602 # invocations of CheckPrereq do not raise an exception
7603 self.lu.op.live = None
7604 elif self.lu.op.mode is None:
7605 # read the default value from the hypervisor
7606 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7608 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7610 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7612 # Failover is never live
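# Summary of the branch above: an explicit 'live' flag is translated into the
# corresponding migration mode and then reset to None, an explicit 'mode' is
# used as given, and otherwise the default is read from the hypervisor's
# HV_MIGRATION_MODE parameter; a failover is never live.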
7615 def _RunAllocator(self):
7616 """Run the allocator based on input opcode.
7619 ial = IAllocator(self.cfg, self.rpc,
7620 mode=constants.IALLOCATOR_MODE_RELOC,
7621 name=self.instance_name,
7622 # TODO See why hail breaks with a single node below
7623 relocate_from=[self.instance.primary_node,
7624 self.instance.primary_node],
7627 ial.Run(self.lu.op.iallocator)
7630 raise errors.OpPrereqError("Can't compute nodes using"
7631 " iallocator '%s': %s" %
7632 (self.lu.op.iallocator, ial.info),
7634 if len(ial.result) != ial.required_nodes:
7635 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7636 " of nodes (%s), required %s" %
7637 (self.lu.op.iallocator, len(ial.result),
7638 ial.required_nodes), errors.ECODE_FAULT)
7639 self.target_node = ial.result[0]
7640 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7641 self.instance_name, self.lu.op.iallocator,
7642 utils.CommaJoin(ial.result))
7644 def _WaitUntilSync(self):
7645 """Poll with custom rpc for disk sync.
7647 This uses our own step-based rpc call.
7650 self.feedback_fn("* wait until resync is done")
7654 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7656 self.instance.disks)
7658 for node, nres in result.items():
7659 nres.Raise("Cannot resync disks on node %s" % node)
7660 node_done, node_percent = nres.payload
7661 all_done = all_done and node_done
7662 if node_percent is not None:
7663 min_percent = min(min_percent, node_percent)
7665 if min_percent < 100:
7666 self.feedback_fn(" - progress: %.1f%%" % min_percent)
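# Each node returns a (done, sync_percent) pair; the minimum percentage
# across nodes is reported as the overall progress, and the loop is meant to
# repeat until every node reports the resync as done.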
7669 def _EnsureSecondary(self, node):
7670 """Demote a node to secondary.
7673 self.feedback_fn("* switching node %s to secondary mode" % node)
7675 for dev in self.instance.disks:
7676 self.cfg.SetDiskID(dev, node)
7678 result = self.rpc.call_blockdev_close(node, self.instance.name,
7679 self.instance.disks)
7680 result.Raise("Cannot change disk to secondary on node %s" % node)
7682 def _GoStandalone(self):
7683 """Disconnect from the network.
7686 self.feedback_fn("* changing into standalone mode")
7687 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7688 self.instance.disks)
7689 for node, nres in result.items():
7690 nres.Raise("Cannot disconnect disks node %s" % node)
7692 def _GoReconnect(self, multimaster):
7693 """Reconnect to the network.
7699 msg = "single-master"
7700 self.feedback_fn("* changing disks into %s mode" % msg)
7701 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7702 self.instance.disks,
7703 self.instance.name, multimaster)
7704 for node, nres in result.items():
7705 nres.Raise("Cannot change disks config on node %s" % node)
7707 def _ExecCleanup(self):
7708 """Try to cleanup after a failed migration.
7710 The cleanup is done by:
7711 - check that the instance is running only on one node
7712 (and update the config if needed)
7713 - change disks on its secondary node to secondary
7714 - wait until disks are fully synchronized
7715 - disconnect from the network
7716 - change disks into single-master mode
7717 - wait again until disks are fully synchronized
7720 instance = self.instance
7721 target_node = self.target_node
7722 source_node = self.source_node
7724 # check running on only one node
7725 self.feedback_fn("* checking where the instance actually runs"
7726 " (if this hangs, the hypervisor might be in"
7728 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7729 for node, result in ins_l.items():
7730 result.Raise("Can't contact node %s" % node)
7732 runningon_source = instance.name in ins_l[source_node].payload
7733 runningon_target = instance.name in ins_l[target_node].payload
7735 if runningon_source and runningon_target:
7736 raise errors.OpExecError("Instance seems to be running on two nodes,"
7737 " or the hypervisor is confused; you will have"
7738 " to ensure manually that it runs only on one"
7739 " and restart this operation")
7741 if not (runningon_source or runningon_target):
7742 raise errors.OpExecError("Instance does not seem to be running at all;"
7743 " in this case it's safer to repair by"
7744 " running 'gnt-instance stop' to ensure disk"
7745 " shutdown, and then restarting it")
7747 if runningon_target:
7748 # the migration has actually succeeded, we need to update the config
7749 self.feedback_fn("* instance running on secondary node (%s),"
7750 " updating config" % target_node)
7751 instance.primary_node = target_node
7752 self.cfg.Update(instance, self.feedback_fn)
7753 demoted_node = source_node
7755 self.feedback_fn("* instance confirmed to be running on its"
7756 " primary node (%s)" % source_node)
7757 demoted_node = target_node
7759 if instance.disk_template in constants.DTS_INT_MIRROR:
7760 self._EnsureSecondary(demoted_node)
7762 self._WaitUntilSync()
7763 except errors.OpExecError:
7764 # we ignore errors here, since if the device is standalone, it
7765 # won't be able to sync
7767 self._GoStandalone()
7768 self._GoReconnect(False)
7769 self._WaitUntilSync()
7771 self.feedback_fn("* done")
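# The demote/reconnect/resync steps above are only needed for internally
# mirrored (DRBD-based) templates; externally mirrored templates have nothing
# to reconfigure at this point.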
7773 def _RevertDiskStatus(self):
7774 """Try to revert the disk status after a failed migration.
7777 target_node = self.target_node
7778 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7782 self._EnsureSecondary(target_node)
7783 self._GoStandalone()
7784 self._GoReconnect(False)
7785 self._WaitUntilSync()
7786 except errors.OpExecError, err:
7787 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7788 " please try to recover the instance manually;"
7789 " error '%s'" % str(err))
7791 def _AbortMigration(self):
7792 """Call the hypervisor code to abort a started migration.
7795 instance = self.instance
7796 target_node = self.target_node
7797 source_node = self.source_node
7798 migration_info = self.migration_info
7800 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7804 abort_msg = abort_result.fail_msg
7806 logging.error("Aborting migration failed on target node %s: %s",
7807 target_node, abort_msg)
7808 # Don't raise an exception here, as we still have to try to revert the
7809 # disk status, even if this step failed.
7811 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7812 instance, False, self.live)
7813 abort_msg = abort_result.fail_msg
7815 logging.error("Aborting migration failed on source node %s: %s",
7816 source_node, abort_msg)
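# Failures while aborting are only logged, never raised: the caller still has
# to get a chance to revert the disk status via _RevertDiskStatus().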
7818 def _ExecMigration(self):
7819 """Migrate an instance.
7821 The migrate is done by:
7822 - change the disks into dual-master mode
7823 - wait until disks are fully synchronized again
7824 - migrate the instance
7825 - change disks on the new secondary node (the old primary) to secondary
7826 - wait until disks are fully synchronized
7827 - change disks into single-master mode
7830 instance = self.instance
7831 target_node = self.target_node
7832 source_node = self.source_node
7834 # Check for hypervisor version mismatch and warn the user.
7835 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7836 None, [self.instance.hypervisor])
7837 for ninfo in nodeinfo.values():
7838 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7840 (_, _, (src_info, )) = nodeinfo[source_node].payload
7841 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7843 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7844 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7845 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7846 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7847 if src_version != dst_version:
7848 self.feedback_fn("* warning: hypervisor version mismatch between"
7849 " source (%s) and target (%s) node" %
7850 (src_version, dst_version))
7852 self.feedback_fn("* checking disk consistency between source and target")
7853 for dev in instance.disks:
7854 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7855 raise errors.OpExecError("Disk %s is degraded or not fully"
7856 " synchronized on target node,"
7857 " aborting migration" % dev.iv_name)
7859 # First get the migration information from the remote node
7860 result = self.rpc.call_migration_info(source_node, instance)
7861 msg = result.fail_msg
7863 log_err = ("Failed fetching source migration information from %s: %s" %
7865 logging.error(log_err)
7866 raise errors.OpExecError(log_err)
7868 self.migration_info = migration_info = result.payload
7870 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7871 # Then switch the disks to master/master mode
7872 self._EnsureSecondary(target_node)
7873 self._GoStandalone()
7874 self._GoReconnect(True)
7875 self._WaitUntilSync()
7877 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7878 result = self.rpc.call_accept_instance(target_node,
7881 self.nodes_ip[target_node])
7883 msg = result.fail_msg
7885 logging.error("Instance pre-migration failed, trying to revert"
7886 " disk status: %s", msg)
7887 self.feedback_fn("Pre-migration failed, aborting")
7888 self._AbortMigration()
7889 self._RevertDiskStatus()
7890 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7891 (instance.name, msg))
7893 self.feedback_fn("* migrating instance to %s" % target_node)
7894 result = self.rpc.call_instance_migrate(source_node, instance,
7895 self.nodes_ip[target_node],
7897 msg = result.fail_msg
7899 logging.error("Instance migration failed, trying to revert"
7900 " disk status: %s", msg)
7901 self.feedback_fn("Migration failed, aborting")
7902 self._AbortMigration()
7903 self._RevertDiskStatus()
7904 raise errors.OpExecError("Could not migrate instance %s: %s" %
7905 (instance.name, msg))
7907 self.feedback_fn("* starting memory transfer")
7908 last_feedback = time.time()
7910 result = self.rpc.call_instance_get_migration_status(source_node,
7912 msg = result.fail_msg
7913 ms = result.payload # MigrationStatus instance
7914 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7915 logging.error("Instance migration failed, trying to revert"
7916 " disk status: %s", msg)
7917 self.feedback_fn("Migration failed, aborting")
7918 self._AbortMigration()
7919 self._RevertDiskStatus()
7920 raise errors.OpExecError("Could not migrate instance %s: %s" %
7921 (instance.name, msg))
7923 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7924 self.feedback_fn("* memory transfer complete")
7927 if (utils.TimeoutExpired(last_feedback,
7928 self._MIGRATION_FEEDBACK_INTERVAL) and
7929 ms.transferred_ram is not None):
7930 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7931 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7932 last_feedback = time.time()
7934 time.sleep(self._MIGRATION_POLL_INTERVAL)
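# The polling loop above queries the source node every
# _MIGRATION_POLL_INTERVAL (1 second) and emits a progress line at most every
# _MIGRATION_FEEDBACK_INTERVAL (10 seconds), computing the percentage from
# transferred_ram/total_ram when the hypervisor reports those values.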
7936 result = self.rpc.call_instance_finalize_migration_src(source_node,
7940 msg = result.fail_msg
7942 logging.error("Instance migration succeeded, but finalization failed"
7943 " on the source node: %s", msg)
7944 raise errors.OpExecError("Could not finalize instance migration: %s" %
7947 instance.primary_node = target_node
7949 # distribute new instance config to the other nodes
7950 self.cfg.Update(instance, self.feedback_fn)
7952 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7956 msg = result.fail_msg
7958 logging.error("Instance migration succeeded, but finalization failed"
7959 " on the target node: %s", msg)
7960 raise errors.OpExecError("Could not finalize instance migration: %s" %
7963 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7964 self._EnsureSecondary(source_node)
7965 self._WaitUntilSync()
7966 self._GoStandalone()
7967 self._GoReconnect(False)
7968 self._WaitUntilSync()
7970 self.feedback_fn("* done")
7972 def _ExecFailover(self):
7973 """Failover an instance.
7975 The failover is done by shutting it down on its present node and
7976 starting it on the secondary.
7979 instance = self.instance
7980 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7982 source_node = instance.primary_node
7983 target_node = self.target_node
7985 if instance.admin_state == constants.ADMINST_UP:
7986 self.feedback_fn("* checking disk consistency between source and target")
7987 for dev in instance.disks:
7988 # for drbd, these are drbd over lvm
7989 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7990 if primary_node.offline:
7991 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7993 (primary_node.name, dev.iv_name, target_node))
7994 elif not self.ignore_consistency:
7995 raise errors.OpExecError("Disk %s is degraded on target node,"
7996 " aborting failover" % dev.iv_name)
7998 self.feedback_fn("* not checking disk consistency as instance is not"
8001 self.feedback_fn("* shutting down instance on source node")
8002 logging.info("Shutting down instance %s on node %s",
8003 instance.name, source_node)
8005 result = self.rpc.call_instance_shutdown(source_node, instance,
8006 self.shutdown_timeout)
8007 msg = result.fail_msg
8009 if self.ignore_consistency or primary_node.offline:
8010 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8011 " proceeding anyway; please make sure node"
8012 " %s is down; error details: %s",
8013 instance.name, source_node, source_node, msg)
8015 raise errors.OpExecError("Could not shutdown instance %s on"
8017 (instance.name, source_node, msg))
8019 self.feedback_fn("* deactivating the instance's disks on source node")
8020 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8021 raise errors.OpExecError("Can't shut down the instance's disks")
8023 instance.primary_node = target_node
8024 # distribute new instance config to the other nodes
8025 self.cfg.Update(instance, self.feedback_fn)
8027 # Only start the instance if it's marked as up
8028 if instance.admin_state == constants.ADMINST_UP:
8029 self.feedback_fn("* activating the instance's disks on target node %s" %
8031 logging.info("Starting instance %s on node %s",
8032 instance.name, target_node)
8034 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8035 ignore_secondaries=True)
8037 _ShutdownInstanceDisks(self.lu, instance)
8038 raise errors.OpExecError("Can't activate the instance's disks")
8040 self.feedback_fn("* starting the instance on the target node %s" %
8042 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8044 msg = result.fail_msg
8046 _ShutdownInstanceDisks(self.lu, instance)
8047 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8048 (instance.name, target_node, msg))
8050 def Exec(self, feedback_fn):
8051 """Perform the migration.
8054 self.feedback_fn = feedback_fn
8055 self.source_node = self.instance.primary_node
8057 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8058 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8059 self.target_node = self.instance.secondary_nodes[0]
8060 # Otherwise self.target_node has been populated either
8061 # directly, or through an iallocator.
8063 self.all_nodes = [self.source_node, self.target_node]
8064 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8065 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8068 feedback_fn("Failover instance %s" % self.instance.name)
8069 self._ExecFailover()
8071 feedback_fn("Migrating instance %s" % self.instance.name)
8074 return self._ExecCleanup()
8076 return self._ExecMigration()
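# Summary of the dispatch above: failover requests run _ExecFailover(), while
# migration requests run _ExecCleanup() when invoked with cleanup=True (to
# recover from a previously failed migration) and _ExecMigration() otherwise.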
8079 def _CreateBlockDev(lu, node, instance, device, force_create,
8081 """Create a tree of block devices on a given node.
8083 If this device type has to be created on secondaries, create it and
8086 If not, just recurse to children keeping the same 'force' value.
8088 @param lu: the lu on whose behalf we execute
8089 @param node: the node on which to create the device
8090 @type instance: L{objects.Instance}
8091 @param instance: the instance which owns the device
8092 @type device: L{objects.Disk}
8093 @param device: the device to create
8094 @type force_create: boolean
8095 @param force_create: whether to force creation of this device; this
8096 will be changed to True whenever we find a device which has the
8097 CreateOnSecondary() attribute
8098 @param info: the extra 'metadata' we should attach to the device
8099 (this will be represented as a LVM tag)
8100 @type force_open: boolean
8101 @param force_open: this parameter will be passed to the
8102 L{backend.BlockdevCreate} function where it specifies
8103 whether we run on primary or not, and it affects both
8104 the child assembly and the device's own Open() execution
8107 if device.CreateOnSecondary():
8111 for child in device.children:
8112 _CreateBlockDev(lu, node, instance, child, force_create,
8115 if not force_create:
8118 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8121 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8122 """Create a single block device on a given node.
8124 This will not recurse over children of the device, so they must be
8127 @param lu: the lu on whose behalf we execute
8128 @param node: the node on which to create the device
8129 @type instance: L{objects.Instance}
8130 @param instance: the instance which owns the device
8131 @type device: L{objects.Disk}
8132 @param device: the device to create
8133 @param info: the extra 'metadata' we should attach to the device
8134 (this will be represented as a LVM tag)
8135 @type force_open: boolean
8136 @param force_open: this parameter will be passed to the
8137 L{backend.BlockdevCreate} function where it specifies
8138 whether we run on primary or not, and it affects both
8139 the child assembly and the device's own Open() execution
8142 lu.cfg.SetDiskID(device, node)
8143 result = lu.rpc.call_blockdev_create(node, device, device.size,
8144 instance.name, force_open, info)
8145 result.Raise("Can't create block device %s on"
8146 " node %s for instance %s" % (device, node, instance.name))
8147 if device.physical_id is None:
8148 device.physical_id = result.payload
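# The physical_id is filled in from the RPC payload only when it is not
# already known; the SetDiskID() call above ensures the node-specific device
# IDs are in place before the create RPC is issued.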
8151 def _GenerateUniqueNames(lu, exts):
8152 """Generate a suitable LV name.
8154 This will generate a logical volume name for the given instance.
8159 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8160 results.append("%s%s" % (new_id, val))
8164 def _ComputeLDParams(disk_template, disk_params):
8165 """Computes Logical Disk parameters from Disk Template parameters.
8167 @type disk_template: string
8168 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8169 @type disk_params: dict
8170 @param disk_params: disk template parameters; dict(template_name -> parameters)
8172 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8173 contains the LD parameters of the node. The tree is flattened in-order.
8176 if disk_template not in constants.DISK_TEMPLATES:
8177 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8180 dt_params = disk_params[disk_template]
8181 if disk_template == constants.DT_DRBD8:
8183 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8184 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8185 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8186 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8187 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8188 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8192 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8195 result.append(drbd_params)
8199 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8202 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8204 result.append(data_params)
8208 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8211 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8213 result.append(meta_params)
8215 elif (disk_template == constants.DT_FILE or
8216 disk_template == constants.DT_SHARED_FILE):
8217 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8219 elif disk_template == constants.DT_PLAIN:
8221 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8224 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8226 result.append(params)
8228 elif disk_template == constants.DT_BLOCK:
8229 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
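# The returned list is the disk tree flattened in-order, as documented above:
# for DT_DRBD8 it is [drbd_params, data_lv_params, meta_lv_params], while the
# single-device templates yield exactly one entry.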
8234 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8235 iv_name, p_minor, s_minor, drbd_params, data_params,
8237 """Generate a drbd8 device complete with its children.
8240 assert len(vgnames) == len(names) == 2
8241 port = lu.cfg.AllocatePort()
8242 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8244 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8245 logical_id=(vgnames[0], names[0]),
8247 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8248 logical_id=(vgnames[1], names[1]),
8250 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8251 logical_id=(primary, secondary, port,
8254 children=[dev_data, dev_meta],
8255 iv_name=iv_name, params=drbd_params)
8259 def _GenerateDiskTemplate(lu, template_name,
8260 instance_name, primary_node,
8261 secondary_nodes, disk_info,
8262 file_storage_dir, file_driver,
8263 base_index, feedback_fn, disk_params):
8264 """Generate the entire disk layout for a given template type.
8267 #TODO: compute space requirements
8269 vgname = lu.cfg.GetVGName()
8270 disk_count = len(disk_info)
8272 ld_params = _ComputeLDParams(template_name, disk_params)
8273 if template_name == constants.DT_DISKLESS:
8275 elif template_name == constants.DT_PLAIN:
8276 if len(secondary_nodes) != 0:
8277 raise errors.ProgrammerError("Wrong template configuration")
8279 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8280 for i in range(disk_count)])
8281 for idx, disk in enumerate(disk_info):
8282 disk_index = idx + base_index
8283 vg = disk.get(constants.IDISK_VG, vgname)
8284 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8285 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8286 size=disk[constants.IDISK_SIZE],
8287 logical_id=(vg, names[idx]),
8288 iv_name="disk/%d" % disk_index,
8289 mode=disk[constants.IDISK_MODE],
8290 params=ld_params[0])
8291 disks.append(disk_dev)
8292 elif template_name == constants.DT_DRBD8:
8293 drbd_params, data_params, meta_params = ld_params
8294 if len(secondary_nodes) != 1:
8295 raise errors.ProgrammerError("Wrong template configuration")
8296 remote_node = secondary_nodes[0]
8297 minors = lu.cfg.AllocateDRBDMinor(
8298 [primary_node, remote_node] * len(disk_info), instance_name)
8301 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8302 for i in range(disk_count)]):
8303 names.append(lv_prefix + "_data")
8304 names.append(lv_prefix + "_meta")
8305 for idx, disk in enumerate(disk_info):
8306 disk_index = idx + base_index
8307 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8308 data_vg = disk.get(constants.IDISK_VG, vgname)
8309 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8310 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8311 disk[constants.IDISK_SIZE],
8313 names[idx * 2:idx * 2 + 2],
8314 "disk/%d" % disk_index,
8315 minors[idx * 2], minors[idx * 2 + 1],
8316 drbd_params, data_params, meta_params)
8317 disk_dev.mode = disk[constants.IDISK_MODE]
8318 disks.append(disk_dev)
8319 elif template_name == constants.DT_FILE:
8320 if len(secondary_nodes) != 0:
8321 raise errors.ProgrammerError("Wrong template configuration")
8323 opcodes.RequireFileStorage()
8325 for idx, disk in enumerate(disk_info):
8326 disk_index = idx + base_index
8327 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8328 size=disk[constants.IDISK_SIZE],
8329 iv_name="disk/%d" % disk_index,
8330 logical_id=(file_driver,
8331 "%s/disk%d" % (file_storage_dir,
8333 mode=disk[constants.IDISK_MODE],
8334 params=ld_params[0])
8335 disks.append(disk_dev)
8336 elif template_name == constants.DT_SHARED_FILE:
8337 if len(secondary_nodes) != 0:
8338 raise errors.ProgrammerError("Wrong template configuration")
8340 opcodes.RequireSharedFileStorage()
8342 for idx, disk in enumerate(disk_info):
8343 disk_index = idx + base_index
8344 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8345 size=disk[constants.IDISK_SIZE],
8346 iv_name="disk/%d" % disk_index,
8347 logical_id=(file_driver,
8348 "%s/disk%d" % (file_storage_dir,
8350 mode=disk[constants.IDISK_MODE],
8351 params=ld_params[0])
8352 disks.append(disk_dev)
8353 elif template_name == constants.DT_BLOCK:
8354 if len(secondary_nodes) != 0:
8355 raise errors.ProgrammerError("Wrong template configuration")
8357 for idx, disk in enumerate(disk_info):
8358 disk_index = idx + base_index
8359 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8360 size=disk[constants.IDISK_SIZE],
8361 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8362 disk[constants.IDISK_ADOPT]),
8363 iv_name="disk/%d" % disk_index,
8364 mode=disk[constants.IDISK_MODE],
8365 params=ld_params[0])
8366 disks.append(disk_dev)
8369 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8373 def _GetInstanceInfoText(instance):
8374 """Compute that text that should be added to the disk's metadata.
8377 return "originstname+%s" % instance.name
8380 def _CalcEta(time_taken, written, total_size):
8381 """Calculates the ETA based on size written and total size.
8383 @param time_taken: The time taken so far
8384 @param written: amount written so far
8385 @param total_size: The total size of data to be written
8386 @return: The remaining time in seconds
8389 avg_time = time_taken / float(written)
8390 return (total_size - written) * avg_time
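# Worked example: if 1024 MiB out of 4096 MiB were written in 60 seconds,
# avg_time is 60/1024 s per MiB and the ETA is (4096 - 1024) * 60/1024 = 180s.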
8393 def _WipeDisks(lu, instance):
8394 """Wipes instance disks.
8396 @type lu: L{LogicalUnit}
8397 @param lu: the logical unit on whose behalf we execute
8398 @type instance: L{objects.Instance}
8399 @param instance: the instance whose disks we should create
8400 @return: the success of the wipe
8403 node = instance.primary_node
8405 for device in instance.disks:
8406 lu.cfg.SetDiskID(device, node)
8408 logging.info("Pause sync of instance %s disks", instance.name)
8409 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8411 for idx, success in enumerate(result.payload):
8413 logging.warn("pause-sync of instance %s for disks %d failed",
8417 for idx, device in enumerate(instance.disks):
8418 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8419 # at most MAX_WIPE_CHUNK
8420 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8421 constants.MIN_WIPE_CHUNK_PERCENT)
8422 # we _must_ make this an int, otherwise rounding errors will
8424 wipe_chunk_size = int(wipe_chunk_size)
8426 lu.LogInfo("* Wiping disk %d", idx)
8427 logging.info("Wiping disk %d for instance %s, node %s using"
8428 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8433 start_time = time.time()
8435 while offset < size:
8436 wipe_size = min(wipe_chunk_size, size - offset)
8437 logging.debug("Wiping disk %d, offset %s, chunk %s",
8438 idx, offset, wipe_size)
8439 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8440 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8441 (idx, offset, wipe_size))
8444 if now - last_output >= 60:
8445 eta = _CalcEta(now - start_time, offset, size)
8446 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8447 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8450 logging.info("Resume sync of instance %s disks", instance.name)
8452 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8454 for idx, success in enumerate(result.payload):
8456 lu.LogWarning("Resume sync of disk %d failed, please have a"
8457 " look at the status and troubleshoot the issue", idx)
8458 logging.warn("resume-sync of instance %s for disks %d failed",
8462 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8463 """Create all disks for an instance.
8465 This abstracts away some work from AddInstance.
8467 @type lu: L{LogicalUnit}
8468 @param lu: the logical unit on whose behalf we execute
8469 @type instance: L{objects.Instance}
8470 @param instance: the instance whose disks we should create
8472 @param to_skip: list of indices to skip
8473 @type target_node: string
8474 @param target_node: if passed, overrides the target node for creation
8476 @return: the success of the creation
8479 info = _GetInstanceInfoText(instance)
8480 if target_node is None:
8481 pnode = instance.primary_node
8482 all_nodes = instance.all_nodes
8487 if instance.disk_template in constants.DTS_FILEBASED:
8488 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8489 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8491 result.Raise("Failed to create directory '%s' on"
8492 " node %s" % (file_storage_dir, pnode))
8494 # Note: this needs to be kept in sync with adding of disks in
8495 # LUInstanceSetParams
8496 for idx, device in enumerate(instance.disks):
8497 if to_skip and idx in to_skip:
8499 logging.info("Creating volume %s for instance %s",
8500 device.iv_name, instance.name)
8502 for node in all_nodes:
8503 f_create = node == pnode
8504 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
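# f_create doubles as both force_create and force_open: devices are forced
# (and opened) only on the primary node, while on secondaries creation only
# propagates where CreateOnSecondary() requires it (see _CreateBlockDev).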
8507 def _RemoveDisks(lu, instance, target_node=None):
8508 """Remove all disks for an instance.
8510 This abstracts away some work from `AddInstance()` and
8511 `RemoveInstance()`. Note that in case some of the devices couldn't
8512 be removed, the removal will continue with the other ones (compare
8513 with `_CreateDisks()`).
8515 @type lu: L{LogicalUnit}
8516 @param lu: the logical unit on whose behalf we execute
8517 @type instance: L{objects.Instance}
8518 @param instance: the instance whose disks we should remove
8519 @type target_node: string
8520 @param target_node: used to override the node on which to remove the disks
8522 @return: the success of the removal
8525 logging.info("Removing block devices for instance %s", instance.name)
8528 for device in instance.disks:
8530 edata = [(target_node, device)]
8532 edata = device.ComputeNodeTree(instance.primary_node)
8533 for node, disk in edata:
8534 lu.cfg.SetDiskID(disk, node)
8535 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8537 lu.LogWarning("Could not remove block device %s on node %s,"
8538 " continuing anyway: %s", device.iv_name, node, msg)
8541 # if this is a DRBD disk, return its port to the pool
8542 if device.dev_type in constants.LDS_DRBD:
8543 tcp_port = device.logical_id[2]
8544 lu.cfg.AddTcpUdpPort(tcp_port)
8546 if instance.disk_template == constants.DT_FILE:
8547 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8551 tgt = instance.primary_node
8552 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8554 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8555 file_storage_dir, instance.primary_node, result.fail_msg)
8561 def _ComputeDiskSizePerVG(disk_template, disks):
8562 """Compute disk size requirements in the volume group
8565 def _compute(disks, payload):
8566 """Universal algorithm.
8571 vgs[disk[constants.IDISK_VG]] = \
8572 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8576 # Required free disk space as a function of disk and swap space
8578 constants.DT_DISKLESS: {},
8579 constants.DT_PLAIN: _compute(disks, 0),
8580 # 128 MB are added for drbd metadata for each disk
8581 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8582 constants.DT_FILE: {},
8583 constants.DT_SHARED_FILE: {},
8586 if disk_template not in req_size_dict:
8587 raise errors.ProgrammerError("Disk template '%s' size requirement"
8588 " is unknown" % disk_template)
8590 return req_size_dict[disk_template]
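# Worked example: two 10240 MiB DRBD8 disks in the (illustrative) volume
# group "xenvg" require 2 * (10240 + DRBD_META_SIZE) MiB there, i.e.
# 20736 MiB once the 128 MiB of DRBD metadata per disk is added.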
8593 def _ComputeDiskSize(disk_template, disks):
8594 """Compute disk size requirements in the volume group
8597 # Required free disk space as a function of disk and swap space
8599 constants.DT_DISKLESS: None,
8600 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8601 # 128 MB are added for drbd metadata for each disk
8603 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8604 constants.DT_FILE: None,
8605 constants.DT_SHARED_FILE: 0,
8606 constants.DT_BLOCK: 0,
8609 if disk_template not in req_size_dict:
8610 raise errors.ProgrammerError("Disk template '%s' size requirement"
8611 " is unknown" % disk_template)
8613 return req_size_dict[disk_template]
8616 def _FilterVmNodes(lu, nodenames):
8617 """Filters out non-vm_capable nodes from a list.
8619 @type lu: L{LogicalUnit}
8620 @param lu: the logical unit for which we check
8621 @type nodenames: list
8622 @param nodenames: the list of nodes on which we should check
8624 @return: the list of vm-capable nodes
8627 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8628 return [name for name in nodenames if name not in vm_nodes]
8631 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8632 """Hypervisor parameter validation.
8634 This function abstract the hypervisor parameter validation to be
8635 used in both instance create and instance modify.
8637 @type lu: L{LogicalUnit}
8638 @param lu: the logical unit for which we check
8639 @type nodenames: list
8640 @param nodenames: the list of nodes on which we should check
8641 @type hvname: string
8642 @param hvname: the name of the hypervisor we should use
8643 @type hvparams: dict
8644 @param hvparams: the parameters which we need to check
8645 @raise errors.OpPrereqError: if the parameters are not valid
8648 nodenames = _FilterVmNodes(lu, nodenames)
8650 cluster = lu.cfg.GetClusterInfo()
8651 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8653 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8654 for node in nodenames:
8658 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8661 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8662 """OS parameters validation.
8664 @type lu: L{LogicalUnit}
8665 @param lu: the logical unit for which we check
8666 @type required: boolean
8667 @param required: whether the validation should fail if the OS is not
8669 @type nodenames: list
8670 @param nodenames: the list of nodes on which we should check
8671 @type osname: string
8672 @param osname: the name of the OS we should use
8673 @type osparams: dict
8674 @param osparams: the parameters which we need to check
8675 @raise errors.OpPrereqError: if the parameters are not valid
8678 nodenames = _FilterVmNodes(lu, nodenames)
8679 result = lu.rpc.call_os_validate(nodenames, required, osname,
8680 [constants.OS_VALIDATE_PARAMETERS],
8682 for node, nres in result.items():
8683 # we don't check for offline cases since this should be run only
8684 # against the master node and/or an instance's nodes
8685 nres.Raise("OS Parameters validation failed on node %s" % node)
8686 if not nres.payload:
8687 lu.LogInfo("OS %s not found on node %s, validation skipped",
8691 class LUInstanceCreate(LogicalUnit):
8692 """Create an instance.
8695 HPATH = "instance-add"
8696 HTYPE = constants.HTYPE_INSTANCE
8699 def CheckArguments(self):
8703 # do not require name_check to ease forward/backward compatibility
8705 if self.op.no_install and self.op.start:
8706 self.LogInfo("No-installation mode selected, disabling startup")
8707 self.op.start = False
8708 # validate/normalize the instance name
8709 self.op.instance_name = \
8710 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8712 if self.op.ip_check and not self.op.name_check:
8713 # TODO: make the ip check more flexible and not depend on the name check
8714 raise errors.OpPrereqError("Cannot do IP address check without a name"
8715 " check", errors.ECODE_INVAL)
8717 # check nics' parameter names
8718 for nic in self.op.nics:
8719 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8721 # check disks. parameter names and consistent adopt/no-adopt strategy
8722 has_adopt = has_no_adopt = False
8723 for disk in self.op.disks:
8724 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8725 if constants.IDISK_ADOPT in disk:
8729 if has_adopt and has_no_adopt:
8730 raise errors.OpPrereqError("Either all disks are adopted or none is",
8733 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8734 raise errors.OpPrereqError("Disk adoption is not supported for the"
8735 " '%s' disk template" %
8736 self.op.disk_template,
8738 if self.op.iallocator is not None:
8739 raise errors.OpPrereqError("Disk adoption not allowed with an"
8740 " iallocator script", errors.ECODE_INVAL)
8741 if self.op.mode == constants.INSTANCE_IMPORT:
8742 raise errors.OpPrereqError("Disk adoption not allowed for"
8743 " instance import", errors.ECODE_INVAL)
8745 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8746 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8747 " but no 'adopt' parameter given" %
8748 self.op.disk_template,
8751 self.adopt_disks = has_adopt
8753 # instance name verification
8754 if self.op.name_check:
8755 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8756 self.op.instance_name = self.hostname1.name
8757 # used in CheckPrereq for ip ping check
8758 self.check_ip = self.hostname1.ip
8760 self.check_ip = None
8762 # file storage checks
8763 if (self.op.file_driver and
8764 not self.op.file_driver in constants.FILE_DRIVER):
8765 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8766 self.op.file_driver, errors.ECODE_INVAL)
8768 if self.op.disk_template == constants.DT_FILE:
8769 opcodes.RequireFileStorage()
8770 elif self.op.disk_template == constants.DT_SHARED_FILE:
8771 opcodes.RequireSharedFileStorage()
8773 ### Node/iallocator related checks
8774 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8776 if self.op.pnode is not None:
8777 if self.op.disk_template in constants.DTS_INT_MIRROR:
8778 if self.op.snode is None:
8779 raise errors.OpPrereqError("The networked disk templates need"
8780 " a mirror node", errors.ECODE_INVAL)
8782 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8784 self.op.snode = None
8786 self._cds = _GetClusterDomainSecret()
8788 if self.op.mode == constants.INSTANCE_IMPORT:
8789 # On import force_variant must be True, because if we forced it at
8790 # initial install, our only chance when importing it back is that it
8792 self.op.force_variant = True
8794 if self.op.no_install:
8795 self.LogInfo("No-installation mode has no effect during import")
8797 elif self.op.mode == constants.INSTANCE_CREATE:
8798 if self.op.os_type is None:
8799 raise errors.OpPrereqError("No guest OS specified",
8801 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8802 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8803 " installation" % self.op.os_type,
8805 if self.op.disk_template is None:
8806 raise errors.OpPrereqError("No disk template specified",
8809 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8810 # Check handshake to ensure both clusters have the same domain secret
8811 src_handshake = self.op.source_handshake
8812 if not src_handshake:
8813 raise errors.OpPrereqError("Missing source handshake",
8816 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8819 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8822 # Load and check source CA
8823 self.source_x509_ca_pem = self.op.source_x509_ca
8824 if not self.source_x509_ca_pem:
8825 raise errors.OpPrereqError("Missing source X509 CA",
8829 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8831 except OpenSSL.crypto.Error, err:
8832 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8833 (err, ), errors.ECODE_INVAL)
8835 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8836 if errcode is not None:
8837 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8840 self.source_x509_ca = cert
8842 src_instance_name = self.op.source_instance_name
8843 if not src_instance_name:
8844 raise errors.OpPrereqError("Missing source instance name",
8847 self.source_instance_name = \
8848 netutils.GetHostname(name=src_instance_name).name
8851 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8852 self.op.mode, errors.ECODE_INVAL)
8854 def ExpandNames(self):
8855 """ExpandNames for CreateInstance.
8857 Figure out the right locks for instance creation.
8860 self.needed_locks = {}
8862 instance_name = self.op.instance_name
8863 # this is just a preventive check, but someone might still add this
8864 # instance in the meantime, and creation will fail at lock-add time
8865 if instance_name in self.cfg.GetInstanceList():
8866 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8867 instance_name, errors.ECODE_EXISTS)
8869 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8871 if self.op.iallocator:
8872 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8873 # specifying a group on instance creation and then selecting nodes from
8875 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8876 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8878 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8879 nodelist = [self.op.pnode]
8880 if self.op.snode is not None:
8881 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8882 nodelist.append(self.op.snode)
8883 self.needed_locks[locking.LEVEL_NODE] = nodelist
8884 # Lock resources of instance's primary and secondary nodes (copy to
8885 # prevent accidental modification)
8886 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8888 # in case of import lock the source node too
8889 if self.op.mode == constants.INSTANCE_IMPORT:
8890 src_node = self.op.src_node
8891 src_path = self.op.src_path
8893 if src_path is None:
8894 self.op.src_path = src_path = self.op.instance_name
8896 if src_node is None:
8897 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8898 self.op.src_node = None
8899 if os.path.isabs(src_path):
8900 raise errors.OpPrereqError("Importing an instance from a path"
8901 " requires a source node option",
8904 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8905 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8906 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8907 if not os.path.isabs(src_path):
8908 self.op.src_path = src_path = \
8909 utils.PathJoin(constants.EXPORT_DIR, src_path)
8911 def _RunAllocator(self):
8912 """Run the allocator based on input opcode.
8915 nics = [n.ToDict() for n in self.nics]
8916 ial = IAllocator(self.cfg, self.rpc,
8917 mode=constants.IALLOCATOR_MODE_ALLOC,
8918 name=self.op.instance_name,
8919 disk_template=self.op.disk_template,
8922 vcpus=self.be_full[constants.BE_VCPUS],
8923 memory=self.be_full[constants.BE_MAXMEM],
8926 hypervisor=self.op.hypervisor,
8929 ial.Run(self.op.iallocator)
8932 raise errors.OpPrereqError("Can't compute nodes using"
8933 " iallocator '%s': %s" %
8934 (self.op.iallocator, ial.info),
8936 if len(ial.result) != ial.required_nodes:
8937 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8938 " of nodes (%s), required %s" %
8939 (self.op.iallocator, len(ial.result),
8940 ial.required_nodes), errors.ECODE_FAULT)
8941 self.op.pnode = ial.result[0]
8942 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8943 self.op.instance_name, self.op.iallocator,
8944 utils.CommaJoin(ial.result))
8945 if ial.required_nodes == 2:
8946 self.op.snode = ial.result[1]
8948 def BuildHooksEnv(self):
8951 This runs on master, primary and secondary nodes of the instance.
8955 "ADD_MODE": self.op.mode,
8957 if self.op.mode == constants.INSTANCE_IMPORT:
8958 env["SRC_NODE"] = self.op.src_node
8959 env["SRC_PATH"] = self.op.src_path
8960 env["SRC_IMAGES"] = self.src_images
8962 env.update(_BuildInstanceHookEnv(
8963 name=self.op.instance_name,
8964 primary_node=self.op.pnode,
8965 secondary_nodes=self.secondaries,
8966 status=self.op.start,
8967 os_type=self.op.os_type,
8968 minmem=self.be_full[constants.BE_MINMEM],
8969 maxmem=self.be_full[constants.BE_MAXMEM],
8970 vcpus=self.be_full[constants.BE_VCPUS],
8971 nics=_NICListToTuple(self, self.nics),
8972 disk_template=self.op.disk_template,
8973 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8974 for d in self.disks],
8977 hypervisor_name=self.op.hypervisor,
8983 def BuildHooksNodes(self):
8984 """Build hooks nodes.
8987 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8990 def _ReadExportInfo(self):
8991 """Reads the export information from disk.
8993 It will override the opcode source node and path with the actual
8994 information, if these two were not specified before.
8996 @return: the export information
8999 assert self.op.mode == constants.INSTANCE_IMPORT
9001 src_node = self.op.src_node
9002 src_path = self.op.src_path
9004 if src_node is None:
9005 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9006 exp_list = self.rpc.call_export_list(locked_nodes)
9008 for node in exp_list:
9009 if exp_list[node].fail_msg:
9011 if src_path in exp_list[node].payload:
9013 self.op.src_node = src_node = node
9014 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9018 raise errors.OpPrereqError("No export found for relative path %s" %
9019 src_path, errors.ECODE_INVAL)
9021 _CheckNodeOnline(self, src_node)
9022 result = self.rpc.call_export_info(src_node, src_path)
9023 result.Raise("No export or invalid export found in dir %s" % src_path)
9025 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9026 if not export_info.has_section(constants.INISECT_EXP):
9027 raise errors.ProgrammerError("Corrupted export config",
9028 errors.ECODE_ENVIRON)
9030 ei_version = export_info.get(constants.INISECT_EXP, "version")
9031 if (int(ei_version) != constants.EXPORT_VERSION):
9032 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9033 (ei_version, constants.EXPORT_VERSION),
9034 errors.ECODE_ENVIRON)
9037 def _ReadExportParams(self, einfo):
9038 """Use export parameters as defaults.
9040 In case the opcode doesn't specify (as in override) some instance
9041 parameters, then try to use them from the export information, if
9045 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9047 if self.op.disk_template is None:
9048 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9049 self.op.disk_template = einfo.get(constants.INISECT_INS,
9051 if self.op.disk_template not in constants.DISK_TEMPLATES:
9052 raise errors.OpPrereqError("Disk template specified in configuration"
9053 " file is not one of the allowed values:"
9054 " %s" % " ".join(constants.DISK_TEMPLATES))
9056 raise errors.OpPrereqError("No disk template specified and the export"
9057 " is missing the disk_template information",
9060 if not self.op.disks:
9062 # TODO: import the disk iv_name too
9063 for idx in range(constants.MAX_DISKS):
9064 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9065 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9066 disks.append({constants.IDISK_SIZE: disk_sz})
9067 self.op.disks = disks
9068 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9069 raise errors.OpPrereqError("No disk info specified and the export"
9070 " is missing the disk information",
9073 if not self.op.nics:
9075 for idx in range(constants.MAX_NICS):
9076 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9078 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9079 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9086 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9087 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9089 if (self.op.hypervisor is None and
9090 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9091 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9093 if einfo.has_section(constants.INISECT_HYP):
9094 # use the export parameters but do not override the ones
9095 # specified by the user
9096 for name, value in einfo.items(constants.INISECT_HYP):
9097 if name not in self.op.hvparams:
9098 self.op.hvparams[name] = value
9100 if einfo.has_section(constants.INISECT_BEP):
9101 # use the parameters, without overriding
9102 for name, value in einfo.items(constants.INISECT_BEP):
9103 if name not in self.op.beparams:
9104 self.op.beparams[name] = value
9105 # Compatibility for the old "memory" be param
9106 if name == constants.BE_MEMORY:
9107 if constants.BE_MAXMEM not in self.op.beparams:
9108 self.op.beparams[constants.BE_MAXMEM] = value
9109 if constants.BE_MINMEM not in self.op.beparams:
9110 self.op.beparams[constants.BE_MINMEM] = value
9112 # try to read the parameters old style, from the main section
9113 for name in constants.BES_PARAMETERS:
9114 if (name not in self.op.beparams and
9115 einfo.has_option(constants.INISECT_INS, name)):
9116 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9118 if einfo.has_section(constants.INISECT_OSP):
9119 # use the parameters, without overriding
9120 for name, value in einfo.items(constants.INISECT_OSP):
9121 if name not in self.op.osparams:
9122 self.op.osparams[name] = value
9124 def _RevertToDefaults(self, cluster):
9125 """Revert the instance parameters to the default values.
9129 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9130 for name in self.op.hvparams.keys():
9131 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9132 del self.op.hvparams[name]
9134 be_defs = cluster.SimpleFillBE({})
9135 for name in self.op.beparams.keys():
9136 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9137 del self.op.beparams[name]
9139 nic_defs = cluster.SimpleFillNIC({})
9140 for nic in self.op.nics:
9141 for name in constants.NICS_PARAMETERS:
9142 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9145 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9146 for name in self.op.osparams.keys():
9147 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9148 del self.op.osparams[name]
9150 def _CalculateFileStorageDir(self):
9151 """Calculate final instance file storage dir.
9154 # file storage dir calculation/check
9155 self.instance_file_storage_dir = None
9156 if self.op.disk_template in constants.DTS_FILEBASED:
9157 # build the full file storage dir path
9160 if self.op.disk_template == constants.DT_SHARED_FILE:
9161 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9163 get_fsd_fn = self.cfg.GetFileStorageDir
9165 cfg_storagedir = get_fsd_fn()
9166 if not cfg_storagedir:
9167 raise errors.OpPrereqError("Cluster file storage dir not defined")
9168 joinargs.append(cfg_storagedir)
9170 if self.op.file_storage_dir is not None:
9171 joinargs.append(self.op.file_storage_dir)
9173 joinargs.append(self.op.instance_name)
9175 # pylint: disable=W0142
9176 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
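# The resulting path is
#   <cluster (shared) file storage dir>[/<file_storage_dir from the opcode>]/<instance name>
# with the components joined in exactly that order.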
9178 def CheckPrereq(self):
9179 """Check prerequisites.
9182 self._CalculateFileStorageDir()
9184 if self.op.mode == constants.INSTANCE_IMPORT:
9185 export_info = self._ReadExportInfo()
9186 self._ReadExportParams(export_info)
9188 if (not self.cfg.GetVGName() and
9189 self.op.disk_template not in constants.DTS_NOT_LVM):
9190 raise errors.OpPrereqError("Cluster does not support lvm-based"
9191 " instances", errors.ECODE_STATE)
9193 if (self.op.hypervisor is None or
9194 self.op.hypervisor == constants.VALUE_AUTO):
9195 self.op.hypervisor = self.cfg.GetHypervisorType()
9197 cluster = self.cfg.GetClusterInfo()
9198 enabled_hvs = cluster.enabled_hypervisors
9199 if self.op.hypervisor not in enabled_hvs:
9200 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9201 " cluster (%s)" % (self.op.hypervisor,
9202 ",".join(enabled_hvs)),
9205 # Check tag validity
9206 for tag in self.op.tags:
9207 objects.TaggableObject.ValidateTag(tag)
9209 # check hypervisor parameter syntax (locally)
9210 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9211 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9213 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9214 hv_type.CheckParameterSyntax(filled_hvp)
9215 self.hv_full = filled_hvp
9216 # check that we don't specify global parameters on an instance
9217 _CheckGlobalHvParams(self.op.hvparams)
9219 # fill and remember the beparams dict
9220 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9221 for param, value in self.op.beparams.iteritems():
9222 if value == constants.VALUE_AUTO:
9223 self.op.beparams[param] = default_beparams[param]
9224 objects.UpgradeBeParams(self.op.beparams)
9225 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9226 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9228 # build os parameters
9229 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9231 # now that hvp/bep are in final format, let's reset to defaults,
9233 if self.op.identify_defaults:
9234 self._RevertToDefaults(cluster)
9238 for idx, nic in enumerate(self.op.nics):
9239 nic_mode_req = nic.get(constants.INIC_MODE, None)
9240 nic_mode = nic_mode_req
9241 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9242 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9244 # in routed mode, for the first nic, the default ip is 'auto'
9245 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9246 default_ip_mode = constants.VALUE_AUTO
9248 default_ip_mode = constants.VALUE_NONE
9250 # ip validity checks
9251 ip = nic.get(constants.INIC_IP, default_ip_mode)
9252 if ip is None or ip.lower() == constants.VALUE_NONE:
9254 elif ip.lower() == constants.VALUE_AUTO:
9255 if not self.op.name_check:
9256 raise errors.OpPrereqError("IP address set to auto but name checks"
9257 " have been skipped",
9259 nic_ip = self.hostname1.ip
9261 if not netutils.IPAddress.IsValid(ip):
9262 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9266 # TODO: check the ip address for uniqueness
9267 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9268 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9271 # MAC address verification
9272 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9273 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9274 mac = utils.NormalizeAndValidateMac(mac)
9277 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9278 except errors.ReservationError:
9279 raise errors.OpPrereqError("MAC address %s already in use"
9280 " in cluster" % mac,
9281 errors.ECODE_NOTUNIQUE)
9283 # Build nic parameters
9284 link = nic.get(constants.INIC_LINK, None)
9285 if link == constants.VALUE_AUTO:
9286 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9289 nicparams[constants.NIC_MODE] = nic_mode
9291 nicparams[constants.NIC_LINK] = link
9293 check_params = cluster.SimpleFillNIC(nicparams)
9294 objects.NIC.CheckParameterSyntax(check_params)
9295 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9297 # disk checks/pre-build
9298 default_vg = self.cfg.GetVGName()
9300 for disk in self.op.disks:
9301 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9302 if mode not in constants.DISK_ACCESS_SET:
9303 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9304 mode, errors.ECODE_INVAL)
9305 size = disk.get(constants.IDISK_SIZE, None)
9307 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9310 except (TypeError, ValueError):
9311 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9314 data_vg = disk.get(constants.IDISK_VG, default_vg)
9316 constants.IDISK_SIZE: size,
9317 constants.IDISK_MODE: mode,
9318 constants.IDISK_VG: data_vg,
9320 if constants.IDISK_METAVG in disk:
9321 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9322 if constants.IDISK_ADOPT in disk:
9323 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9324 self.disks.append(new_disk)
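# Illustrative example (comments only): the normalized per-disk dict built by
# the loop above. Values are made up; the size is an integer (MiB, as used
# elsewhere in Ganeti).
#
#   new_disk = {constants.IDISK_SIZE: 10240,
#               constants.IDISK_MODE: constants.DISK_RDWR,
#               constants.IDISK_VG: "xenvg"}
#
# IDISK_METAVG and IDISK_ADOPT are copied through only when present in the
# input disk definition.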
9326 if self.op.mode == constants.INSTANCE_IMPORT:
9328 for idx in range(len(self.disks)):
9329 option = "disk%d_dump" % idx
9330 if export_info.has_option(constants.INISECT_INS, option):
9331 # FIXME: are the old os-es, disk sizes, etc. useful?
9332 export_name = export_info.get(constants.INISECT_INS, option)
9333 image = utils.PathJoin(self.op.src_path, export_name)
9334 disk_images.append(image)
9336 disk_images.append(False)
9338 self.src_images = disk_images
9340 old_name = export_info.get(constants.INISECT_INS, "name")
9341 if self.op.instance_name == old_name:
9342 for idx, nic in enumerate(self.nics):
9343 if nic.mac == constants.VALUE_AUTO:
9344 nic_mac_ini = "nic%d_mac" % idx
9345 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9347 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9349 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9350 if self.op.ip_check:
9351 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9352 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9353 (self.check_ip, self.op.instance_name),
9354 errors.ECODE_NOTUNIQUE)
9356 #### mac address generation
9357 # By generating the MAC address here, both the allocator and the hooks get
9358 # the real final MAC address rather than the 'auto' or 'generate' value.
9359 # There is a race condition between the generation and the instance object
9360 # creation, which means that we know the mac is valid now, but we're not
9361 # sure it will be when we actually add the instance. If things go bad
9362 # adding the instance will abort because of a duplicate mac, and the
9363 # creation job will fail.
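# Sketch of the window described above (comments only; the calls shown are
# the ones actually used in this LU):
#
#   nic.mac = self.cfg.GenerateMAC(ec_id)    # unique among known MACs *now*
#   ...                                      # other jobs may add/reserve MACs
#   self.cfg.AddInstance(iobj, ec_id)        # adding the instance re-checks
#                                            # uniqueness; a duplicate MAC
#                                            # makes the creation job fail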
9364 for nic in self.nics:
9365 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9366 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9370 if self.op.iallocator is not None:
9371 self._RunAllocator()
9373 # Release all unneeded node locks
9374 _ReleaseLocks(self, locking.LEVEL_NODE,
9375 keep=filter(None, [self.op.pnode, self.op.snode,
9378 #### node related checks
9380 # check primary node
9381 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9382 assert self.pnode is not None, \
9383 "Cannot retrieve locked node %s" % self.op.pnode
9385 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9386 pnode.name, errors.ECODE_STATE)
9388 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9389 pnode.name, errors.ECODE_STATE)
9390 if not pnode.vm_capable:
9391 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9392 " '%s'" % pnode.name, errors.ECODE_STATE)
9394 self.secondaries = []
9396 # mirror node verification
9397 if self.op.disk_template in constants.DTS_INT_MIRROR:
9398 if self.op.snode == pnode.name:
9399 raise errors.OpPrereqError("The secondary node cannot be the"
9400 " primary node", errors.ECODE_INVAL)
9401 _CheckNodeOnline(self, self.op.snode)
9402 _CheckNodeNotDrained(self, self.op.snode)
9403 _CheckNodeVmCapable(self, self.op.snode)
9404 self.secondaries.append(self.op.snode)
9406 snode = self.cfg.GetNodeInfo(self.op.snode)
9407 if pnode.group != snode.group:
9408 self.LogWarning("The primary and secondary nodes are in two"
9409 " different node groups; the disk parameters"
9410 " from the first disk's node group will be"
9413 nodenames = [pnode.name] + self.secondaries
9415 # disk parameters (not customizable at instance or node level)
9416 # just use the primary node parameters, ignoring the secondary.
9417 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9419 if not self.adopt_disks:
9420 # Check lv size requirements, if not adopting
9421 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9422 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9424 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9425 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9426 disk[constants.IDISK_ADOPT])
9427 for disk in self.disks])
9428 if len(all_lvs) != len(self.disks):
9429 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9431 for lv_name in all_lvs:
9433 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9434 # to ReserveLV use the same syntax
9435 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9436 except errors.ReservationError:
9437 raise errors.OpPrereqError("LV named %s used by another instance" %
9438 lv_name, errors.ECODE_NOTUNIQUE)
9440 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9441 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9443 node_lvs = self.rpc.call_lv_list([pnode.name],
9444 vg_names.payload.keys())[pnode.name]
9445 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9446 node_lvs = node_lvs.payload
9448 delta = all_lvs.difference(node_lvs.keys())
9450 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9451 utils.CommaJoin(delta),
9453 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9455 raise errors.OpPrereqError("Online logical volumes found, cannot"
9456 " adopt: %s" % utils.CommaJoin(online_lvs),
9458 # update the size of disk based on what is found
9459 for dsk in self.disks:
9460 dsk[constants.IDISK_SIZE] = \
9461 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9462 dsk[constants.IDISK_ADOPT])][0]))
9464 elif self.op.disk_template == constants.DT_BLOCK:
9465 # Normalize and de-duplicate device paths
9466 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9467 for disk in self.disks])
9468 if len(all_disks) != len(self.disks):
9469 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9471 baddisks = [d for d in all_disks
9472 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9474 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9475 " cannot be adopted" %
9476 (", ".join(baddisks),
9477 constants.ADOPTABLE_BLOCKDEV_ROOT),
9480 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9481 list(all_disks))[pnode.name]
9482 node_disks.Raise("Cannot get block device information from node %s" %
9484 node_disks = node_disks.payload
9485 delta = all_disks.difference(node_disks.keys())
9487 raise errors.OpPrereqError("Missing block device(s): %s" %
9488 utils.CommaJoin(delta),
9490 for dsk in self.disks:
9491 dsk[constants.IDISK_SIZE] = \
9492 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9494 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9496 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9497 # check OS parameters (remotely)
9498 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9500 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9502 # memory check on primary node
9503 #TODO(dynmem): use MINMEM for checking
9505 _CheckNodeFreeMemory(self, self.pnode.name,
9506 "creating instance %s" % self.op.instance_name,
9507 self.be_full[constants.BE_MAXMEM],
9510 self.dry_run_result = list(nodenames)
9512 def Exec(self, feedback_fn):
9513 """Create and add the instance to the cluster.
9516 instance = self.op.instance_name
9517 pnode_name = self.pnode.name
9519 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9520 self.owned_locks(locking.LEVEL_NODE)), \
9521 "Node locks differ from node resource locks"
9523 ht_kind = self.op.hypervisor
9524 if ht_kind in constants.HTS_REQ_PORT:
9525 network_port = self.cfg.AllocatePort()
9529 disks = _GenerateDiskTemplate(self,
9530 self.op.disk_template,
9531 instance, pnode_name,
9534 self.instance_file_storage_dir,
9535 self.op.file_driver,
9540 iobj = objects.Instance(name=instance, os=self.op.os_type,
9541 primary_node=pnode_name,
9542 nics=self.nics, disks=disks,
9543 disk_template=self.op.disk_template,
9544 admin_state=constants.ADMINST_DOWN,
9545 network_port=network_port,
9546 beparams=self.op.beparams,
9547 hvparams=self.op.hvparams,
9548 hypervisor=self.op.hypervisor,
9549 osparams=self.op.osparams,
9553 for tag in self.op.tags:
9556 if self.adopt_disks:
9557 if self.op.disk_template == constants.DT_PLAIN:
9558 # rename LVs to the newly-generated names; we need to construct
9559 # 'fake' LV disks with the old data, plus the new unique_id
9560 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9562 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9563 rename_to.append(t_dsk.logical_id)
9564 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9565 self.cfg.SetDiskID(t_dsk, pnode_name)
9566 result = self.rpc.call_blockdev_rename(pnode_name,
9567 zip(tmp_disks, rename_to))
9568 result.Raise("Failed to rename adopted LVs")
9570 feedback_fn("* creating instance disks...")
9572 _CreateDisks(self, iobj)
9573 except errors.OpExecError:
9574 self.LogWarning("Device creation failed, reverting...")
9576 _RemoveDisks(self, iobj)
9578 self.cfg.ReleaseDRBDMinors(instance)
9581 feedback_fn("adding instance %s to cluster config" % instance)
9583 self.cfg.AddInstance(iobj, self.proc.GetECId())
9585 # Declare that we don't want to remove the instance lock anymore, as we've
9586 # added the instance to the config
9587 del self.remove_locks[locking.LEVEL_INSTANCE]
9589 if self.op.mode == constants.INSTANCE_IMPORT:
9590 # Release unused nodes
9591 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9594 _ReleaseLocks(self, locking.LEVEL_NODE)
9597 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9598 feedback_fn("* wiping instance disks...")
9600 _WipeDisks(self, iobj)
9601 except errors.OpExecError, err:
9602 logging.exception("Wiping disks failed")
9603 self.LogWarning("Wiping instance disks failed (%s)", err)
9607 # Something is already wrong with the disks, don't do anything else
9609 elif self.op.wait_for_sync:
9610 disk_abort = not _WaitForSync(self, iobj)
9611 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9612 # make sure the disks are not degraded (still sync-ing is ok)
9613 feedback_fn("* checking mirrors status")
9614 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9619 _RemoveDisks(self, iobj)
9620 self.cfg.RemoveInstance(iobj.name)
9621 # Make sure the instance lock gets removed
9622 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9623 raise errors.OpExecError("There are some degraded disks for"
9626 # Release all node resource locks
9627 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9629 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9630 if self.op.mode == constants.INSTANCE_CREATE:
9631 if not self.op.no_install:
9632 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9633 not self.op.wait_for_sync)
9635 feedback_fn("* pausing disk sync to install instance OS")
9636 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9638 for idx, success in enumerate(result.payload):
9640 logging.warn("pause-sync of instance %s for disk %d failed",
9643 feedback_fn("* running the instance OS create scripts...")
9644 # FIXME: pass debug option from opcode to backend
9646 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9647 self.op.debug_level)
9649 feedback_fn("* resuming disk sync")
9650 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9652 for idx, success in enumerate(result.payload):
9654 logging.warn("resume-sync of instance %s for disk %d failed",
9657 os_add_result.Raise("Could not add os for instance %s"
9658 " on node %s" % (instance, pnode_name))
9660 elif self.op.mode == constants.INSTANCE_IMPORT:
9661 feedback_fn("* running the instance OS import scripts...")
9665 for idx, image in enumerate(self.src_images):
9669 # FIXME: pass debug option from opcode to backend
9670 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9671 constants.IEIO_FILE, (image, ),
9672 constants.IEIO_SCRIPT,
9673 (iobj.disks[idx], idx),
9675 transfers.append(dt)
9678 masterd.instance.TransferInstanceData(self, feedback_fn,
9679 self.op.src_node, pnode_name,
9680 self.pnode.secondary_ip,
9682 if not compat.all(import_result):
9683 self.LogWarning("Some disks for instance %s on node %s were not"
9684 " imported successfully" % (instance, pnode_name))
9686 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9687 feedback_fn("* preparing remote import...")
9688 # The source cluster will stop the instance before attempting to make a
9689 # connection. In some cases stopping an instance can take a long time,
9690 # hence the shutdown timeout is added to the connection timeout.
9691 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9692 self.op.source_shutdown_timeout)
9693 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9695 assert iobj.primary_node == self.pnode.name
9697 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9698 self.source_x509_ca,
9699 self._cds, timeouts)
9700 if not compat.all(disk_results):
9701 # TODO: Should the instance still be started, even if some disks
9702 # failed to import (valid for local imports, too)?
9703 self.LogWarning("Some disks for instance %s on node %s were not"
9704 " imported successfully" % (instance, pnode_name))
9706 # Run rename script on newly imported instance
9707 assert iobj.name == instance
9708 feedback_fn("Running rename script for %s" % instance)
9709 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9710 self.source_instance_name,
9711 self.op.debug_level)
9713 self.LogWarning("Failed to run rename script for %s on node"
9714 " %s: %s" % (instance, pnode_name, result.fail_msg))
9717 # also checked in the prereq part
9718 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9721 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9724 iobj.admin_state = constants.ADMINST_UP
9725 self.cfg.Update(iobj, feedback_fn)
9726 logging.info("Starting instance %s on node %s", instance, pnode_name)
9727 feedback_fn("* starting instance...")
9728 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9730 result.Raise("Could not start instance")
9732 return list(iobj.all_nodes)
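# Example (illustrative, comments only): a minimal opcode a client might
# submit to exercise LUInstanceCreate. The exact OpInstanceCreate parameter
# set is version-dependent; all values here are made up.
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debian-image",
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com",
#                                 wait_for_sync=True)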
9735 class LUInstanceConsole(NoHooksLU):
9736 """Connect to an instance's console.
9738 This is somewhat special in that it returns the command line that
9739 you need to run on the master node in order to connect to the
9745 def ExpandNames(self):
9746 self.share_locks = _ShareAll()
9747 self._ExpandAndLockInstance()
9749 def CheckPrereq(self):
9750 """Check prerequisites.
9752 This checks that the instance is in the cluster.
9755 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9756 assert self.instance is not None, \
9757 "Cannot retrieve locked instance %s" % self.op.instance_name
9758 _CheckNodeOnline(self, self.instance.primary_node)
9760 def Exec(self, feedback_fn):
9761 """Connect to the console of an instance
9764 instance = self.instance
9765 node = instance.primary_node
9767 node_insts = self.rpc.call_instance_list([node],
9768 [instance.hypervisor])[node]
9769 node_insts.Raise("Can't get node information from %s" % node)
9771 if instance.name not in node_insts.payload:
9772 if instance.admin_state == constants.ADMINST_UP:
9773 state = constants.INSTST_ERRORDOWN
9774 elif instance.admin_state == constants.ADMINST_DOWN:
9775 state = constants.INSTST_ADMINDOWN
9777 state = constants.INSTST_ADMINOFFLINE
9778 raise errors.OpExecError("Instance %s is not running (state %s)" %
9779 (instance.name, state))
9781 logging.debug("Connecting to console of %s on %s", instance.name, node)
9783 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9786 def _GetInstanceConsole(cluster, instance):
9787 """Returns console information for an instance.
9789 @type cluster: L{objects.Cluster}
9790 @type instance: L{objects.Instance}
9794 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9795 # beparams and hvparams are passed separately, to avoid editing the
9796 # instance and then saving the defaults in the instance itself.
9797 hvparams = cluster.FillHV(instance)
9798 beparams = cluster.FillBE(instance)
9799 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9801 assert console.instance == instance.name
9802 assert console.Validate()
9804 return console.ToDict()
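# Example (illustrative, comments only): how a client might use the console
# description returned above. OpInstanceConsole/cli.SubmitOpCode follow the
# usual client conventions; the "command" key is an assumption that applies
# to SSH-style consoles only.
#
#   data = cli.SubmitOpCode(opcodes.OpInstanceConsole(instance_name=name))
#   argv = data.get("command")   # command line to run on the master node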
9807 class LUInstanceReplaceDisks(LogicalUnit):
9808 """Replace the disks of an instance.
9811 HPATH = "mirrors-replace"
9812 HTYPE = constants.HTYPE_INSTANCE
9815 def CheckArguments(self):
9816 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9819 def ExpandNames(self):
9820 self._ExpandAndLockInstance()
9822 assert locking.LEVEL_NODE not in self.needed_locks
9823 assert locking.LEVEL_NODE_RES not in self.needed_locks
9824 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9826 assert self.op.iallocator is None or self.op.remote_node is None, \
9827 "Conflicting options"
9829 if self.op.remote_node is not None:
9830 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9832 # Warning: do not remove the locking of the new secondary here
9833 # unless DRBD8.AddChildren is changed to work in parallel;
9834 # currently it doesn't since parallel invocations of
9835 # FindUnusedMinor will conflict
9836 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9837 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9839 self.needed_locks[locking.LEVEL_NODE] = []
9840 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9842 if self.op.iallocator is not None:
9843 # iallocator will select a new node in the same group
9844 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9846 self.needed_locks[locking.LEVEL_NODE_RES] = []
9848 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9849 self.op.iallocator, self.op.remote_node,
9850 self.op.disks, False, self.op.early_release)
9852 self.tasklets = [self.replacer]
9854 def DeclareLocks(self, level):
9855 if level == locking.LEVEL_NODEGROUP:
9856 assert self.op.remote_node is None
9857 assert self.op.iallocator is not None
9858 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9860 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9861 # Lock all groups used by instance optimistically; this requires going
9862 # via the node before it's locked, requiring verification later on
9863 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9864 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9866 elif level == locking.LEVEL_NODE:
9867 if self.op.iallocator is not None:
9868 assert self.op.remote_node is None
9869 assert not self.needed_locks[locking.LEVEL_NODE]
9871 # Lock member nodes of all locked groups
9872 self.needed_locks[locking.LEVEL_NODE] = [node_name
9873 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9874 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9876 self._LockInstancesNodes()
9877 elif level == locking.LEVEL_NODE_RES:
9879 self.needed_locks[locking.LEVEL_NODE_RES] = \
9880 self.needed_locks[locking.LEVEL_NODE]
9882 def BuildHooksEnv(self):
9885 This runs on the master, the primary and all the secondaries.
9888 instance = self.replacer.instance
9890 "MODE": self.op.mode,
9891 "NEW_SECONDARY": self.op.remote_node,
9892 "OLD_SECONDARY": instance.secondary_nodes[0],
9894 env.update(_BuildInstanceHookEnvByObject(self, instance))
9897 def BuildHooksNodes(self):
9898 """Build hooks nodes.
9901 instance = self.replacer.instance
9903 self.cfg.GetMasterNode(),
9904 instance.primary_node,
9906 if self.op.remote_node is not None:
9907 nl.append(self.op.remote_node)
9910 def CheckPrereq(self):
9911 """Check prerequisites.
9914 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9915 self.op.iallocator is None)
9917 # Verify if node group locks are still correct
9918 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9920 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9922 return LogicalUnit.CheckPrereq(self)
9925 class TLReplaceDisks(Tasklet):
9926 """Replaces disks for an instance.
9928 Note: Locking is not within the scope of this class.
9931 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9932 disks, delay_iallocator, early_release):
9933 """Initializes this class.
9936 Tasklet.__init__(self, lu)
9939 self.instance_name = instance_name
9941 self.iallocator_name = iallocator_name
9942 self.remote_node = remote_node
9944 self.delay_iallocator = delay_iallocator
9945 self.early_release = early_release
9948 self.instance = None
9949 self.new_node = None
9950 self.target_node = None
9951 self.other_node = None
9952 self.remote_node_info = None
9953 self.node_secondary_ip = None
9956 def CheckArguments(mode, remote_node, iallocator):
9957 """Helper function for users of this class.
9960 # check for valid parameter combination
9961 if mode == constants.REPLACE_DISK_CHG:
9962 if remote_node is None and iallocator is None:
9963 raise errors.OpPrereqError("When changing the secondary either an"
9964 " iallocator script must be used or the"
9965 " new node given", errors.ECODE_INVAL)
9967 if remote_node is not None and iallocator is not None:
9968 raise errors.OpPrereqError("Give either the iallocator or the new"
9969 " secondary, not both", errors.ECODE_INVAL)
9971 elif remote_node is not None or iallocator is not None:
9972 # Not replacing the secondary
9973 raise errors.OpPrereqError("The iallocator and new node options can"
9974 " only be used when changing the"
9975 " secondary node", errors.ECODE_INVAL)
9978 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9979 """Compute a new secondary node using an IAllocator.
9982 ial = IAllocator(lu.cfg, lu.rpc,
9983 mode=constants.IALLOCATOR_MODE_RELOC,
9985 relocate_from=list(relocate_from))
9987 ial.Run(iallocator_name)
9990 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9991 " %s" % (iallocator_name, ial.info),
9994 if len(ial.result) != ial.required_nodes:
9995 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9996 " of nodes (%s), required %s" %
9998 len(ial.result), ial.required_nodes),
10001 remote_node_name = ial.result[0]
10003 lu.LogInfo("Selected new secondary for instance '%s': %s",
10004 instance_name, remote_node_name)
10006 return remote_node_name
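# Illustrative sketch (comments only) of the relocation request issued above:
# IALLOCATOR_MODE_RELOC names the instance plus the node(s) to move away
# from, and a successful run returns exactly ial.required_nodes node names,
# here the single new secondary. Example values:
#
#   relocate_from = ["node2.example.com"]   # current secondary
#   ial.result    = ["node3.example.com"]   # chosen replacement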
10008 def _FindFaultyDisks(self, node_name):
10009 """Wrapper for L{_FindFaultyInstanceDisks}.
10012 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10015 def _CheckDisksActivated(self, instance):
10016 """Checks if the instance disks are activated.
10018 @param instance: The instance to check disks
10019 @return: True if they are activated, False otherwise
10022 nodes = instance.all_nodes
10024 for idx, dev in enumerate(instance.disks):
10026 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10027 self.cfg.SetDiskID(dev, node)
10029 result = self.rpc.call_blockdev_find(node, dev)
10033 elif result.fail_msg or not result.payload:
10038 def CheckPrereq(self):
10039 """Check prerequisites.
10041 This checks that the instance is in the cluster.
10044 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10045 assert instance is not None, \
10046 "Cannot retrieve locked instance %s" % self.instance_name
10048 if instance.disk_template != constants.DT_DRBD8:
10049 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10050 " instances", errors.ECODE_INVAL)
10052 if len(instance.secondary_nodes) != 1:
10053 raise errors.OpPrereqError("The instance has a strange layout,"
10054 " expected one secondary but found %d" %
10055 len(instance.secondary_nodes),
10056 errors.ECODE_FAULT)
10058 if not self.delay_iallocator:
10059 self._CheckPrereq2()
10061 def _CheckPrereq2(self):
10062 """Check prerequisites, second part.
10064 This function should always be part of CheckPrereq. It was separated and is
10065 now called from Exec because during node evacuation iallocator was only
10066 called with an unmodified cluster model, not taking planned changes into
10070 instance = self.instance
10071 secondary_node = instance.secondary_nodes[0]
10073 if self.iallocator_name is None:
10074 remote_node = self.remote_node
10076 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10077 instance.name, instance.secondary_nodes)
10079 if remote_node is None:
10080 self.remote_node_info = None
10082 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10083 "Remote node '%s' is not locked" % remote_node
10085 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10086 assert self.remote_node_info is not None, \
10087 "Cannot retrieve locked node %s" % remote_node
10089 if remote_node == self.instance.primary_node:
10090 raise errors.OpPrereqError("The specified node is the primary node of"
10091 " the instance", errors.ECODE_INVAL)
10093 if remote_node == secondary_node:
10094 raise errors.OpPrereqError("The specified node is already the"
10095 " secondary node of the instance",
10096 errors.ECODE_INVAL)
10098 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10099 constants.REPLACE_DISK_CHG):
10100 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10101 errors.ECODE_INVAL)
10103 if self.mode == constants.REPLACE_DISK_AUTO:
10104 if not self._CheckDisksActivated(instance):
10105 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10106 " first" % self.instance_name,
10107 errors.ECODE_STATE)
10108 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10109 faulty_secondary = self._FindFaultyDisks(secondary_node)
10111 if faulty_primary and faulty_secondary:
10112 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10113 " one node and can not be repaired"
10114 " automatically" % self.instance_name,
10115 errors.ECODE_STATE)
10118 self.disks = faulty_primary
10119 self.target_node = instance.primary_node
10120 self.other_node = secondary_node
10121 check_nodes = [self.target_node, self.other_node]
10122 elif faulty_secondary:
10123 self.disks = faulty_secondary
10124 self.target_node = secondary_node
10125 self.other_node = instance.primary_node
10126 check_nodes = [self.target_node, self.other_node]
10132 # Non-automatic modes
10133 if self.mode == constants.REPLACE_DISK_PRI:
10134 self.target_node = instance.primary_node
10135 self.other_node = secondary_node
10136 check_nodes = [self.target_node, self.other_node]
10138 elif self.mode == constants.REPLACE_DISK_SEC:
10139 self.target_node = secondary_node
10140 self.other_node = instance.primary_node
10141 check_nodes = [self.target_node, self.other_node]
10143 elif self.mode == constants.REPLACE_DISK_CHG:
10144 self.new_node = remote_node
10145 self.other_node = instance.primary_node
10146 self.target_node = secondary_node
10147 check_nodes = [self.new_node, self.other_node]
10149 _CheckNodeNotDrained(self.lu, remote_node)
10150 _CheckNodeVmCapable(self.lu, remote_node)
10152 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10153 assert old_node_info is not None
10154 if old_node_info.offline and not self.early_release:
10155 # doesn't make sense to delay the release
10156 self.early_release = True
10157 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10158 " early-release mode", secondary_node)
10161 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10164 # If not specified all disks should be replaced
10166 self.disks = range(len(self.instance.disks))
10168 # TODO: compute disk parameters
10169 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10170 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10171 if primary_node_info.group != secondary_node_info.group:
10172 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10173 " different node groups; the disk parameters of the"
10174 " primary node's group will be applied.")
10176 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10178 for node in check_nodes:
10179 _CheckNodeOnline(self.lu, node)
10181 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10184 if node_name is not None)
10186 # Release unneeded node and node resource locks
10187 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10188 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10190 # Release any owned node group
10191 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10192 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10194 # Check whether disks are valid
10195 for disk_idx in self.disks:
10196 instance.FindDisk(disk_idx)
10198 # Get secondary node IP addresses
10199 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10200 in self.cfg.GetMultiNodeInfo(touched_nodes))
10202 def Exec(self, feedback_fn):
10203 """Execute disk replacement.
10205 This dispatches the disk replacement to the appropriate handler.
10208 if self.delay_iallocator:
10209 self._CheckPrereq2()
10212 # Verify owned locks before starting operation
10213 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10214 assert set(owned_nodes) == set(self.node_secondary_ip), \
10215 ("Incorrect node locks, owning %s, expected %s" %
10216 (owned_nodes, self.node_secondary_ip.keys()))
10217 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10218 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10220 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10221 assert list(owned_instances) == [self.instance_name], \
10222 "Instance '%s' not locked" % self.instance_name
10224 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10225 "Should not own any node group lock at this point"
10228 feedback_fn("No disks need replacement")
10231 feedback_fn("Replacing disk(s) %s for %s" %
10232 (utils.CommaJoin(self.disks), self.instance.name))
10234 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10236 # Activate the instance disks if we're replacing them on a down instance
10238 _StartInstanceDisks(self.lu, self.instance, True)
10241 # Should we replace the secondary node?
10242 if self.new_node is not None:
10243 fn = self._ExecDrbd8Secondary
10245 fn = self._ExecDrbd8DiskOnly
10247 result = fn(feedback_fn)
10249 # Deactivate the instance disks if we're replacing them on a
10252 _SafeShutdownInstanceDisks(self.lu, self.instance)
10254 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10257 # Verify owned locks
10258 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10259 nodes = frozenset(self.node_secondary_ip)
10260 assert ((self.early_release and not owned_nodes) or
10261 (not self.early_release and not (set(owned_nodes) - nodes))), \
10262 ("Not owning the correct locks, early_release=%s, owned=%r,"
10263 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10267 def _CheckVolumeGroup(self, nodes):
10268 self.lu.LogInfo("Checking volume groups")
10270 vgname = self.cfg.GetVGName()
10272 # Make sure volume group exists on all involved nodes
10273 results = self.rpc.call_vg_list(nodes)
10275 raise errors.OpExecError("Can't list volume groups on the nodes")
10278 res = results[node]
10279 res.Raise("Error checking node %s" % node)
10280 if vgname not in res.payload:
10281 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10284 def _CheckDisksExistence(self, nodes):
10285 # Check disk existence
10286 for idx, dev in enumerate(self.instance.disks):
10287 if idx not in self.disks:
10291 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10292 self.cfg.SetDiskID(dev, node)
10294 result = self.rpc.call_blockdev_find(node, dev)
10296 msg = result.fail_msg
10297 if msg or not result.payload:
10299 msg = "disk not found"
10300 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10303 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10304 for idx, dev in enumerate(self.instance.disks):
10305 if idx not in self.disks:
10308 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10311 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10313 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10314 " replace disks for instance %s" %
10315 (node_name, self.instance.name))
10317 def _CreateNewStorage(self, node_name):
10318 """Create new storage on the primary or secondary node.
10320 This is only used for same-node replaces, not for changing the
10321 secondary node, hence we don't want to modify the existing disk.
10326 for idx, dev in enumerate(self.instance.disks):
10327 if idx not in self.disks:
10330 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10332 self.cfg.SetDiskID(dev, node_name)
10334 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10335 names = _GenerateUniqueNames(self.lu, lv_names)
10337 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10339 vg_data = dev.children[0].logical_id[0]
10340 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10341 logical_id=(vg_data, names[0]), params=data_p)
10342 vg_meta = dev.children[1].logical_id[0]
10343 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10344 logical_id=(vg_meta, names[1]), params=meta_p)
10346 new_lvs = [lv_data, lv_meta]
10347 old_lvs = [child.Copy() for child in dev.children]
10348 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10350 # we pass force_create=True to force the LVM creation
10351 for new_lv in new_lvs:
10352 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10353 _GetInstanceInfoText(self.instance), False)
10357 def _CheckDevices(self, node_name, iv_names):
10358 for name, (dev, _, _) in iv_names.iteritems():
10359 self.cfg.SetDiskID(dev, node_name)
10361 result = self.rpc.call_blockdev_find(node_name, dev)
10363 msg = result.fail_msg
10364 if msg or not result.payload:
10366 msg = "disk not found"
10367 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10370 if result.payload.is_degraded:
10371 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10373 def _RemoveOldStorage(self, node_name, iv_names):
10374 for name, (_, old_lvs, _) in iv_names.iteritems():
10375 self.lu.LogInfo("Remove logical volumes for %s" % name)
10378 self.cfg.SetDiskID(lv, node_name)
10380 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10382 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10383 hint="remove unused LVs manually")
10385 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10386 """Replace a disk on the primary or secondary for DRBD 8.
10388 The algorithm for replace is quite complicated:
10390 1. for each disk to be replaced:
10392 1. create new LVs on the target node with unique names
10393 1. detach old LVs from the drbd device
10394 1. rename old LVs to name_replaced.<time_t>
10395 1. rename new LVs to old LVs
10396 1. attach the new LVs (with the old names now) to the drbd device
10398 1. wait for sync across all devices
10400 1. for each modified disk:
10402 1. remove old LVs (which have the name name_replaced.<time_t>)
10404 Failures are not very well handled.
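# Condensed sketch (comments only) of the per-disk swap performed below; the
# temporary suffix matches ren_fn further down:
#
#   1. blockdev_removechildren: detach the old LVs from the DRBD device
#   2. blockdev_rename:         <old LV>  -> <old LV>_replaced-<time_t>
#   3. blockdev_rename:         <new LV>  -> <old LV's original name>
#   4. blockdev_addchildren:    attach the renamed new LVs to the DRBD device
#
# The renamed old LVs are deleted later by _RemoveOldStorage.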
10409 # Step: check device activation
10410 self.lu.LogStep(1, steps_total, "Check device existence")
10411 self._CheckDisksExistence([self.other_node, self.target_node])
10412 self._CheckVolumeGroup([self.target_node, self.other_node])
10414 # Step: check other node consistency
10415 self.lu.LogStep(2, steps_total, "Check peer consistency")
10416 self._CheckDisksConsistency(self.other_node,
10417 self.other_node == self.instance.primary_node,
10420 # Step: create new storage
10421 self.lu.LogStep(3, steps_total, "Allocate new storage")
10422 iv_names = self._CreateNewStorage(self.target_node)
10424 # Step: for each lv, detach+rename*2+attach
10425 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10426 for dev, old_lvs, new_lvs in iv_names.itervalues():
10427 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10429 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10431 result.Raise("Can't detach drbd from local storage on node"
10432 " %s for device %s" % (self.target_node, dev.iv_name))
10434 #cfg.Update(instance)
10436 # ok, we created the new LVs, so now we know we have the needed
10437 # storage; as such, we proceed on the target node to rename
10438 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10439 # using the assumption that logical_id == physical_id (which in
10440 # turn is the unique_id on that node)
10442 # FIXME(iustin): use a better name for the replaced LVs
10443 temp_suffix = int(time.time())
10444 ren_fn = lambda d, suff: (d.physical_id[0],
10445 d.physical_id[1] + "_replaced-%s" % suff)
10447 # Build the rename list based on what LVs exist on the node
10448 rename_old_to_new = []
10449 for to_ren in old_lvs:
10450 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10451 if not result.fail_msg and result.payload:
10453 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10455 self.lu.LogInfo("Renaming the old LVs on the target node")
10456 result = self.rpc.call_blockdev_rename(self.target_node,
10458 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10460 # Now we rename the new LVs to the old LVs
10461 self.lu.LogInfo("Renaming the new LVs on the target node")
10462 rename_new_to_old = [(new, old.physical_id)
10463 for old, new in zip(old_lvs, new_lvs)]
10464 result = self.rpc.call_blockdev_rename(self.target_node,
10466 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10468 # Intermediate steps of in memory modifications
10469 for old, new in zip(old_lvs, new_lvs):
10470 new.logical_id = old.logical_id
10471 self.cfg.SetDiskID(new, self.target_node)
10473 # We need to modify old_lvs so that removal later removes the
10474 # right LVs, not the newly added ones; note that old_lvs is a
10476 for disk in old_lvs:
10477 disk.logical_id = ren_fn(disk, temp_suffix)
10478 self.cfg.SetDiskID(disk, self.target_node)
10480 # Now that the new lvs have the old name, we can add them to the device
10481 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10482 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10484 msg = result.fail_msg
10486 for new_lv in new_lvs:
10487 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10490 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10491 hint=("clean up manually the unused logical"
10493 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10495 cstep = itertools.count(5)
10497 if self.early_release:
10498 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10499 self._RemoveOldStorage(self.target_node, iv_names)
10500 # TODO: Check if releasing locks early still makes sense
10501 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10503 # Release all resource locks except those used by the instance
10504 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10505 keep=self.node_secondary_ip.keys())
10507 # Release all node locks while waiting for sync
10508 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10510 # TODO: Can the instance lock be downgraded here? Take the optional disk
10511 # shutdown in the caller into consideration.
10514 # This can fail as the old devices are degraded and _WaitForSync
10515 # returns a combined result over all disks, so we don't check its return value
10516 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10517 _WaitForSync(self.lu, self.instance)
10519 # Check all devices manually
10520 self._CheckDevices(self.instance.primary_node, iv_names)
10522 # Step: remove old storage
10523 if not self.early_release:
10524 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10525 self._RemoveOldStorage(self.target_node, iv_names)
10527 def _ExecDrbd8Secondary(self, feedback_fn):
10528 """Replace the secondary node for DRBD 8.
10530 The algorithm for replace is quite complicated:
10531 - for all disks of the instance:
10532 - create new LVs on the new node with same names
10533 - shutdown the drbd device on the old secondary
10534 - disconnect the drbd network on the primary
10535 - create the drbd device on the new secondary
10536 - network attach the drbd on the primary, using an artifice:
10537 the drbd code for Attach() will connect to the network if it
10538 finds a device which is connected to the good local disks but
10539 not network enabled
10540 - wait for sync across all devices
10541 - remove all disks from the old secondary
10543 Failures are not very well handled.
10548 pnode = self.instance.primary_node
10550 # Step: check device activation
10551 self.lu.LogStep(1, steps_total, "Check device existence")
10552 self._CheckDisksExistence([self.instance.primary_node])
10553 self._CheckVolumeGroup([self.instance.primary_node])
10555 # Step: check other node consistency
10556 self.lu.LogStep(2, steps_total, "Check peer consistency")
10557 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10559 # Step: create new storage
10560 self.lu.LogStep(3, steps_total, "Allocate new storage")
10561 for idx, dev in enumerate(self.instance.disks):
10562 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10563 (self.new_node, idx))
10564 # we pass force_create=True to force LVM creation
10565 for new_lv in dev.children:
10566 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10567 _GetInstanceInfoText(self.instance), False)
10569 # Step 4: drbd minors and drbd setup changes
10570 # after this, we must manually remove the drbd minors on both the
10571 # error and the success paths
10572 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10573 minors = self.cfg.AllocateDRBDMinor([self.new_node
10574 for dev in self.instance.disks],
10575 self.instance.name)
10576 logging.debug("Allocated minors %r", minors)
10579 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10580 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10581 (self.new_node, idx))
10582 # create new devices on new_node; note that we create two IDs:
10583 # one without port, so the drbd will be activated without
10584 # networking information on the new node at this stage, and one
10585 # with network, for the latter activation in step 4
10586 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10587 if self.instance.primary_node == o_node1:
10590 assert self.instance.primary_node == o_node2, "Three-node instance?"
10593 new_alone_id = (self.instance.primary_node, self.new_node, None,
10594 p_minor, new_minor, o_secret)
10595 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10596 p_minor, new_minor, o_secret)
10598 iv_names[idx] = (dev, dev.children, new_net_id)
10599 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10601 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10602 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10603 logical_id=new_alone_id,
10604 children=dev.children,
10606 params=drbd_params)
10608 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10609 _GetInstanceInfoText(self.instance), False)
10610 except errors.GenericError:
10611 self.cfg.ReleaseDRBDMinors(self.instance.name)
10614 # We have new devices, shutdown the drbd on the old secondary
10615 for idx, dev in enumerate(self.instance.disks):
10616 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10617 self.cfg.SetDiskID(dev, self.target_node)
10618 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10620 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10621 " node: %s" % (idx, msg),
10622 hint=("Please clean up this device manually as"
10623 " soon as possible"))
10625 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10626 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10627 self.instance.disks)[pnode]
10629 msg = result.fail_msg
10631 # detaches didn't succeed (unlikely)
10632 self.cfg.ReleaseDRBDMinors(self.instance.name)
10633 raise errors.OpExecError("Can't detach the disks from the network on"
10634 " old node: %s" % (msg,))
10636 # if we managed to detach at least one, we update all the disks of
10637 # the instance to point to the new secondary
10638 self.lu.LogInfo("Updating instance configuration")
10639 for dev, _, new_logical_id in iv_names.itervalues():
10640 dev.logical_id = new_logical_id
10641 self.cfg.SetDiskID(dev, self.instance.primary_node)
10643 self.cfg.Update(self.instance, feedback_fn)
10645 # Release all node locks (the configuration has been updated)
10646 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10648 # and now perform the drbd attach
10649 self.lu.LogInfo("Attaching primary drbds to new secondary"
10650 " (standalone => connected)")
10651 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10653 self.node_secondary_ip,
10654 self.instance.disks,
10655 self.instance.name,
10657 for to_node, to_result in result.items():
10658 msg = to_result.fail_msg
10660 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10662 hint=("please do a gnt-instance info to see the"
10663 " status of disks"))
10665 cstep = itertools.count(5)
10667 if self.early_release:
10668 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10669 self._RemoveOldStorage(self.target_node, iv_names)
10670 # TODO: Check if releasing locks early still makes sense
10671 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10673 # Release all resource locks except those used by the instance
10674 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10675 keep=self.node_secondary_ip.keys())
10677 # TODO: Can the instance lock be downgraded here? Take the optional disk
10678 # shutdown in the caller into consideration.
10681 # This can fail as the old devices are degraded and _WaitForSync
10682 # returns a combined result over all disks, so we don't check its return value
10683 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10684 _WaitForSync(self.lu, self.instance)
10686 # Check all devices manually
10687 self._CheckDevices(self.instance.primary_node, iv_names)
10689 # Step: remove old storage
10690 if not self.early_release:
10691 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10692 self._RemoveOldStorage(self.target_node, iv_names)
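# Example (illustrative, comments only): opcodes that end up in the tasklet
# above. For REPLACE_DISK_CHG either an iallocator or an explicit new
# secondary must be given (see TLReplaceDisks.CheckArguments); node and
# instance names are made up.
#
#   opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                  mode=constants.REPLACE_DISK_CHG,
#                                  remote_node="node3.example.com",
#                                  early_release=True)
#   opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                  mode=constants.REPLACE_DISK_AUTO)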
10695 class LURepairNodeStorage(NoHooksLU):
10696 """Repairs the volume group on a node.
10701 def CheckArguments(self):
10702 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10704 storage_type = self.op.storage_type
10706 if (constants.SO_FIX_CONSISTENCY not in
10707 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10708 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10709 " repaired" % storage_type,
10710 errors.ECODE_INVAL)
10712 def ExpandNames(self):
10713 self.needed_locks = {
10714 locking.LEVEL_NODE: [self.op.node_name],
10717 def _CheckFaultyDisks(self, instance, node_name):
10718 """Ensure faulty disks abort the opcode or at least warn."""
10720 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10722 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10723 " node '%s'" % (instance.name, node_name),
10724 errors.ECODE_STATE)
10725 except errors.OpPrereqError, err:
10726 if self.op.ignore_consistency:
10727 self.proc.LogWarning(str(err.args[0]))
10731 def CheckPrereq(self):
10732 """Check prerequisites.
10735 # Check whether any instance on this node has faulty disks
10736 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10737 if inst.admin_state != constants.ADMINST_UP:
10739 check_nodes = set(inst.all_nodes)
10740 check_nodes.discard(self.op.node_name)
10741 for inst_node_name in check_nodes:
10742 self._CheckFaultyDisks(inst, inst_node_name)
10744 def Exec(self, feedback_fn):
10745 feedback_fn("Repairing storage unit '%s' on %s ..." %
10746 (self.op.name, self.op.node_name))
10748 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10749 result = self.rpc.call_storage_execute(self.op.node_name,
10750 self.op.storage_type, st_args,
10752 constants.SO_FIX_CONSISTENCY)
10753 result.Raise("Failed to repair storage unit '%s' on %s" %
10754 (self.op.name, self.op.node_name))
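# Example (illustrative, comments only): the opcode handled by
# LURepairNodeStorage; only storage types that support SO_FIX_CONSISTENCY are
# accepted. Field names mirror the op.* attributes used above; the storage
# type constant is an assumption.
#
#   opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_VG,
#                               name="xenvg",
#                               ignore_consistency=False)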
10757 class LUNodeEvacuate(NoHooksLU):
10758 """Evacuates instances off a list of nodes.
10763 _MODE2IALLOCATOR = {
10764 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10765 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10766 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10768 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10769 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10770 constants.IALLOCATOR_NEVAC_MODES)
10772 def CheckArguments(self):
10773 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10775 def ExpandNames(self):
10776 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10778 if self.op.remote_node is not None:
10779 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10780 assert self.op.remote_node
10782 if self.op.remote_node == self.op.node_name:
10783 raise errors.OpPrereqError("Can not use evacuated node as a new"
10784 " secondary node", errors.ECODE_INVAL)
10786 if self.op.mode != constants.NODE_EVAC_SEC:
10787 raise errors.OpPrereqError("Without the use of an iallocator only"
10788 " secondary instances can be evacuated",
10789 errors.ECODE_INVAL)
10792 self.share_locks = _ShareAll()
10793 self.needed_locks = {
10794 locking.LEVEL_INSTANCE: [],
10795 locking.LEVEL_NODEGROUP: [],
10796 locking.LEVEL_NODE: [],
10799 # Determine nodes (via group) optimistically, needs verification once locks
10800 # have been acquired
10801 self.lock_nodes = self._DetermineNodes()
10803 def _DetermineNodes(self):
10804 """Gets the list of nodes to operate on.
10807 if self.op.remote_node is None:
10808 # Iallocator will choose any node(s) in the same group
10809 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10811 group_nodes = frozenset([self.op.remote_node])
10813 # Determine nodes to be locked
10814 return set([self.op.node_name]) | group_nodes
10816 def _DetermineInstances(self):
10817 """Builds list of instances to operate on.
10820 assert self.op.mode in constants.NODE_EVAC_MODES
10822 if self.op.mode == constants.NODE_EVAC_PRI:
10823 # Primary instances only
10824 inst_fn = _GetNodePrimaryInstances
10825 assert self.op.remote_node is None, \
10826 "Evacuating primary instances requires iallocator"
10827 elif self.op.mode == constants.NODE_EVAC_SEC:
10828 # Secondary instances only
10829 inst_fn = _GetNodeSecondaryInstances
10832 assert self.op.mode == constants.NODE_EVAC_ALL
10833 inst_fn = _GetNodeInstances
10834 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10836 raise errors.OpPrereqError("Due to an issue with the iallocator"
10837 " interface it is not possible to evacuate"
10838 " all instances at once; specify explicitly"
10839 " whether to evacuate primary or secondary"
10841 errors.ECODE_INVAL)
10843 return inst_fn(self.cfg, self.op.node_name)
10845 def DeclareLocks(self, level):
10846 if level == locking.LEVEL_INSTANCE:
10847 # Lock instances optimistically, needs verification once node and group
10848 # locks have been acquired
10849 self.needed_locks[locking.LEVEL_INSTANCE] = \
10850 set(i.name for i in self._DetermineInstances())
10852 elif level == locking.LEVEL_NODEGROUP:
10853 # Lock node groups for all potential target nodes optimistically, needs
10854 # verification once nodes have been acquired
10855 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10856 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10858 elif level == locking.LEVEL_NODE:
10859 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10861 def CheckPrereq(self):
10863 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10864 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10865 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10867 need_nodes = self._DetermineNodes()
10869 if not owned_nodes.issuperset(need_nodes):
10870 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10871 " locks were acquired, current nodes are"
10872 " '%s', used to be '%s'; retry the"
10874 (self.op.node_name,
10875 utils.CommaJoin(need_nodes),
10876 utils.CommaJoin(owned_nodes)),
10877 errors.ECODE_STATE)
10879 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10880 if owned_groups != wanted_groups:
10881 raise errors.OpExecError("Node groups changed since locks were acquired,"
10882 " current groups are '%s', used to be '%s';"
10883 " retry the operation" %
10884 (utils.CommaJoin(wanted_groups),
10885 utils.CommaJoin(owned_groups)))
10887 # Determine affected instances
10888 self.instances = self._DetermineInstances()
10889 self.instance_names = [i.name for i in self.instances]
10891 if set(self.instance_names) != owned_instances:
10892 raise errors.OpExecError("Instances on node '%s' changed since locks"
10893 " were acquired, current instances are '%s',"
10894 " used to be '%s'; retry the operation" %
10895 (self.op.node_name,
10896 utils.CommaJoin(self.instance_names),
10897 utils.CommaJoin(owned_instances)))
10899 if self.instance_names:
10900 self.LogInfo("Evacuating instances from node '%s': %s",
10902 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10904 self.LogInfo("No instances to evacuate from node '%s'",
10907 if self.op.remote_node is not None:
10908 for i in self.instances:
10909 if i.primary_node == self.op.remote_node:
10910 raise errors.OpPrereqError("Node %s is the primary node of"
10911 " instance %s, cannot use it as"
10913 (self.op.remote_node, i.name),
10914 errors.ECODE_INVAL)
10916 def Exec(self, feedback_fn):
10917 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10919 if not self.instance_names:
10920 # No instances to evacuate
10923 elif self.op.iallocator is not None:
10924 # TODO: Implement relocation to other group
10925 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10926 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10927 instances=list(self.instance_names))
10929 ial.Run(self.op.iallocator)
10931 if not ial.success:
10932 raise errors.OpPrereqError("Can't compute node evacuation using"
10933 " iallocator '%s': %s" %
10934 (self.op.iallocator, ial.info),
10935 errors.ECODE_NORES)
10937 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10939 elif self.op.remote_node is not None:
10940 assert self.op.mode == constants.NODE_EVAC_SEC
10942 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10943 remote_node=self.op.remote_node,
10945 mode=constants.REPLACE_DISK_CHG,
10946 early_release=self.op.early_release)]
10947 for instance_name in self.instance_names
10951 raise errors.ProgrammerError("No iallocator or remote node")
10953 return ResultWithJobs(jobs)
10956 def _SetOpEarlyRelease(early_release, op):
10957 """Sets C{early_release} flag on opcodes if available.
10961 op.early_release = early_release
10962 except AttributeError:
10963 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10968 def _NodeEvacDest(use_nodes, group, nodes):
10969 """Returns group or nodes depending on caller's choice.
10973 return utils.CommaJoin(nodes)
10978 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10979 """Unpacks the result of change-group and node-evacuate iallocator requests.
10981 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10982 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10984 @type lu: L{LogicalUnit}
10985 @param lu: Logical unit instance
10986 @type alloc_result: tuple/list
10987 @param alloc_result: Result from iallocator
10988 @type early_release: bool
10989 @param early_release: Whether to release locks early if possible
10990 @type use_nodes: bool
10991 @param use_nodes: Whether to display node names instead of groups
10994 (moved, failed, jobs) = alloc_result
10997 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10998 for (name, reason) in failed)
10999 lu.LogWarning("Unable to evacuate instances %s", failreason)
11000 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11003 lu.LogInfo("Instances to be moved: %s",
11004 utils.CommaJoin("%s (to %s)" %
11005 (name, _NodeEvacDest(use_nodes, group, nodes))
11006 for (name, group, nodes) in moved))
11008   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11009               map(opcodes.OpCode.LoadOpCode, ops))
11010           for ops in jobs]
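  # Illustrative sketch (not part of the original module): the iallocator
  # result unpacked above is a three-element sequence; with hypothetical
  # values it may look like:
  #   alloc_result = (
  #     [("inst1.example.com", "group-uuid", ["node2.example.com"])],  # moved
  #     [("inst9.example.com", "not enough memory")],                  # failed
  #     [[opcode_dict, ...]],                                          # jobs
  #     )
  # Each entry in "jobs" is a list of serialized opcodes which this function
  # turns back into opcode objects and flags with the early_release setting.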
11013 class LUInstanceGrowDisk(LogicalUnit):
11014 """Grow a disk of an instance.
11017 HPATH = "disk-grow"
11018 HTYPE = constants.HTYPE_INSTANCE
11021 def ExpandNames(self):
11022 self._ExpandAndLockInstance()
11023 self.needed_locks[locking.LEVEL_NODE] = []
11024 self.needed_locks[locking.LEVEL_NODE_RES] = []
11025 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11027 def DeclareLocks(self, level):
11028 if level == locking.LEVEL_NODE:
11029 self._LockInstancesNodes()
11030 elif level == locking.LEVEL_NODE_RES:
11032 self.needed_locks[locking.LEVEL_NODE_RES] = \
11033 self.needed_locks[locking.LEVEL_NODE][:]
11035 def BuildHooksEnv(self):
11036 """Build hooks env.
11038 This runs on the master, the primary and all the secondaries.
11042 "DISK": self.op.disk,
11043 "AMOUNT": self.op.amount,
11045 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11048 def BuildHooksNodes(self):
11049 """Build hooks nodes.
11052 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11055 def CheckPrereq(self):
11056 """Check prerequisites.
11058 This checks that the instance is in the cluster.
11061 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11062 assert instance is not None, \
11063 "Cannot retrieve locked instance %s" % self.op.instance_name
11064 nodenames = list(instance.all_nodes)
11065 for node in nodenames:
11066 _CheckNodeOnline(self, node)
11068 self.instance = instance
11070 if instance.disk_template not in constants.DTS_GROWABLE:
11071 raise errors.OpPrereqError("Instance's disk layout does not support"
11072 " growing", errors.ECODE_INVAL)
11074 self.disk = instance.FindDisk(self.op.disk)
11076 if instance.disk_template not in (constants.DT_FILE,
11077 constants.DT_SHARED_FILE):
11078 # TODO: check the free disk space for file, when that feature will be
11080 _CheckNodesFreeDiskPerVG(self, nodenames,
11081 self.disk.ComputeGrowth(self.op.amount))
11083 def Exec(self, feedback_fn):
11084 """Execute disk grow.
11087     instance = self.instance
11088     disk = self.disk
11090     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11091 assert (self.owned_locks(locking.LEVEL_NODE) ==
11092 self.owned_locks(locking.LEVEL_NODE_RES))
11094     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11095     if not disks_ok:
11096       raise errors.OpExecError("Cannot activate block device to grow")
11098 feedback_fn("Growing disk %s of instance '%s' by %s" %
11099 (self.op.disk, instance.name,
11100 utils.FormatUnit(self.op.amount, "h")))
11102 # First run all grow ops in dry-run mode
11103 for node in instance.all_nodes:
11104 self.cfg.SetDiskID(disk, node)
11105 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11106 result.Raise("Grow request failed to node %s" % node)
11108 # We know that (as far as we can test) operations across different
11109 # nodes will succeed, time to run it for real
11110 for node in instance.all_nodes:
11111 self.cfg.SetDiskID(disk, node)
11112 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11113 result.Raise("Grow request failed to node %s" % node)
11115 # TODO: Rewrite code to work properly
11116 # DRBD goes into sync mode for a short amount of time after executing the
11117 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11118 # calling "resize" in sync mode fails. Sleeping for a short amount of
11119     # time is a work-around.
11120     time.sleep(5)
11122     disk.RecordGrow(self.op.amount)
11123 self.cfg.Update(instance, feedback_fn)
11125 # Changes have been recorded, release node lock
11126 _ReleaseLocks(self, locking.LEVEL_NODE)
11128 # Downgrade lock while waiting for sync
11129 self.glm.downgrade(locking.LEVEL_INSTANCE)
11131 if self.op.wait_for_sync:
11132 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11133       if disk_abort:
11134         self.proc.LogWarning("Disk sync-ing has not returned a good"
11135                              " status; please check the instance")
11136 if instance.admin_state != constants.ADMINST_UP:
11137 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11138 elif instance.admin_state != constants.ADMINST_UP:
11139 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11140 " not supposed to be running because no wait for"
11141 " sync mode was requested")
11143 assert self.owned_locks(locking.LEVEL_NODE_RES)
11144 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
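    # Illustrative usage sketch (not part of the original module): this LU is
    # normally reached by submitting the corresponding opcode; hypothetical
    # values:
    #   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
    #                                   disk=0, amount=1024,  # amount in MiB
    #                                   wait_for_sync=True)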
11147 class LUInstanceQueryData(NoHooksLU):
11148 """Query runtime instance data.
11153 def ExpandNames(self):
11154 self.needed_locks = {}
11156 # Use locking if requested or when non-static information is wanted
11157 if not (self.op.static or self.op.use_locking):
11158 self.LogWarning("Non-static data requested, locks need to be acquired")
11159 self.op.use_locking = True
11161 if self.op.instances or not self.op.use_locking:
11162 # Expand instance names right here
11163 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11164     else:
11165       # Will use acquired locks
11166       self.wanted_names = None
11168 if self.op.use_locking:
11169 self.share_locks = _ShareAll()
11171 if self.wanted_names is None:
11172 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11173       else:
11174         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11176 self.needed_locks[locking.LEVEL_NODE] = []
11177 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11179 def DeclareLocks(self, level):
11180 if self.op.use_locking and level == locking.LEVEL_NODE:
11181 self._LockInstancesNodes()
11183 def CheckPrereq(self):
11184 """Check prerequisites.
11186 This only checks the optional instance list against the existing names.
11189 if self.wanted_names is None:
11190 assert self.op.use_locking, "Locking was not used"
11191 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11193 self.wanted_instances = \
11194 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11196 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11197 """Returns the status of a block device
11200     if self.op.static or not node:
11201       return None
11203     self.cfg.SetDiskID(dev, node)
11205     result = self.rpc.call_blockdev_find(node, dev)
11206     if result.offline:
11207       return None
11209     result.Raise("Can't compute disk status for %s" % instance_name)
11211     status = result.payload
11212     if status is None:
11213       return None
11215 return (status.dev_path, status.major, status.minor,
11216 status.sync_percent, status.estimated_time,
11217 status.is_degraded, status.ldisk_status)
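    # Illustrative example (not part of the original module): for a healthy,
    # fully synced device the tuple returned above could look like
    # (hypothetical values):
    #   ("/dev/drbd0", 147, 0, 100.0, 0, False, constants.LDS_OKAY)
    # i.e. (dev_path, major, minor, sync_percent, estimated_time,
    #       is_degraded, ldisk_status)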
11219 def _ComputeDiskStatus(self, instance, snode, dev):
11220 """Compute block device status.
11223 if dev.dev_type in constants.LDS_DRBD:
11224 # we change the snode then (otherwise we use the one passed in)
11225 if dev.logical_id[0] == instance.primary_node:
11226 snode = dev.logical_id[1]
11227       else:
11228         snode = dev.logical_id[0]
11230 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11231 instance.name, dev)
11232 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11235 dev_children = map(compat.partial(self._ComputeDiskStatus,
11242 "iv_name": dev.iv_name,
11243 "dev_type": dev.dev_type,
11244 "logical_id": dev.logical_id,
11245 "physical_id": dev.physical_id,
11246 "pstatus": dev_pstatus,
11247 "sstatus": dev_sstatus,
11248 "children": dev_children,
11253 def Exec(self, feedback_fn):
11254 """Gather and return data"""
11257 cluster = self.cfg.GetClusterInfo()
11259 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11260 for i in self.wanted_instances)
11261 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11262 if self.op.static or pnode.offline:
11263         remote_state = None
11264         if pnode.offline:
11265           self.LogWarning("Primary node %s is marked offline, returning static"
11266                           " information only for instance %s" %
11267                           (pnode.name, instance.name))
11268       else:
11269         remote_info = self.rpc.call_instance_info(instance.primary_node,
11270                                                   instance.name,
11271                                                   instance.hypervisor)
11272         remote_info.Raise("Error checking node %s" % instance.primary_node)
11273         remote_info = remote_info.payload
11274         if remote_info and "state" in remote_info:
11275           remote_state = "up"
11276         else:
11277           if instance.admin_state == constants.ADMINST_UP:
11278             remote_state = "down"
11279           else:
11280             remote_state = instance.admin_state
11282       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11283                   instance.disks)
11285 result[instance.name] = {
11286 "name": instance.name,
11287 "config_state": instance.admin_state,
11288 "run_state": remote_state,
11289 "pnode": instance.primary_node,
11290 "snodes": instance.secondary_nodes,
11292 # this happens to be the same format used for hooks
11293 "nics": _NICListToTuple(self, instance.nics),
11294 "disk_template": instance.disk_template,
11296 "hypervisor": instance.hypervisor,
11297 "network_port": instance.network_port,
11298 "hv_instance": instance.hvparams,
11299 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11300 "be_instance": instance.beparams,
11301 "be_actual": cluster.FillBE(instance),
11302 "os_instance": instance.osparams,
11303 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11304 "serial_no": instance.serial_no,
11305 "mtime": instance.mtime,
11306 "ctime": instance.ctime,
11307 "uuid": instance.uuid,
11313 class LUInstanceSetParams(LogicalUnit):
11314 """Modifies an instances's parameters.
11317 HPATH = "instance-modify"
11318 HTYPE = constants.HTYPE_INSTANCE
11321 def CheckArguments(self):
11322 if not (self.op.nics or self.op.disks or self.op.disk_template or
11323 self.op.hvparams or self.op.beparams or self.op.os_name or
11324 self.op.online_inst or self.op.offline_inst):
11325 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11327 if self.op.hvparams:
11328 _CheckGlobalHvParams(self.op.hvparams)
11331     disk_addremove = 0
11332     for disk_op, disk_dict in self.op.disks:
11333 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11334 if disk_op == constants.DDM_REMOVE:
11335         disk_addremove += 1
11336         continue
11337       elif disk_op == constants.DDM_ADD:
11338         disk_addremove += 1
11339       else:
11340 if not isinstance(disk_op, int):
11341 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11342 if not isinstance(disk_dict, dict):
11343 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11344 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11346 if disk_op == constants.DDM_ADD:
11347 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11348 if mode not in constants.DISK_ACCESS_SET:
11349 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11350 errors.ECODE_INVAL)
11351         size = disk_dict.get(constants.IDISK_SIZE, None)
11352         if size is None:
11353           raise errors.OpPrereqError("Required disk parameter size missing",
11354                                      errors.ECODE_INVAL)
11355         try:
11356           size = int(size)
11357         except (TypeError, ValueError), err:
11358 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11359 str(err), errors.ECODE_INVAL)
11360 disk_dict[constants.IDISK_SIZE] = size
11361       else:
11362         # modification of disk
11363 if constants.IDISK_SIZE in disk_dict:
11364 raise errors.OpPrereqError("Disk size change not possible, use"
11365 " grow-disk", errors.ECODE_INVAL)
11367 if disk_addremove > 1:
11368 raise errors.OpPrereqError("Only one disk add or remove operation"
11369 " supported at a time", errors.ECODE_INVAL)
11371 if self.op.disks and self.op.disk_template is not None:
11372 raise errors.OpPrereqError("Disk template conversion and other disk"
11373 " changes not supported at the same time",
11374 errors.ECODE_INVAL)
11376 if (self.op.disk_template and
11377 self.op.disk_template in constants.DTS_INT_MIRROR and
11378 self.op.remote_node is None):
11379 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11380 " one requires specifying a secondary node",
11381 errors.ECODE_INVAL)
11384     nic_addremove = 0
11385     for nic_op, nic_dict in self.op.nics:
11386 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11387       if nic_op == constants.DDM_REMOVE:
11388         nic_addremove += 1
11389         continue
11390       elif nic_op == constants.DDM_ADD:
11391         nic_addremove += 1
11392       else:
11393         if not isinstance(nic_op, int):
11394 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11395 if not isinstance(nic_dict, dict):
11396 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11397 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11399 # nic_dict should be a dict
11400 nic_ip = nic_dict.get(constants.INIC_IP, None)
11401 if nic_ip is not None:
11402 if nic_ip.lower() == constants.VALUE_NONE:
11403 nic_dict[constants.INIC_IP] = None
11404         else:
11405           if not netutils.IPAddress.IsValid(nic_ip):
11406 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11407 errors.ECODE_INVAL)
11409 nic_bridge = nic_dict.get("bridge", None)
11410 nic_link = nic_dict.get(constants.INIC_LINK, None)
11411 if nic_bridge and nic_link:
11412 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11413 " at the same time", errors.ECODE_INVAL)
11414 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11415 nic_dict["bridge"] = None
11416 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11417 nic_dict[constants.INIC_LINK] = None
11419 if nic_op == constants.DDM_ADD:
11420 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11421 if nic_mac is None:
11422 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11424 if constants.INIC_MAC in nic_dict:
11425 nic_mac = nic_dict[constants.INIC_MAC]
11426 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11427 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11429 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11430 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11431 " modifying an existing nic",
11432 errors.ECODE_INVAL)
11434 if nic_addremove > 1:
11435 raise errors.OpPrereqError("Only one NIC add or remove operation"
11436 " supported at a time", errors.ECODE_INVAL)
11438 def ExpandNames(self):
11439 self._ExpandAndLockInstance()
11440 # Can't even acquire node locks in shared mode as upcoming changes in
11441 # Ganeti 2.6 will start to modify the node object on disk conversion
11442 self.needed_locks[locking.LEVEL_NODE] = []
11443 self.needed_locks[locking.LEVEL_NODE_RES] = []
11444 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11446 def DeclareLocks(self, level):
11447 if level == locking.LEVEL_NODE:
11448 self._LockInstancesNodes()
11449 if self.op.disk_template and self.op.remote_node:
11450 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11451 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11452 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11454 self.needed_locks[locking.LEVEL_NODE_RES] = \
11455 self.needed_locks[locking.LEVEL_NODE][:]
11457 def BuildHooksEnv(self):
11458 """Build hooks env.
11460 This runs on the master, primary and secondaries.
11464 if constants.BE_MINMEM in self.be_new:
11465 args["minmem"] = self.be_new[constants.BE_MINMEM]
11466 if constants.BE_MAXMEM in self.be_new:
11467 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11468 if constants.BE_VCPUS in self.be_new:
11469 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11470 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11471 # information at all.
11474 nic_override = dict(self.op.nics)
11475 for idx, nic in enumerate(self.instance.nics):
11476 if idx in nic_override:
11477 this_nic_override = nic_override[idx]
11478         else:
11479           this_nic_override = {}
11480 if constants.INIC_IP in this_nic_override:
11481 ip = this_nic_override[constants.INIC_IP]
11484 if constants.INIC_MAC in this_nic_override:
11485 mac = this_nic_override[constants.INIC_MAC]
11488 if idx in self.nic_pnew:
11489 nicparams = self.nic_pnew[idx]
11490         else:
11491           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11492 mode = nicparams[constants.NIC_MODE]
11493 link = nicparams[constants.NIC_LINK]
11494 args["nics"].append((ip, mac, mode, link))
11495 if constants.DDM_ADD in nic_override:
11496 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11497 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11498 nicparams = self.nic_pnew[constants.DDM_ADD]
11499 mode = nicparams[constants.NIC_MODE]
11500 link = nicparams[constants.NIC_LINK]
11501 args["nics"].append((ip, mac, mode, link))
11502 elif constants.DDM_REMOVE in nic_override:
11503 del args["nics"][-1]
11505 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11506 if self.op.disk_template:
11507 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11511 def BuildHooksNodes(self):
11512 """Build hooks nodes.
11515 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11518 def CheckPrereq(self):
11519 """Check prerequisites.
11521 This only checks the instance list against the existing names.
11524 # checking the new params on the primary/secondary nodes
11526 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11527 cluster = self.cluster = self.cfg.GetClusterInfo()
11528 assert self.instance is not None, \
11529 "Cannot retrieve locked instance %s" % self.op.instance_name
11530 pnode = instance.primary_node
11531 nodelist = list(instance.all_nodes)
11532 pnode_info = self.cfg.GetNodeInfo(pnode)
11533 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11536 if self.op.os_name and not self.op.force:
11537 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11538 self.op.force_variant)
11539 instance_os = self.op.os_name
11540     else:
11541       instance_os = instance.os
11543 if self.op.disk_template:
11544 if instance.disk_template == self.op.disk_template:
11545 raise errors.OpPrereqError("Instance already has disk template %s" %
11546 instance.disk_template, errors.ECODE_INVAL)
11548 if (instance.disk_template,
11549 self.op.disk_template) not in self._DISK_CONVERSIONS:
11550 raise errors.OpPrereqError("Unsupported disk template conversion from"
11551 " %s to %s" % (instance.disk_template,
11552 self.op.disk_template),
11553 errors.ECODE_INVAL)
11554 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11555 msg="cannot change disk template")
11556 if self.op.disk_template in constants.DTS_INT_MIRROR:
11557 if self.op.remote_node == pnode:
11558 raise errors.OpPrereqError("Given new secondary node %s is the same"
11559 " as the primary node of the instance" %
11560 self.op.remote_node, errors.ECODE_STATE)
11561 _CheckNodeOnline(self, self.op.remote_node)
11562 _CheckNodeNotDrained(self, self.op.remote_node)
11563 # FIXME: here we assume that the old instance type is DT_PLAIN
11564 assert instance.disk_template == constants.DT_PLAIN
11565 disks = [{constants.IDISK_SIZE: d.size,
11566 constants.IDISK_VG: d.logical_id[0]}
11567 for d in instance.disks]
11568 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11569 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11571 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11572 if pnode_info.group != snode_info.group:
11573 self.LogWarning("The primary and secondary nodes are in two"
11574 " different node groups; the disk parameters"
11575 " from the first disk's node group will be"
11578 # hvparams processing
11579 if self.op.hvparams:
11580 hv_type = instance.hypervisor
11581 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11582 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11583 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11586 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11587 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11588 self.hv_proposed = self.hv_new = hv_new # the new actual values
11589 self.hv_inst = i_hvdict # the new dict (without defaults)
11590     else:
11591       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11592                                               instance.hvparams)
11593       self.hv_new = self.hv_inst = {}
11595 # beparams processing
11596 if self.op.beparams:
11597 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11599 objects.UpgradeBeParams(i_bedict)
11600 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11601 be_new = cluster.SimpleFillBE(i_bedict)
11602 self.be_proposed = self.be_new = be_new # the new actual values
11603 self.be_inst = i_bedict # the new dict (without defaults)
11604     else:
11605       self.be_new = self.be_inst = {}
11606 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11607 be_old = cluster.FillBE(instance)
11609     # CPU param validation -- checking every time a parameter is
11610     # changed to cover all cases where either CPU mask or vcpus have
11611     # been changed
11612 if (constants.BE_VCPUS in self.be_proposed and
11613 constants.HV_CPU_MASK in self.hv_proposed):
11614       cpu_list = \
11615         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11616 # Verify mask is consistent with number of vCPUs. Can skip this
11617 # test if only 1 entry in the CPU mask, which means same mask
11618 # is applied to all vCPUs.
11619 if (len(cpu_list) > 1 and
11620 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11621 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11623 (self.be_proposed[constants.BE_VCPUS],
11624 self.hv_proposed[constants.HV_CPU_MASK]),
11625 errors.ECODE_INVAL)
11627 # Only perform this test if a new CPU mask is given
11628 if constants.HV_CPU_MASK in self.hv_new:
11629 # Calculate the largest CPU number requested
11630 max_requested_cpu = max(map(max, cpu_list))
11631 # Check that all of the instance's nodes have enough physical CPUs to
11632 # satisfy the requested CPU mask
11633 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11634 max_requested_cpu + 1, instance.hypervisor)
11636 # osparams processing
11637 if self.op.osparams:
11638 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11639 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11640       self.os_inst = i_osdict # the new dict (without defaults)
11641     else:
11642       self.os_inst = {}
11644     self.warn = []
11646 #TODO(dynmem): do the appropriate check involving MINMEM
11647 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11648 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11649 mem_check_list = [pnode]
11650 if be_new[constants.BE_AUTO_BALANCE]:
11651 # either we changed auto_balance to yes or it was from before
11652 mem_check_list.extend(instance.secondary_nodes)
11653 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11654 instance.hypervisor)
11655 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11656 [instance.hypervisor])
11657 pninfo = nodeinfo[pnode]
11658       msg = pninfo.fail_msg
11659       if msg:
11660         # Assume the primary node is unreachable and go ahead
11661         self.warn.append("Can't get info from primary node %s: %s" %
11662                          (pnode, msg))
11663       else:
11664 (_, _, (pnhvinfo, )) = pninfo.payload
11665 if not isinstance(pnhvinfo.get("memory_free", None), int):
11666 self.warn.append("Node data from primary node %s doesn't contain"
11667 " free memory information" % pnode)
11668 elif instance_info.fail_msg:
11669 self.warn.append("Can't get instance runtime information: %s" %
11670 instance_info.fail_msg)
11671         else:
11672           if instance_info.payload:
11673             current_mem = int(instance_info.payload["memory"])
11674           else:
11675 # Assume instance not running
11676 # (there is a slight race condition here, but it's not very
11677 # probable, and we have no other way to check)
11678             # TODO: Describe race condition
11679             current_mem = 0
11680           #TODO(dynmem): do the appropriate check involving MINMEM
11681           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11682                       pnhvinfo["memory_free"])
11683           if miss_mem > 0:
11684 raise errors.OpPrereqError("This change will prevent the instance"
11685 " from starting, due to %d MB of memory"
11686 " missing on its primary node" %
11688 errors.ECODE_NORES)
11690 if be_new[constants.BE_AUTO_BALANCE]:
11691 for node, nres in nodeinfo.items():
11692           if node not in instance.secondary_nodes:
11693             continue
11694 nres.Raise("Can't get info from secondary node %s" % node,
11695 prereq=True, ecode=errors.ECODE_STATE)
11696 (_, _, (nhvinfo, )) = nres.payload
11697 if not isinstance(nhvinfo.get("memory_free", None), int):
11698 raise errors.OpPrereqError("Secondary node %s didn't return free"
11699 " memory information" % node,
11700 errors.ECODE_STATE)
11701 #TODO(dynmem): do the appropriate check involving MINMEM
11702 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11703 raise errors.OpPrereqError("This change will prevent the instance"
11704 " from failover to its secondary node"
11705 " %s, due to not enough memory" % node,
11706 errors.ECODE_STATE)
11710 self.nic_pinst = {}
11711 for nic_op, nic_dict in self.op.nics:
11712 if nic_op == constants.DDM_REMOVE:
11713 if not instance.nics:
11714 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11715                                      errors.ECODE_INVAL)
11716         continue
11717       if nic_op != constants.DDM_ADD:
11719 if not instance.nics:
11720 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11721 " no NICs" % nic_op,
11722 errors.ECODE_INVAL)
11723 if nic_op < 0 or nic_op >= len(instance.nics):
11724 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11726 (nic_op, len(instance.nics) - 1),
11727 errors.ECODE_INVAL)
11728 old_nic_params = instance.nics[nic_op].nicparams
11729 old_nic_ip = instance.nics[nic_op].ip
11730       else:
11731         old_nic_params = {}
11732         old_nic_ip = None
11734 update_params_dict = dict([(key, nic_dict[key])
11735 for key in constants.NICS_PARAMETERS
11736 if key in nic_dict])
11738 if "bridge" in nic_dict:
11739 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11741 new_nic_params = _GetUpdatedParams(old_nic_params,
11742 update_params_dict)
11743 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11744 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11745 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11746 self.nic_pinst[nic_op] = new_nic_params
11747 self.nic_pnew[nic_op] = new_filled_nic_params
11748 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11750 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11751 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11752 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11754 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11756 self.warn.append(msg)
11758 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11759 if new_nic_mode == constants.NIC_MODE_ROUTED:
11760 if constants.INIC_IP in nic_dict:
11761 nic_ip = nic_dict[constants.INIC_IP]
11762           else:
11763             nic_ip = old_nic_ip
11764           if nic_ip is None:
11765 raise errors.OpPrereqError("Cannot set the nic ip to None"
11766 " on a routed nic", errors.ECODE_INVAL)
11767 if constants.INIC_MAC in nic_dict:
11768 nic_mac = nic_dict[constants.INIC_MAC]
11769 if nic_mac is None:
11770 raise errors.OpPrereqError("Cannot set the nic mac to None",
11771 errors.ECODE_INVAL)
11772 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11773 # otherwise generate the mac
11774 nic_dict[constants.INIC_MAC] = \
11775 self.cfg.GenerateMAC(self.proc.GetECId())
11776           else:
11777             # or validate/reserve the current one
11778             try:
11779 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11780 except errors.ReservationError:
11781 raise errors.OpPrereqError("MAC address %s already in use"
11782 " in cluster" % nic_mac,
11783 errors.ECODE_NOTUNIQUE)
11786 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11787 raise errors.OpPrereqError("Disk operations not supported for"
11788 " diskless instances",
11789 errors.ECODE_INVAL)
11790 for disk_op, _ in self.op.disks:
11791 if disk_op == constants.DDM_REMOVE:
11792 if len(instance.disks) == 1:
11793 raise errors.OpPrereqError("Cannot remove the last disk of"
11794 " an instance", errors.ECODE_INVAL)
11795 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11796 msg="cannot remove disks")
11798 if (disk_op == constants.DDM_ADD and
11799 len(instance.disks) >= constants.MAX_DISKS):
11800 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11801 " add more" % constants.MAX_DISKS,
11802 errors.ECODE_STATE)
11803 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11805 if disk_op < 0 or disk_op >= len(instance.disks):
11806 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11808 (disk_op, len(instance.disks)),
11809 errors.ECODE_INVAL)
11811 # disabling the instance
11812 if self.op.offline_inst:
11813 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11814 msg="cannot change instance state to offline")
11816 # enabling the instance
11817 if self.op.online_inst:
11818 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11819 msg="cannot make instance go online")
11821 def _ConvertPlainToDrbd(self, feedback_fn):
11822 """Converts an instance from plain to drbd.
11825 feedback_fn("Converting template to drbd")
11826 instance = self.instance
11827 pnode = instance.primary_node
11828 snode = self.op.remote_node
11830 assert instance.disk_template == constants.DT_PLAIN
11832 # create a fake disk info for _GenerateDiskTemplate
11833 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11834 constants.IDISK_VG: d.logical_id[0]}
11835 for d in instance.disks]
11836 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11837 instance.name, pnode, [snode],
11838                                       disk_info, None, None, 0, feedback_fn,
11839                                       self.diskparams)
11840 info = _GetInstanceInfoText(instance)
11841 feedback_fn("Creating aditional volumes...")
11842 # first, create the missing data and meta devices
11843 for disk in new_disks:
11844 # unfortunately this is... not too nice
11845       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11846                             info, True)
11847 for child in disk.children:
11848 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11849 # at this stage, all new LVs have been created, we can rename the
11851 feedback_fn("Renaming original volumes...")
11852 rename_list = [(o, n.children[0].logical_id)
11853 for (o, n) in zip(instance.disks, new_disks)]
11854 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11855 result.Raise("Failed to rename original LVs")
11857 feedback_fn("Initializing DRBD devices...")
11858 # all child devices are in place, we can now create the DRBD devices
11859 for disk in new_disks:
11860 for node in [pnode, snode]:
11861 f_create = node == pnode
11862 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11864 # at this point, the instance has been modified
11865 instance.disk_template = constants.DT_DRBD8
11866 instance.disks = new_disks
11867 self.cfg.Update(instance, feedback_fn)
11869 # Release node locks while waiting for sync
11870 _ReleaseLocks(self, locking.LEVEL_NODE)
11872 # disks are created, waiting for sync
11873 disk_abort = not _WaitForSync(self, instance,
11874 oneshot=not self.op.wait_for_sync)
11876 raise errors.OpExecError("There are some degraded disks for"
11877 " this instance, please cleanup manually")
11879 # Node resource locks will be released by caller
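    # Illustrative sketch (not part of the original module): after the
    # conversion above, each disk is a DRBD8 device whose children are the
    # renamed original data LV and a newly created meta LV, roughly:
    #   drbd8 (new)
    #    +-- data LV   (the original plain LV, renamed)
    #    +-- meta LV   (created above on both nodes)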
11881 def _ConvertDrbdToPlain(self, feedback_fn):
11882 """Converts an instance from drbd to plain.
11885 instance = self.instance
11887 assert len(instance.secondary_nodes) == 1
11888 assert instance.disk_template == constants.DT_DRBD8
11890 pnode = instance.primary_node
11891 snode = instance.secondary_nodes[0]
11892 feedback_fn("Converting template to plain")
11894 old_disks = instance.disks
11895 new_disks = [d.children[0] for d in old_disks]
11897 # copy over size and mode
11898 for parent, child in zip(old_disks, new_disks):
11899 child.size = parent.size
11900 child.mode = parent.mode
11902 # update instance structure
11903 instance.disks = new_disks
11904 instance.disk_template = constants.DT_PLAIN
11905 self.cfg.Update(instance, feedback_fn)
11907 # Release locks in case removing disks takes a while
11908 _ReleaseLocks(self, locking.LEVEL_NODE)
11910 feedback_fn("Removing volumes on the secondary node...")
11911 for disk in old_disks:
11912 self.cfg.SetDiskID(disk, snode)
11913 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11915 self.LogWarning("Could not remove block device %s on node %s,"
11916 " continuing anyway: %s", disk.iv_name, snode, msg)
11918 feedback_fn("Removing unneeded volumes on the primary node...")
11919 for idx, disk in enumerate(old_disks):
11920 meta = disk.children[1]
11921 self.cfg.SetDiskID(meta, pnode)
11922 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11924 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11925 " continuing anyway: %s", idx, pnode, msg)
11927 # this is a DRBD disk, return its port to the pool
11928 for disk in old_disks:
11929 tcp_port = disk.logical_id[2]
11930 self.cfg.AddTcpUdpPort(tcp_port)
11932 # Node resource locks will be released by caller
11934 def Exec(self, feedback_fn):
11935 """Modifies an instance.
11937 All parameters take effect only at the next restart of the instance.
11940 # Process here the warnings from CheckPrereq, as we don't have a
11941 # feedback_fn there.
11942 for warn in self.warn:
11943 feedback_fn("WARNING: %s" % warn)
11945 assert ((self.op.disk_template is None) ^
11946 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11947 "Not owning any node resource locks"
11949     result = []
11950     instance = self.instance
11952 for disk_op, disk_dict in self.op.disks:
11953 if disk_op == constants.DDM_REMOVE:
11954 # remove the last disk
11955 device = instance.disks.pop()
11956 device_idx = len(instance.disks)
11957 for node, disk in device.ComputeNodeTree(instance.primary_node):
11958 self.cfg.SetDiskID(disk, node)
11959 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11961 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11962 " continuing anyway", device_idx, node, msg)
11963 result.append(("disk/%d" % device_idx, "remove"))
11965 # if this is a DRBD disk, return its port to the pool
11966 if device.dev_type in constants.LDS_DRBD:
11967 tcp_port = device.logical_id[2]
11968 self.cfg.AddTcpUdpPort(tcp_port)
11969 elif disk_op == constants.DDM_ADD:
11971 if instance.disk_template in (constants.DT_FILE,
11972 constants.DT_SHARED_FILE):
11973 file_driver, file_path = instance.disks[0].logical_id
11974 file_path = os.path.dirname(file_path)
11975         else:
11976           file_driver = file_path = None
11977 disk_idx_base = len(instance.disks)
11978 new_disk = _GenerateDiskTemplate(self,
11979 instance.disk_template,
11980 instance.name, instance.primary_node,
11981                                          instance.secondary_nodes,
11982                                          [disk_dict],
11983                                          file_path,
11984                                          file_driver,
11985                                          disk_idx_base,
11986                                          feedback_fn,
11987                                          self.diskparams)[0]
11988 instance.disks.append(new_disk)
11989 info = _GetInstanceInfoText(instance)
11991 logging.info("Creating volume %s for instance %s",
11992 new_disk.iv_name, instance.name)
11993 # Note: this needs to be kept in sync with _CreateDisks
11995 for node in instance.all_nodes:
11996           f_create = node == instance.primary_node
11997           try:
11998             _CreateBlockDev(self, node, instance, new_disk,
11999 f_create, info, f_create)
12000 except errors.OpExecError, err:
12001 self.LogWarning("Failed to create volume %s (%s) on"
12003 new_disk.iv_name, new_disk, node, err)
12004 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12005 (new_disk.size, new_disk.mode)))
12006       else:
12007         # change a given disk
12008 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12009 result.append(("disk.mode/%d" % disk_op,
12010 disk_dict[constants.IDISK_MODE]))
12012 if self.op.disk_template:
12014 check_nodes = set(instance.all_nodes)
12015 if self.op.remote_node:
12016 check_nodes.add(self.op.remote_node)
12017 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12018 owned = self.owned_locks(level)
12019 assert not (check_nodes - owned), \
12020 ("Not owning the correct locks, owning %r, expected at least %r" %
12021 (owned, check_nodes))
12023 r_shut = _ShutdownInstanceDisks(self, instance)
12025 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12026 " proceed with disk template conversion")
12027 mode = (instance.disk_template, self.op.disk_template)
12028       try:
12029         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12030       except:
12031         self.cfg.ReleaseDRBDMinors(instance.name)
12032         raise
12033 result.append(("disk_template", self.op.disk_template))
12035 assert instance.disk_template == self.op.disk_template, \
12036 ("Expected disk template '%s', found '%s'" %
12037 (self.op.disk_template, instance.disk_template))
12039 # Release node and resource locks if there are any (they might already have
12040 # been released during disk conversion)
12041 _ReleaseLocks(self, locking.LEVEL_NODE)
12042 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12045 for nic_op, nic_dict in self.op.nics:
12046 if nic_op == constants.DDM_REMOVE:
12047 # remove the last nic
12048 del instance.nics[-1]
12049 result.append(("nic.%d" % len(instance.nics), "remove"))
12050 elif nic_op == constants.DDM_ADD:
12051 # mac and bridge should be set, by now
12052 mac = nic_dict[constants.INIC_MAC]
12053 ip = nic_dict.get(constants.INIC_IP, None)
12054 nicparams = self.nic_pinst[constants.DDM_ADD]
12055 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12056 instance.nics.append(new_nic)
12057 result.append(("nic.%d" % (len(instance.nics) - 1),
12058 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12059 (new_nic.mac, new_nic.ip,
12060 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12061 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12062                        )))
12063       else:
12064         for key in (constants.INIC_MAC, constants.INIC_IP):
12065 if key in nic_dict:
12066 setattr(instance.nics[nic_op], key, nic_dict[key])
12067 if nic_op in self.nic_pinst:
12068 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12069 for key, val in nic_dict.iteritems():
12070 result.append(("nic.%s/%d" % (key, nic_op), val))
12073 if self.op.hvparams:
12074 instance.hvparams = self.hv_inst
12075 for key, val in self.op.hvparams.iteritems():
12076 result.append(("hv/%s" % key, val))
12079 if self.op.beparams:
12080 instance.beparams = self.be_inst
12081 for key, val in self.op.beparams.iteritems():
12082 result.append(("be/%s" % key, val))
12085 if self.op.os_name:
12086 instance.os = self.op.os_name
12089 if self.op.osparams:
12090 instance.osparams = self.os_inst
12091 for key, val in self.op.osparams.iteritems():
12092 result.append(("os/%s" % key, val))
12094 # online/offline instance
12095 if self.op.online_inst:
12096 self.cfg.MarkInstanceDown(instance.name)
12097 result.append(("admin_state", constants.ADMINST_DOWN))
12098 if self.op.offline_inst:
12099 self.cfg.MarkInstanceOffline(instance.name)
12100 result.append(("admin_state", constants.ADMINST_OFFLINE))
12102 self.cfg.Update(instance, feedback_fn)
12104 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12105 self.owned_locks(locking.LEVEL_NODE)), \
12106 "All node locks should have been released by now"
12110 _DISK_CONVERSIONS = {
12111 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12112     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12113     }
12116 class LUInstanceChangeGroup(LogicalUnit):
12117 HPATH = "instance-change-group"
12118 HTYPE = constants.HTYPE_INSTANCE
12121 def ExpandNames(self):
12122 self.share_locks = _ShareAll()
12123 self.needed_locks = {
12124 locking.LEVEL_NODEGROUP: [],
12125 locking.LEVEL_NODE: [],
12128 self._ExpandAndLockInstance()
12130 if self.op.target_groups:
12131 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12132 self.op.target_groups)
12133     else:
12134       self.req_target_uuids = None
12136 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12138 def DeclareLocks(self, level):
12139 if level == locking.LEVEL_NODEGROUP:
12140 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12142 if self.req_target_uuids:
12143 lock_groups = set(self.req_target_uuids)
12145 # Lock all groups used by instance optimistically; this requires going
12146 # via the node before it's locked, requiring verification later on
12147 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12148 lock_groups.update(instance_groups)
12149       else:
12150         # No target groups, need to lock all of them
12151 lock_groups = locking.ALL_SET
12153 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12155 elif level == locking.LEVEL_NODE:
12156 if self.req_target_uuids:
12157 # Lock all nodes used by instances
12158 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12159 self._LockInstancesNodes()
12161 # Lock all nodes in all potential target groups
12162 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12163 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12164 member_nodes = [node_name
12165 for group in lock_groups
12166 for node_name in self.cfg.GetNodeGroup(group).members]
12167 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12168       else:
12169         # Lock all nodes as all groups are potential targets
12170 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12172 def CheckPrereq(self):
12173 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12174 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12175 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12177 assert (self.req_target_uuids is None or
12178 owned_groups.issuperset(self.req_target_uuids))
12179 assert owned_instances == set([self.op.instance_name])
12181 # Get instance information
12182 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12184 # Check if node groups for locked instance are still correct
12185 assert owned_nodes.issuperset(self.instance.all_nodes), \
12186 ("Instance %s's nodes changed while we kept the lock" %
12187 self.op.instance_name)
12189     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12190                                            owned_groups)
12192 if self.req_target_uuids:
12193 # User requested specific target groups
12194 self.target_uuids = self.req_target_uuids
12195     else:
12196       # All groups except those used by the instance are potential targets
12197 self.target_uuids = owned_groups - inst_groups
12199 conflicting_groups = self.target_uuids & inst_groups
12200 if conflicting_groups:
12201 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12202 " used by the instance '%s'" %
12203 (utils.CommaJoin(conflicting_groups),
12204 self.op.instance_name),
12205 errors.ECODE_INVAL)
12207 if not self.target_uuids:
12208 raise errors.OpPrereqError("There are no possible target groups",
12209 errors.ECODE_INVAL)
12211 def BuildHooksEnv(self):
12212 """Build hooks env.
12215 assert self.target_uuids
12218 "TARGET_GROUPS": " ".join(self.target_uuids),
12221 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12225 def BuildHooksNodes(self):
12226 """Build hooks nodes.
12229 mn = self.cfg.GetMasterNode()
12230 return ([mn], [mn])
12232 def Exec(self, feedback_fn):
12233 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12235 assert instances == [self.op.instance_name], "Instance not locked"
12237 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12238 instances=instances, target_groups=list(self.target_uuids))
12240 ial.Run(self.op.iallocator)
12242 if not ial.success:
12243 raise errors.OpPrereqError("Can't compute solution for changing group of"
12244 " instance '%s' using iallocator '%s': %s" %
12245                                  (self.op.instance_name, self.op.iallocator,
12246                                   ial.info),
12247 errors.ECODE_NORES)
12249 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12251 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12252 " instance '%s'", len(jobs), self.op.instance_name)
12254 return ResultWithJobs(jobs)
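    # Illustrative usage sketch (not part of the original module): moving an
    # instance to another node group via the opcode this LU implements
    # (hypothetical names):
    #   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
    #                                      target_groups=["group2"],
    #                                      early_release=False)
    # The iallocator-computed jobs are then submitted by the master processor
    # via the ResultWithJobs wrapper returned above.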
12257 class LUBackupQuery(NoHooksLU):
12258 """Query the exports list
12263 def ExpandNames(self):
12264 self.needed_locks = {}
12265 self.share_locks[locking.LEVEL_NODE] = 1
12266 if not self.op.nodes:
12267 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12268     else:
12269       self.needed_locks[locking.LEVEL_NODE] = \
12270 _GetWantedNodes(self, self.op.nodes)
12272 def Exec(self, feedback_fn):
12273 """Compute the list of all the exported system images.
12276 @return: a dictionary with the structure node->(export-list)
12277 where export-list is a list of the instances exported on
12281 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12282 rpcresult = self.rpc.call_export_list(self.nodes)
12283     result = {}
12284     for node in rpcresult:
12285       if rpcresult[node].fail_msg:
12286         result[node] = False
12287       else:
12288         result[node] = rpcresult[node].payload
12290     return result
12293 class LUBackupPrepare(NoHooksLU):
12294 """Prepares an instance for an export and returns useful information.
12299 def ExpandNames(self):
12300 self._ExpandAndLockInstance()
12302 def CheckPrereq(self):
12303 """Check prerequisites.
12306 instance_name = self.op.instance_name
12308 self.instance = self.cfg.GetInstanceInfo(instance_name)
12309 assert self.instance is not None, \
12310 "Cannot retrieve locked instance %s" % self.op.instance_name
12311 _CheckNodeOnline(self, self.instance.primary_node)
12313 self._cds = _GetClusterDomainSecret()
12315 def Exec(self, feedback_fn):
12316 """Prepares an instance for an export.
12319 instance = self.instance
12321 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12322 salt = utils.GenerateSecret(8)
12324 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12325 result = self.rpc.call_x509_cert_create(instance.primary_node,
12326 constants.RIE_CERT_VALIDITY)
12327 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12329 (name, cert_pem) = result.payload
12331       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12332                                              cert_pem)
12334       return {
12335         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12336         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12337                           salt),
12338         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12339         }
12341     return None
12344 class LUBackupExport(LogicalUnit):
12345 """Export an instance to an image in the cluster.
12348 HPATH = "instance-export"
12349 HTYPE = constants.HTYPE_INSTANCE
12352 def CheckArguments(self):
12353 """Check the arguments.
12356 self.x509_key_name = self.op.x509_key_name
12357 self.dest_x509_ca_pem = self.op.destination_x509_ca
12359 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12360 if not self.x509_key_name:
12361 raise errors.OpPrereqError("Missing X509 key name for encryption",
12362 errors.ECODE_INVAL)
12364 if not self.dest_x509_ca_pem:
12365 raise errors.OpPrereqError("Missing destination X509 CA",
12366 errors.ECODE_INVAL)
12368 def ExpandNames(self):
12369 self._ExpandAndLockInstance()
12371 # Lock all nodes for local exports
12372 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12373 # FIXME: lock only instance primary and destination node
12375       # Sad but true, for now we have to lock all nodes, as we don't know where
12376 # the previous export might be, and in this LU we search for it and
12377 # remove it from its current node. In the future we could fix this by:
12378 # - making a tasklet to search (share-lock all), then create the
12379 # new one, then one to remove, after
12380 # - removing the removal operation altogether
12381 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12383 def DeclareLocks(self, level):
12384 """Last minute lock declaration."""
12385 # All nodes are locked anyway, so nothing to do here.
12387 def BuildHooksEnv(self):
12388 """Build hooks env.
12390 This will run on the master, primary node and target node.
12394 "EXPORT_MODE": self.op.mode,
12395 "EXPORT_NODE": self.op.target_node,
12396 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12397 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12398 # TODO: Generic function for boolean env variables
12399 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12402 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12406 def BuildHooksNodes(self):
12407 """Build hooks nodes.
12410 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12412 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12413 nl.append(self.op.target_node)
12417 def CheckPrereq(self):
12418 """Check prerequisites.
12420 This checks that the instance and node names are valid.
12423 instance_name = self.op.instance_name
12425 self.instance = self.cfg.GetInstanceInfo(instance_name)
12426 assert self.instance is not None, \
12427 "Cannot retrieve locked instance %s" % self.op.instance_name
12428 _CheckNodeOnline(self, self.instance.primary_node)
12430 if (self.op.remove_instance and
12431 self.instance.admin_state == constants.ADMINST_UP and
12432 not self.op.shutdown):
12433 raise errors.OpPrereqError("Can not remove instance without shutting it"
12436 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12437 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12438 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12439 assert self.dst_node is not None
12441 _CheckNodeOnline(self, self.dst_node.name)
12442 _CheckNodeNotDrained(self, self.dst_node.name)
12445 self.dest_disk_info = None
12446 self.dest_x509_ca = None
12448 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12449 self.dst_node = None
12451 if len(self.op.target_node) != len(self.instance.disks):
12452 raise errors.OpPrereqError(("Received destination information for %s"
12453 " disks, but instance %s has %s disks") %
12454 (len(self.op.target_node), instance_name,
12455 len(self.instance.disks)),
12456 errors.ECODE_INVAL)
12458 cds = _GetClusterDomainSecret()
12460 # Check X509 key name
12461       try:
12462         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12463 except (TypeError, ValueError), err:
12464 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12466 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12467 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12468 errors.ECODE_INVAL)
12470 # Load and verify CA
12471       try:
12472         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12473 except OpenSSL.crypto.Error, err:
12474 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12475 (err, ), errors.ECODE_INVAL)
12477 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12478 if errcode is not None:
12479 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12480 (msg, ), errors.ECODE_INVAL)
12482 self.dest_x509_ca = cert
12484 # Verify target information
12485       disk_info = []
12486       for idx, disk_data in enumerate(self.op.target_node):
12487         try:
12488           (host, port, magic) = \
12489 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12490 except errors.GenericError, err:
12491 raise errors.OpPrereqError("Target info for disk %s: %s" %
12492 (idx, err), errors.ECODE_INVAL)
12494 disk_info.append((host, port, magic))
12496 assert len(disk_info) == len(self.op.target_node)
12497 self.dest_disk_info = disk_info
12499     else:
12500       raise errors.ProgrammerError("Unhandled export mode %r" %
12501                                    self.op.mode)
12503 # instance disk type verification
12504 # TODO: Implement export support for file-based disks
12505 for disk in self.instance.disks:
12506 if disk.dev_type == constants.LD_FILE:
12507 raise errors.OpPrereqError("Export not supported for instances with"
12508 " file-based disks", errors.ECODE_INVAL)
12510 def _CleanupExports(self, feedback_fn):
12511 """Removes exports of current instance from all other nodes.
12513 If an instance in a cluster with nodes A..D was exported to node C, its
12514 exports will be removed from the nodes A, B and D.
12517 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12519 nodelist = self.cfg.GetNodeList()
12520 nodelist.remove(self.dst_node.name)
12522 # on one-node clusters nodelist will be empty after the removal
12523 # if we proceed the backup would be removed because OpBackupQuery
12524 # substitutes an empty list with the full cluster node list.
12525 iname = self.instance.name
12527 feedback_fn("Removing old exports for instance %s" % iname)
12528 exportlist = self.rpc.call_export_list(nodelist)
12529 for node in exportlist:
12530       if exportlist[node].fail_msg:
12531         continue
12532 if iname in exportlist[node].payload:
12533 msg = self.rpc.call_export_remove(node, iname).fail_msg
12535 self.LogWarning("Could not remove older export for instance %s"
12536 " on node %s: %s", iname, node, msg)
12538 def Exec(self, feedback_fn):
12539 """Export an instance to an image in the cluster.
12542 assert self.op.mode in constants.EXPORT_MODES
12544 instance = self.instance
12545 src_node = instance.primary_node
12547 if self.op.shutdown:
12548 # shutdown the instance, but not the disks
12549 feedback_fn("Shutting down instance %s" % instance.name)
12550 result = self.rpc.call_instance_shutdown(src_node, instance,
12551 self.op.shutdown_timeout)
12552 # TODO: Maybe ignore failures if ignore_remove_failures is set
12553 result.Raise("Could not shutdown instance %s on"
12554 " node %s" % (instance.name, src_node))
12556 # set the disks ID correctly since call_instance_start needs the
12557 # correct drbd minor to create the symlinks
12558 for disk in instance.disks:
12559 self.cfg.SetDiskID(disk, src_node)
12561 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12563     if activate_disks:
12564       # Activate the instance disks if we're exporting a stopped instance
12565 feedback_fn("Activating disks for %s" % instance.name)
12566 _StartInstanceDisks(self, instance, None)
12569 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12572 helper.CreateSnapshots()
12574 if (self.op.shutdown and
12575 instance.admin_state == constants.ADMINST_UP and
12576 not self.op.remove_instance):
12577 assert not activate_disks
12578 feedback_fn("Starting instance %s" % instance.name)
12579 result = self.rpc.call_instance_start(src_node,
12580 (instance, None, None), False)
12581 msg = result.fail_msg
12583 feedback_fn("Failed to start instance: %s" % msg)
12584 _ShutdownInstanceDisks(self, instance)
12585 raise errors.OpExecError("Could not start instance: %s" % msg)
12587 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12588 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12589 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12590 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12591 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12593 (key_name, _, _) = self.x509_key_name
12595         dest_ca_pem = \
12596           OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12597                                           self.dest_x509_ca)
12599 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12600 key_name, dest_ca_pem,
12605 # Check for backwards compatibility
12606 assert len(dresults) == len(instance.disks)
12607 assert compat.all(isinstance(i, bool) for i in dresults), \
12608 "Not all results are boolean: %r" % dresults
12612 feedback_fn("Deactivating disks for %s" % instance.name)
12613 _ShutdownInstanceDisks(self, instance)
12615     if not (compat.all(dresults) and fin_resu):
12616       failures = []
12617       if not fin_resu:
12618         failures.append("export finalization")
12619       if not compat.all(dresults):
12620         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12621                                if not dsk)
12622         failures.append("disk export: disk(s) %s" % fdsk)
12624 raise errors.OpExecError("Export failed, errors in %s" %
12625 utils.CommaJoin(failures))
12627 # At this point, the export was successful, we can cleanup/finish
12629 # Remove instance if requested
12630 if self.op.remove_instance:
12631 feedback_fn("Removing instance %s" % instance.name)
12632 _RemoveInstance(self, feedback_fn, instance,
12633 self.op.ignore_remove_failures)
12635 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12636 self._CleanupExports(feedback_fn)
12638 return fin_resu, dresults
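    # Illustrative example (not part of the original module): per the
    # assertions above, the value returned to the caller is a pair of the
    # finalization status and one boolean per exported disk, e.g.
    # (hypothetical): (True, [True, True])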
12641 class LUBackupRemove(NoHooksLU):
12642 """Remove exports related to the named instance.
12647 def ExpandNames(self):
12648 self.needed_locks = {}
12649 # We need all nodes to be locked in order for RemoveExport to work, but we
12650 # don't need to lock the instance itself, as nothing will happen to it (and
12651 # we can remove exports also for a removed instance)
12652 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12654 def Exec(self, feedback_fn):
12655 """Remove any export.
12658 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12659 # If the instance was not found we'll try with the name that was passed in.
12660 # This will only work if it was an FQDN, though.
12661     fqdn_warn = False
12662     if not instance_name:
12663       fqdn_warn = True
12664       instance_name = self.op.instance_name
12666 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12667     exportlist = self.rpc.call_export_list(locked_nodes)
12668     found = False
12669     for node in exportlist:
12670       msg = exportlist[node].fail_msg
12671       if msg:
12672         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12673         continue
12674       if instance_name in exportlist[node].payload:
12675         found = True
12676         result = self.rpc.call_export_remove(node, instance_name)
12677         msg = result.fail_msg
12678         if msg:
12679           logging.error("Could not remove export for instance %s"
12680 " on node %s: %s", instance_name, node, msg)
12682 if fqdn_warn and not found:
12683 feedback_fn("Export not found. If trying to remove an export belonging"
12684 " to a deleted instance please use its Fully Qualified"
12688 class LUGroupAdd(LogicalUnit):
12689 """Logical unit for creating node groups.
12692 HPATH = "group-add"
12693 HTYPE = constants.HTYPE_GROUP
12696 def ExpandNames(self):
12697 # We need the new group's UUID here so that we can create and acquire the
12698 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12699 # that it should not check whether the UUID exists in the configuration.
12700 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12701 self.needed_locks = {}
12702 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12704 def CheckPrereq(self):
12705 """Check prerequisites.
12707 This checks that the given group name is not an existing node group
12712 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12713 except errors.OpPrereqError:
12716 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12717 " node group (UUID: %s)" %
12718 (self.op.group_name, existing_uuid),
12719 errors.ECODE_EXISTS)
12721 if self.op.ndparams:
12722 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12724 if self.op.diskparams:
12725 for templ in constants.DISK_TEMPLATES:
12726 if templ not in self.op.diskparams:
12727 self.op.diskparams[templ] = {}
12728 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12730 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12732 def BuildHooksEnv(self):
12733 """Build hooks env.
12737 "GROUP_NAME": self.op.group_name,
12740 def BuildHooksNodes(self):
12741 """Build hooks nodes.
12744 mn = self.cfg.GetMasterNode()
12745 return ([mn], [mn])
12747 def Exec(self, feedback_fn):
12748 """Add the node group to the cluster.
12751 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12752 uuid=self.group_uuid,
12753 alloc_policy=self.op.alloc_policy,
12754 ndparams=self.op.ndparams,
12755 diskparams=self.op.diskparams)
12757 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12758 del self.remove_locks[locking.LEVEL_NODEGROUP]
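# Illustrative sketch (hypothetical helper, plain dicts only): the same
# "fill in empty defaults, then type-check" pattern that LUGroupAdd.CheckPrereq
# applies above to self.op.diskparams, but without the ganeti-specific
# utils.ForceDictType / constants dependencies.
def _ExampleFillDiskParamDefaults(diskparams, templates, allowed_types):
  """Ensure every disk template has a (possibly empty) parameter dict.

  @param diskparams: dict of template name -> parameter dict
  @param templates: iterable of all known template names
  @param allowed_types: dict of parameter name -> expected Python type

  """
  filled = dict(diskparams)
  for templ in templates:
    filled.setdefault(templ, {})
    for (name, value) in filled[templ].items():
      expected = allowed_types.get(name)
      if expected is not None and not isinstance(value, expected):
        raise TypeError("Parameter '%s' of template '%s' is not a %s" %
                        (name, templ, expected.__name__))
  return filled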
12761 class LUGroupAssignNodes(NoHooksLU):
12762 """Logical unit for assigning nodes to groups.
12767 def ExpandNames(self):
12768 # These raise errors.OpPrereqError on their own:
12769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12770 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12772 # We want to lock all the affected nodes and groups. We have readily
12773 # available the list of nodes, and the *destination* group. To gather the
12774 # list of "source" groups, we need to fetch node information later on.
12775 self.needed_locks = {
12776 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12777 locking.LEVEL_NODE: self.op.nodes,
12780 def DeclareLocks(self, level):
12781 if level == locking.LEVEL_NODEGROUP:
12782 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12784 # Try to get all affected nodes' groups without having the group or node
12785 # lock yet. Needs verification later in the code flow.
12786 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12788 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12790 def CheckPrereq(self):
12791 """Check prerequisites.
12794 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12795 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12796 frozenset(self.op.nodes))
12798 expected_locks = (set([self.group_uuid]) |
12799 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12800 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12801 if actual_locks != expected_locks:
12802 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12803 " current groups are '%s', used to be '%s'" %
12804 (utils.CommaJoin(expected_locks),
12805 utils.CommaJoin(actual_locks)))
12807 self.node_data = self.cfg.GetAllNodesInfo()
12808 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12809 instance_data = self.cfg.GetAllInstancesInfo()
12811 if self.group is None:
12812 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12813 (self.op.group_name, self.group_uuid))
12815 (new_splits, previous_splits) = \
12816 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12817 for node in self.op.nodes],
12818 self.node_data, instance_data)
12821 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12823 if not self.op.force:
12824 raise errors.OpExecError("The following instances get split by this"
12825 " change and --force was not given: %s" %
12828 self.LogWarning("This operation will split the following instances: %s",
12831 if previous_splits:
12832 self.LogWarning("In addition, these already-split instances continue"
12833 " to be split across groups: %s",
12834 utils.CommaJoin(utils.NiceSort(previous_splits)))
12836 def Exec(self, feedback_fn):
12837 """Assign nodes to a new group.
12840 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12842 self.cfg.AssignGroupNodes(mods)
12845 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12846 """Check for split instances after a node assignment.
12848 This method considers a series of node assignments as an atomic operation,
12849 and returns information about split instances after applying the set of
12852 In particular, it returns information about newly split instances, and
12853 about instances that were already split and remain so after the change.
12855 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12858 @type changes: list of (node_name, new_group_uuid) pairs.
12859 @param changes: list of node assignments to consider.
12860 @param node_data: a dict with data for all nodes
12861 @param instance_data: a dict with all instances to consider
12862 @rtype: a two-tuple
12863 @return: a list of instances that were previously whole and become split as a
12864 consequence of this change, and a list of instances that were previously
12865 split and that this change does not fix.
12868 changed_nodes = dict((node, group) for node, group in changes
12869 if node_data[node].group != group)
12871 all_split_instances = set()
12872 previously_split_instances = set()
12874 def InstanceNodes(instance):
12875 return [instance.primary_node] + list(instance.secondary_nodes)
12877 for inst in instance_data.values():
12878 if inst.disk_template not in constants.DTS_INT_MIRROR:
12881 instance_nodes = InstanceNodes(inst)
12883 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12884 previously_split_instances.add(inst.name)
12886 if len(set(changed_nodes.get(node, node_data[node].group)
12887 for node in instance_nodes)) > 1:
12888 all_split_instances.add(inst.name)
12890 return (list(all_split_instances - previously_split_instances),
12891 list(previously_split_instances & all_split_instances))
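# Illustrative sketch (hypothetical data, not part of the original module): a
# worked example of the split-detection logic above, using minimal stand-ins
# for the node and instance configuration objects. Only instances with an
# internally mirrored disk template (e.g. drbd) are relevant; an instance is
# "split" when its nodes end up in more than one group.
def _ExampleSplitDetection():
  import collections
  Node = collections.namedtuple("Node", ["group"])
  Inst = collections.namedtuple("Inst",
                                ["name", "primary_node", "secondary_nodes"])

  node_groups = {"node1": Node("g1"), "node2": Node("g1")}
  inst = Inst("inst1", "node1", ["node2"])

  # Proposed change: move node2 into group g2
  changes = {"node2": "g2"}

  inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)
  groups_after = set(changes.get(n, node_groups[n].group) for n in inst_nodes)

  # len(groups_after) > 1, so inst1 would become newly split by this change
  return len(groups_after) > 1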
12894 class _GroupQuery(_QueryBase):
12895 FIELDS = query.GROUP_FIELDS
12897 def ExpandNames(self, lu):
12898 lu.needed_locks = {}
12900 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12901 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12904 self.wanted = [name_to_uuid[name]
12905 for name in utils.NiceSort(name_to_uuid.keys())]
12907 # Accept names to be either names or UUIDs.
12910 all_uuid = frozenset(self._all_groups.keys())
12912 for name in self.names:
12913 if name in all_uuid:
12914 self.wanted.append(name)
12915 elif name in name_to_uuid:
12916 self.wanted.append(name_to_uuid[name])
12918 missing.append(name)
12921 raise errors.OpPrereqError("Some groups do not exist: %s" %
12922 utils.CommaJoin(missing),
12923 errors.ECODE_NOENT)
12925 def DeclareLocks(self, lu, level):
12928 def _GetQueryData(self, lu):
12929 """Computes the list of node groups and their attributes.
12932 do_nodes = query.GQ_NODE in self.requested_data
12933 do_instances = query.GQ_INST in self.requested_data
12935 group_to_nodes = None
12936 group_to_instances = None
12938 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12939 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12940 # latter GetAllInstancesInfo() is not enough, for we have to go through
12941 # instance->node. Hence, we will need to process nodes even if we only need
12942 # instance information.
12943 if do_nodes or do_instances:
12944 all_nodes = lu.cfg.GetAllNodesInfo()
12945 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12948 for node in all_nodes.values():
12949 if node.group in group_to_nodes:
12950 group_to_nodes[node.group].append(node.name)
12951 node_to_group[node.name] = node.group
12954 all_instances = lu.cfg.GetAllInstancesInfo()
12955 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12957 for instance in all_instances.values():
12958 node = instance.primary_node
12959 if node in node_to_group:
12960 group_to_instances[node_to_group[node]].append(instance.name)
12963 # Do not pass on node information if it was not requested.
12964 group_to_nodes = None
12966 return query.GroupQueryData([self._all_groups[uuid]
12967 for uuid in self.wanted],
12968 group_to_nodes, group_to_instances)
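# Illustrative sketch (hypothetical helper, plain dicts only): the mapping
# built in _GroupQuery._GetQueryData above. Nodes map directly to their group;
# instances are attached to the group of their primary node, which is why node
# information has to be processed even for instance-only queries.
def _ExampleGroupMaps(wanted_groups, node_to_group, instance_to_pnode):
  """Return (group_to_nodes, group_to_instances) for the wanted groups.

  @param wanted_groups: iterable of group UUIDs to report on
  @param node_to_group: dict of node name -> group UUID
  @param instance_to_pnode: dict of instance name -> primary node name

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)

  for (node, group) in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)

  for (instance, pnode) in instance_to_pnode.items():
    group = node_to_group.get(pnode)
    if group in group_to_instances:
      group_to_instances[group].append(instance)

  return (group_to_nodes, group_to_instances)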
12971 class LUGroupQuery(NoHooksLU):
12972 """Logical unit for querying node groups.
12977 def CheckArguments(self):
12978 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12979 self.op.output_fields, False)
12981 def ExpandNames(self):
12982 self.gq.ExpandNames(self)
12984 def DeclareLocks(self, level):
12985 self.gq.DeclareLocks(self, level)
12987 def Exec(self, feedback_fn):
12988 return self.gq.OldStyleQuery(self)
12991 class LUGroupSetParams(LogicalUnit):
12992 """Modifies the parameters of a node group.
12995 HPATH = "group-modify"
12996 HTYPE = constants.HTYPE_GROUP
12999 def CheckArguments(self):
13002 self.op.diskparams,
13003 self.op.alloc_policy,
13008 if all_changes.count(None) == len(all_changes):
13009 raise errors.OpPrereqError("Please pass at least one modification",
13010 errors.ECODE_INVAL)
13012 def ExpandNames(self):
13013 # This raises errors.OpPrereqError on its own:
13014 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13016 self.needed_locks = {
13017 locking.LEVEL_NODEGROUP: [self.group_uuid],
13020 def CheckPrereq(self):
13021 """Check prerequisites.
13024 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13026 if self.group is None:
13027 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13028 (self.op.group_name, self.group_uuid))
13030 if self.op.ndparams:
13031 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13032 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13033 self.new_ndparams = new_ndparams
13035 if self.op.diskparams:
13036 self.new_diskparams = dict()
13037 for templ in constants.DISK_TEMPLATES:
13038 if templ not in self.op.diskparams:
13039 self.op.diskparams[templ] = {}
13040 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13041 self.op.diskparams[templ])
13042 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13043 self.new_diskparams[templ] = new_templ_params
13045 if self.op.hv_state:
13046 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13047 self.group.hv_state_static)
13049 if self.op.disk_state:
13050 self.new_disk_state = \
13051 _MergeAndVerifyDiskState(self.op.disk_state,
13052 self.group.disk_state_static)
13054 def BuildHooksEnv(self):
13055 """Build hooks env.
13059 "GROUP_NAME": self.op.group_name,
13060 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13063 def BuildHooksNodes(self):
13064 """Build hooks nodes.
13067 mn = self.cfg.GetMasterNode()
13068 return ([mn], [mn])
13070 def Exec(self, feedback_fn):
13071 """Modifies the node group.
13076 if self.op.ndparams:
13077 self.group.ndparams = self.new_ndparams
13078 result.append(("ndparams", str(self.group.ndparams)))
13080 if self.op.diskparams:
13081 self.group.diskparams = self.new_diskparams
13082 result.append(("diskparams", str(self.group.diskparams)))
13084 if self.op.alloc_policy:
13085 self.group.alloc_policy = self.op.alloc_policy
13087 if self.op.hv_state:
13088 self.group.hv_state_static = self.new_hv_state
13090 if self.op.disk_state:
13091 self.group.disk_state_static = self.new_disk_state
13093 self.cfg.Update(self.group, feedback_fn)
13097 class LUGroupRemove(LogicalUnit):
13098 HPATH = "group-remove"
13099 HTYPE = constants.HTYPE_GROUP
13102 def ExpandNames(self):
13103 # This raises errors.OpPrereqError on its own:
13104 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13105 self.needed_locks = {
13106 locking.LEVEL_NODEGROUP: [self.group_uuid],
13109 def CheckPrereq(self):
13110 """Check prerequisites.
13112 This checks that the given group name exists as a node group, that it is
13113 empty (i.e., contains no nodes), and that it is not the last group of the
13117 # Verify that the group is empty.
13118 group_nodes = [node.name
13119 for node in self.cfg.GetAllNodesInfo().values()
13120 if node.group == self.group_uuid]
13123 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13125 (self.op.group_name,
13126 utils.CommaJoin(utils.NiceSort(group_nodes))),
13127 errors.ECODE_STATE)
13129 # Verify the cluster would not be left group-less.
13130 if len(self.cfg.GetNodeGroupList()) == 1:
13131 raise errors.OpPrereqError("Group '%s' is the only group,"
13132 " cannot be removed" %
13133 self.op.group_name,
13134 errors.ECODE_STATE)
13136 def BuildHooksEnv(self):
13137 """Build hooks env.
13141 "GROUP_NAME": self.op.group_name,
13144 def BuildHooksNodes(self):
13145 """Build hooks nodes.
13148 mn = self.cfg.GetMasterNode()
13149 return ([mn], [mn])
13151 def Exec(self, feedback_fn):
13152 """Remove the node group.
13156 self.cfg.RemoveNodeGroup(self.group_uuid)
13157 except errors.ConfigurationError:
13158 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13159 (self.op.group_name, self.group_uuid))
13161 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13164 class LUGroupRename(LogicalUnit):
13165 HPATH = "group-rename"
13166 HTYPE = constants.HTYPE_GROUP
13169 def ExpandNames(self):
13170 # This raises errors.OpPrereqError on its own:
13171 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13173 self.needed_locks = {
13174 locking.LEVEL_NODEGROUP: [self.group_uuid],
13177 def CheckPrereq(self):
13178 """Check prerequisites.
13180 Ensures requested new name is not yet used.
13184 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13185 except errors.OpPrereqError:
13188 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13189 " node group (UUID: %s)" %
13190 (self.op.new_name, new_name_uuid),
13191 errors.ECODE_EXISTS)
13193 def BuildHooksEnv(self):
13194 """Build hooks env.
13198 "OLD_NAME": self.op.group_name,
13199 "NEW_NAME": self.op.new_name,
13202 def BuildHooksNodes(self):
13203 """Build hooks nodes.
13206 mn = self.cfg.GetMasterNode()
13208 all_nodes = self.cfg.GetAllNodesInfo()
13209 all_nodes.pop(mn, None)
13212 run_nodes.extend(node.name for node in all_nodes.values()
13213 if node.group == self.group_uuid)
13215 return (run_nodes, run_nodes)
13217 def Exec(self, feedback_fn):
13218 """Rename the node group.
13221 group = self.cfg.GetNodeGroup(self.group_uuid)
13224 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13225 (self.op.group_name, self.group_uuid))
13227 group.name = self.op.new_name
13228 self.cfg.Update(group, feedback_fn)
13230 return self.op.new_name
13233 class LUGroupEvacuate(LogicalUnit):
13234 HPATH = "group-evacuate"
13235 HTYPE = constants.HTYPE_GROUP
13238 def ExpandNames(self):
13239 # This raises errors.OpPrereqError on its own:
13240 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13242 if self.op.target_groups:
13243 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13244 self.op.target_groups)
13246 self.req_target_uuids = []
13248 if self.group_uuid in self.req_target_uuids:
13249 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13250 " as a target group (targets are %s)" %
13252 utils.CommaJoin(self.req_target_uuids)),
13253 errors.ECODE_INVAL)
13255 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13257 self.share_locks = _ShareAll()
13258 self.needed_locks = {
13259 locking.LEVEL_INSTANCE: [],
13260 locking.LEVEL_NODEGROUP: [],
13261 locking.LEVEL_NODE: [],
13264 def DeclareLocks(self, level):
13265 if level == locking.LEVEL_INSTANCE:
13266 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13268 # Lock instances optimistically, needs verification once node and group
13269 # locks have been acquired
13270 self.needed_locks[locking.LEVEL_INSTANCE] = \
13271 self.cfg.GetNodeGroupInstances(self.group_uuid)
13273 elif level == locking.LEVEL_NODEGROUP:
13274 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13276 if self.req_target_uuids:
13277 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13279 # Lock all groups used by instances optimistically; this requires going
13280 # via the node before it's locked, requiring verification later on
13281 lock_groups.update(group_uuid
13282 for instance_name in
13283 self.owned_locks(locking.LEVEL_INSTANCE)
13285 self.cfg.GetInstanceNodeGroups(instance_name))
13287 # No target groups, need to lock all of them
13288 lock_groups = locking.ALL_SET
13290 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13292 elif level == locking.LEVEL_NODE:
13293 # This will only lock the nodes in the group to be evacuated which
13294 # contain actual instances
13295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13296 self._LockInstancesNodes()
13298 # Lock all nodes in group to be evacuated and target groups
13299 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13300 assert self.group_uuid in owned_groups
13301 member_nodes = [node_name
13302 for group in owned_groups
13303 for node_name in self.cfg.GetNodeGroup(group).members]
13304 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13306 def CheckPrereq(self):
13307 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13308 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13309 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13311 assert owned_groups.issuperset(self.req_target_uuids)
13312 assert self.group_uuid in owned_groups
13314 # Check if locked instances are still correct
13315 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13317 # Get instance information
13318 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13320 # Check if node groups for locked instances are still correct
13321 for instance_name in owned_instances:
13322 inst = self.instances[instance_name]
13323 assert owned_nodes.issuperset(inst.all_nodes), \
13324 "Instance %s's nodes changed while we kept the lock" % instance_name
13326 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13329 assert self.group_uuid in inst_groups, \
13330 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13332 if self.req_target_uuids:
13333 # User requested specific target groups
13334 self.target_uuids = self.req_target_uuids
13336 # All groups except the one to be evacuated are potential targets
13337 self.target_uuids = [group_uuid for group_uuid in owned_groups
13338 if group_uuid != self.group_uuid]
13340 if not self.target_uuids:
13341 raise errors.OpPrereqError("There are no possible target groups",
13342 errors.ECODE_INVAL)
13344 def BuildHooksEnv(self):
13345 """Build hooks env.
13349 "GROUP_NAME": self.op.group_name,
13350 "TARGET_GROUPS": " ".join(self.target_uuids),
13353 def BuildHooksNodes(self):
13354 """Build hooks nodes.
13357 mn = self.cfg.GetMasterNode()
13359 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13361 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13363 return (run_nodes, run_nodes)
13365 def Exec(self, feedback_fn):
13366 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13368 assert self.group_uuid not in self.target_uuids
13370 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13371 instances=instances, target_groups=self.target_uuids)
13373 ial.Run(self.op.iallocator)
13375 if not ial.success:
13376 raise errors.OpPrereqError("Can't compute group evacuation using"
13377 " iallocator '%s': %s" %
13378 (self.op.iallocator, ial.info),
13379 errors.ECODE_NORES)
13381 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13383 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13384 len(jobs), self.op.group_name)
13386 return ResultWithJobs(jobs)
13389 class TagsLU(NoHooksLU): # pylint: disable=W0223
13390 """Generic tags LU.
13392 This is an abstract class which is the parent of all the other tags LUs.
13395 def ExpandNames(self):
13396 self.group_uuid = None
13397 self.needed_locks = {}
13398 if self.op.kind == constants.TAG_NODE:
13399 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13400 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13401 elif self.op.kind == constants.TAG_INSTANCE:
13402 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13403 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13404 elif self.op.kind == constants.TAG_NODEGROUP:
13405 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13407 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13408 # not possible to acquire the BGL based on opcode parameters)
13410 def CheckPrereq(self):
13411 """Check prerequisites.
13414 if self.op.kind == constants.TAG_CLUSTER:
13415 self.target = self.cfg.GetClusterInfo()
13416 elif self.op.kind == constants.TAG_NODE:
13417 self.target = self.cfg.GetNodeInfo(self.op.name)
13418 elif self.op.kind == constants.TAG_INSTANCE:
13419 self.target = self.cfg.GetInstanceInfo(self.op.name)
13420 elif self.op.kind == constants.TAG_NODEGROUP:
13421 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13423 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13424 str(self.op.kind), errors.ECODE_INVAL)
13427 class LUTagsGet(TagsLU):
13428 """Returns the tags of a given object.
13433 def ExpandNames(self):
13434 TagsLU.ExpandNames(self)
13436 # Share locks as this is only a read operation
13437 self.share_locks = _ShareAll()
13439 def Exec(self, feedback_fn):
13440 """Returns the tag list.
13443 return list(self.target.GetTags())
13446 class LUTagsSearch(NoHooksLU):
13447 """Searches the tags for a given pattern.
13452 def ExpandNames(self):
13453 self.needed_locks = {}
13455 def CheckPrereq(self):
13456 """Check prerequisites.
13458 This checks the pattern passed for validity by compiling it.
13462 self.re = re.compile(self.op.pattern)
13463 except re.error, err:
13464 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13465 (self.op.pattern, err), errors.ECODE_INVAL)
13467 def Exec(self, feedback_fn):
13468 """Returns the tag list.
13472 tgts = [("/cluster", cfg.GetClusterInfo())]
13473 ilist = cfg.GetAllInstancesInfo().values()
13474 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13475 nlist = cfg.GetAllNodesInfo().values()
13476 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13477 tgts.extend(("/nodegroup/%s" % n.name, n)
13478 for n in cfg.GetAllNodeGroupsInfo().values())
13480 for path, target in tgts:
13481 for tag in target.GetTags():
13482 if self.re.search(tag):
13483 results.append((path, tag))
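# Illustrative sketch (hypothetical data): the tag-search pattern used by
# LUTagsSearch.Exec above, reduced to plain data structures. Every taggable
# object contributes a (path, tag) pair for each of its tags that matches the
# compiled pattern.
def _ExampleTagSearch(pattern, tagged_objects):
  """Return (path, tag) pairs for all tags matching a regular expression.

  @param pattern: search pattern (string)
  @param tagged_objects: iterable of (path, set_of_tags) pairs

  """
  import re
  regex = re.compile(pattern)
  results = []
  for (path, tags) in tagged_objects:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results

# Example: _ExampleTagSearch("^env:", [("/instances/web1", set(["env:prod"]))])
# returns [("/instances/web1", "env:prod")].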
13487 class LUTagsSet(TagsLU):
13488 """Sets a tag on a given object.
13493 def CheckPrereq(self):
13494 """Check prerequisites.
13496 This checks the type and length of the tag name and value.
13499 TagsLU.CheckPrereq(self)
13500 for tag in self.op.tags:
13501 objects.TaggableObject.ValidateTag(tag)
13503 def Exec(self, feedback_fn):
13508 for tag in self.op.tags:
13509 self.target.AddTag(tag)
13510 except errors.TagError, err:
13511 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13512 self.cfg.Update(self.target, feedback_fn)
13515 class LUTagsDel(TagsLU):
13516 """Delete a list of tags from a given object.
13521 def CheckPrereq(self):
13522 """Check prerequisites.
13524 This checks that we have the given tag.
13527 TagsLU.CheckPrereq(self)
13528 for tag in self.op.tags:
13529 objects.TaggableObject.ValidateTag(tag)
13530 del_tags = frozenset(self.op.tags)
13531 cur_tags = self.target.GetTags()
13533 diff_tags = del_tags - cur_tags
13535 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13536 raise errors.OpPrereqError("Tag(s) %s not found" %
13537 (utils.CommaJoin(diff_names), ),
13538 errors.ECODE_NOENT)
13540 def Exec(self, feedback_fn):
13541 """Remove the tag from the object.
13544 for tag in self.op.tags:
13545 self.target.RemoveTag(tag)
13546 self.cfg.Update(self.target, feedback_fn)
13549 class LUTestDelay(NoHooksLU):
13550 """Sleep for a specified amount of time.
13552 This LU sleeps on the master and/or nodes for a specified amount of
13558 def ExpandNames(self):
13559 """Expand names and set required locks.
13561 This expands the node list, if any.
13564 self.needed_locks = {}
13565 if self.op.on_nodes:
13566 # _GetWantedNodes can be used here, but is not always appropriate to use
13567 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13568 # more information.
13569 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13570 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13572 def _TestDelay(self):
13573 """Do the actual sleep.
13576 if self.op.on_master:
13577 if not utils.TestDelay(self.op.duration):
13578 raise errors.OpExecError("Error during master delay test")
13579 if self.op.on_nodes:
13580 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13581 for node, node_result in result.items():
13582 node_result.Raise("Failure during rpc call to node %s" % node)
13584 def Exec(self, feedback_fn):
13585 """Execute the test delay opcode, with the wanted repetitions.
13588 if self.op.repeat == 0:
13591 top_value = self.op.repeat - 1
13592 for i in range(self.op.repeat):
13593 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13597 class LUTestJqueue(NoHooksLU):
13598 """Utility LU to test some aspects of the job queue.
13603 # Must be lower than default timeout for WaitForJobChange to see whether it
13604 # notices changed jobs
13605 _CLIENT_CONNECT_TIMEOUT = 20.0
13606 _CLIENT_CONFIRM_TIMEOUT = 60.0
13609 def _NotifyUsingSocket(cls, cb, errcls):
13610 """Opens a Unix socket and waits for another program to connect.
13613 @param cb: Callback to send socket name to client
13614 @type errcls: class
13615 @param errcls: Exception class to use for errors
13618 # Using a temporary directory as there's no easy way to create temporary
13619 # sockets without writing a custom loop around tempfile.mktemp and
13621 tmpdir = tempfile.mkdtemp()
13623 tmpsock = utils.PathJoin(tmpdir, "sock")
13625 logging.debug("Creating temporary socket at %s", tmpsock)
13626 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13631 # Send details to client
13634 # Wait for client to connect before continuing
13635 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13637 (conn, _) = sock.accept()
13638 except socket.error, err:
13639 raise errcls("Client didn't connect in time (%s)" % err)
13643 # Remove as soon as client is connected
13644 shutil.rmtree(tmpdir)
13646 # Wait for client to close
13649 # pylint: disable=E1101
13650 # Instance of '_socketobject' has no ... member
13651 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13653 except socket.error, err:
13654 raise errcls("Client failed to confirm notification (%s)" % err)
13658 def _SendNotification(self, test, arg, sockname):
13659 """Sends a notification to the client.
13662 @param test: Test name
13663 @param arg: Test argument (depends on test)
13664 @type sockname: string
13665 @param sockname: Socket path
13668 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13670 def _Notify(self, prereq, test, arg):
13671 """Notifies the client of a test.
13674 @param prereq: Whether this is a prereq-phase test
13676 @param test: Test name
13677 @param arg: Test argument (depends on test)
13681 errcls = errors.OpPrereqError
13683 errcls = errors.OpExecError
13685 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13689 def CheckArguments(self):
13690 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13691 self.expandnames_calls = 0
13693 def ExpandNames(self):
13694 checkargs_calls = getattr(self, "checkargs_calls", 0)
13695 if checkargs_calls < 1:
13696 raise errors.ProgrammerError("CheckArguments was not called")
13698 self.expandnames_calls += 1
13700 if self.op.notify_waitlock:
13701 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13703 self.LogInfo("Expanding names")
13705 # Get lock on master node (just to get a lock, not for a particular reason)
13706 self.needed_locks = {
13707 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13710 def Exec(self, feedback_fn):
13711 if self.expandnames_calls < 1:
13712 raise errors.ProgrammerError("ExpandNames was not called")
13714 if self.op.notify_exec:
13715 self._Notify(False, constants.JQT_EXEC, None)
13717 self.LogInfo("Executing")
13719 if self.op.log_messages:
13720 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13721 for idx, msg in enumerate(self.op.log_messages):
13722 self.LogInfo("Sending log message %s", idx + 1)
13723 feedback_fn(constants.JQT_MSGPREFIX + msg)
13724 # Report how many test messages have been sent
13725 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13728 raise errors.OpExecError("Opcode failure was requested")
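# Illustrative sketch (simplified, hypothetical helper): the notification
# handshake used by LUTestJqueue._NotifyUsingSocket above. A Unix socket is
# created in a private temporary directory, its path is handed to the peer via
# a callback, and the server then waits (with a timeout) for the peer to
# connect before cleaning up. Error handling and the final confirmation step
# are omitted here.
def _ExampleWaitForPeer(callback, connect_timeout=20.0):
  import os
  import shutil
  import socket
  import tempfile

  tmpdir = tempfile.mkdtemp()
  try:
    sockpath = os.path.join(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(sockpath)
      sock.listen(1)
      callback(sockpath)            # tell the peer where to connect
      sock.settimeout(connect_timeout)
      (conn, _) = sock.accept()     # raises socket.timeout if the peer is late
      conn.close()
    finally:
      sock.close()
  finally:
    shutil.rmtree(tmpdir)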
13733 class IAllocator(object):
13734 """IAllocator framework.
13736 An IAllocator instance has four sets of attributes:
13737 - cfg that is needed to query the cluster
13738 - input data (all members of the _KEYS class attribute are required)
13739 - four buffer attributes (in|out_data|text), that represent the
13740 input (to the external script) in text and data structure format,
13741 and the output from it, again in two formats
13742 - the result variables from the script (success, info, nodes) for
13746 # pylint: disable=R0902
13747 # lots of instance attributes
13749 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13751 self.rpc = rpc_runner
13752 # init buffer variables
13753 self.in_text = self.out_text = self.in_data = self.out_data = None
13754 # init all input fields so that pylint is happy
13756 self.memory = self.disks = self.disk_template = None
13757 self.os = self.tags = self.nics = self.vcpus = None
13758 self.hypervisor = None
13759 self.relocate_from = None
13761 self.instances = None
13762 self.evac_mode = None
13763 self.target_groups = []
13765 self.required_nodes = None
13766 # init result fields
13767 self.success = self.info = self.result = None
13770 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13772 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13773 " IAllocator" % self.mode)
13775 keyset = [n for (n, _) in keydata]
13778 if key not in keyset:
13779 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13780 " IAllocator" % key)
13781 setattr(self, key, kwargs[key])
13784 if key not in kwargs:
13785 raise errors.ProgrammerError("Missing input parameter '%s' to"
13786 " IAllocator" % key)
13787 self._BuildInputData(compat.partial(fn, self), keydata)
13789 def _ComputeClusterData(self):
13790 """Compute the generic allocator input data.
13792 This is the data that is independent of the actual operation.
13796 cluster_info = cfg.GetClusterInfo()
13799 "version": constants.IALLOCATOR_VERSION,
13800 "cluster_name": cfg.GetClusterName(),
13801 "cluster_tags": list(cluster_info.GetTags()),
13802 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13803 # we don't have job IDs
13805 ninfo = cfg.GetAllNodesInfo()
13806 iinfo = cfg.GetAllInstancesInfo().values()
13807 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13810 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13812 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13813 hypervisor_name = self.hypervisor
13814 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13815 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13817 hypervisor_name = cluster_info.primary_hypervisor
13819 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13822 self.rpc.call_all_instances_info(node_list,
13823 cluster_info.enabled_hypervisors)
13825 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13827 config_ndata = self._ComputeBasicNodeData(ninfo)
13828 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13829 i_list, config_ndata)
13830 assert len(data["nodes"]) == len(ninfo), \
13831 "Incomplete node data computed"
13833 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13835 self.in_data = data
13838 def _ComputeNodeGroupData(cfg):
13839 """Compute node groups data.
13842 ng = dict((guuid, {
13843 "name": gdata.name,
13844 "alloc_policy": gdata.alloc_policy,
13846 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13851 def _ComputeBasicNodeData(node_cfg):
13852 """Compute global node data.
13855 @returns: a dict of name: (node dict, node config)
13858 # fill in static (config-based) values
13859 node_results = dict((ninfo.name, {
13860 "tags": list(ninfo.GetTags()),
13861 "primary_ip": ninfo.primary_ip,
13862 "secondary_ip": ninfo.secondary_ip,
13863 "offline": ninfo.offline,
13864 "drained": ninfo.drained,
13865 "master_candidate": ninfo.master_candidate,
13866 "group": ninfo.group,
13867 "master_capable": ninfo.master_capable,
13868 "vm_capable": ninfo.vm_capable,
13870 for ninfo in node_cfg.values())
13872 return node_results
13875 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13877 """Compute global node data.
13879 @param node_results: the basic node structures as filled from the config
13882 #TODO(dynmem): compute the right data on MAX and MIN memory
13883 # make a copy of the current dict
13884 node_results = dict(node_results)
13885 for nname, nresult in node_data.items():
13886 assert nname in node_results, "Missing basic data for node %s" % nname
13887 ninfo = node_cfg[nname]
13889 if not (ninfo.offline or ninfo.drained):
13890 nresult.Raise("Can't get data for node %s" % nname)
13891 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13893 remote_info = _MakeLegacyNodeInfo(nresult.payload)
13895 for attr in ["memory_total", "memory_free", "memory_dom0",
13896 "vg_size", "vg_free", "cpu_total"]:
13897 if attr not in remote_info:
13898 raise errors.OpExecError("Node '%s' didn't return attribute"
13899 " '%s'" % (nname, attr))
13900 if not isinstance(remote_info[attr], int):
13901 raise errors.OpExecError("Node '%s' returned invalid value"
13903 (nname, attr, remote_info[attr]))
13904 # compute memory used by primary instances
13905 i_p_mem = i_p_up_mem = 0
13906 for iinfo, beinfo in i_list:
13907 if iinfo.primary_node == nname:
13908 i_p_mem += beinfo[constants.BE_MAXMEM]
13909 if iinfo.name not in node_iinfo[nname].payload:
13912 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13913 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13914 remote_info["memory_free"] -= max(0, i_mem_diff)
13916 if iinfo.admin_state == constants.ADMINST_UP:
13917 i_p_up_mem += beinfo[constants.BE_MAXMEM]
13919 # compute memory used by instances
13921 "total_memory": remote_info["memory_total"],
13922 "reserved_memory": remote_info["memory_dom0"],
13923 "free_memory": remote_info["memory_free"],
13924 "total_disk": remote_info["vg_size"],
13925 "free_disk": remote_info["vg_free"],
13926 "total_cpus": remote_info["cpu_total"],
13927 "i_pri_memory": i_p_mem,
13928 "i_pri_up_memory": i_p_up_mem,
13930 pnr_dyn.update(node_results[nname])
13931 node_results[nname] = pnr_dyn
13933 return node_results
13936 def _ComputeInstanceData(cluster_info, i_list):
13937 """Compute global instance data.
13941 for iinfo, beinfo in i_list:
13943 for nic in iinfo.nics:
13944 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13948 "mode": filled_params[constants.NIC_MODE],
13949 "link": filled_params[constants.NIC_LINK],
13951 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13952 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13953 nic_data.append(nic_dict)
13955 "tags": list(iinfo.GetTags()),
13956 "admin_state": iinfo.admin_state,
13957 "vcpus": beinfo[constants.BE_VCPUS],
13958 "memory": beinfo[constants.BE_MAXMEM],
13960 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13962 "disks": [{constants.IDISK_SIZE: dsk.size,
13963 constants.IDISK_MODE: dsk.mode}
13964 for dsk in iinfo.disks],
13965 "disk_template": iinfo.disk_template,
13966 "hypervisor": iinfo.hypervisor,
13968 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13970 instance_data[iinfo.name] = pir
13972 return instance_data
13974 def _AddNewInstance(self):
13975 """Add new instance data to allocator structure.
13977 This in combination with _ComputeClusterData will create the
13978 correct structure needed as input for the allocator.
13980 The checks for the completeness of the opcode must have already been
13984 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13986 if self.disk_template in constants.DTS_INT_MIRROR:
13987 self.required_nodes = 2
13989 self.required_nodes = 1
13993 "disk_template": self.disk_template,
13996 "vcpus": self.vcpus,
13997 "memory": self.memory,
13998 "disks": self.disks,
13999 "disk_space_total": disk_space,
14001 "required_nodes": self.required_nodes,
14002 "hypervisor": self.hypervisor,
14007 def _AddRelocateInstance(self):
14008 """Add relocate instance data to allocator structure.
14010 This in combination with _ComputeClusterData will create the
14011 correct structure needed as input for the allocator.
14013 The checks for the completeness of the opcode must have already been
14017 instance = self.cfg.GetInstanceInfo(self.name)
14018 if instance is None:
14019 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14020 " IAllocator" % self.name)
14022 if instance.disk_template not in constants.DTS_MIRRORED:
14023 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14024 errors.ECODE_INVAL)
14026 if instance.disk_template in constants.DTS_INT_MIRROR and \
14027 len(instance.secondary_nodes) != 1:
14028 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14029 errors.ECODE_STATE)
14031 self.required_nodes = 1
14032 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14033 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14037 "disk_space_total": disk_space,
14038 "required_nodes": self.required_nodes,
14039 "relocate_from": self.relocate_from,
14043 def _AddNodeEvacuate(self):
14044 """Get data for node-evacuate requests.
14048 "instances": self.instances,
14049 "evac_mode": self.evac_mode,
14052 def _AddChangeGroup(self):
14053 """Get data for node-evacuate requests.
14057 "instances": self.instances,
14058 "target_groups": self.target_groups,
14061 def _BuildInputData(self, fn, keydata):
14062 """Build input data structures.
14065 self._ComputeClusterData()
14068 request["type"] = self.mode
14069 for keyname, keytype in keydata:
14070 if keyname not in request:
14071 raise errors.ProgrammerError("Request parameter %s is missing" %
14073 val = request[keyname]
14074 if not keytype(val):
14075 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14076 " validation, value %s, expected"
14077 " type %s" % (keyname, val, keytype))
14078 self.in_data["request"] = request
14080 self.in_text = serializer.Dump(self.in_data)
14082 _STRING_LIST = ht.TListOf(ht.TString)
14083 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14084 # pylint: disable=E1101
14085 # Class '...' has no 'OP_ID' member
14086 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14087 opcodes.OpInstanceMigrate.OP_ID,
14088 opcodes.OpInstanceReplaceDisks.OP_ID])
14092 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14093 ht.TItems([ht.TNonEmptyString,
14094 ht.TNonEmptyString,
14095 ht.TListOf(ht.TNonEmptyString),
14098 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14099 ht.TItems([ht.TNonEmptyString,
14102 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14103 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14106 constants.IALLOCATOR_MODE_ALLOC:
14109 ("name", ht.TString),
14110 ("memory", ht.TInt),
14111 ("disks", ht.TListOf(ht.TDict)),
14112 ("disk_template", ht.TString),
14113 ("os", ht.TString),
14114 ("tags", _STRING_LIST),
14115 ("nics", ht.TListOf(ht.TDict)),
14116 ("vcpus", ht.TInt),
14117 ("hypervisor", ht.TString),
14119 constants.IALLOCATOR_MODE_RELOC:
14120 (_AddRelocateInstance,
14121 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14123 constants.IALLOCATOR_MODE_NODE_EVAC:
14124 (_AddNodeEvacuate, [
14125 ("instances", _STRING_LIST),
14126 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14128 constants.IALLOCATOR_MODE_CHG_GROUP:
14129 (_AddChangeGroup, [
14130 ("instances", _STRING_LIST),
14131 ("target_groups", _STRING_LIST),
14135 def Run(self, name, validate=True, call_fn=None):
14136 """Run an instance allocator and return the results.
14139 if call_fn is None:
14140 call_fn = self.rpc.call_iallocator_runner
14142 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14143 result.Raise("Failure while running the iallocator script")
14145 self.out_text = result.payload
14147 self._ValidateResult()
14149 def _ValidateResult(self):
14150 """Process the allocator results.
14152 This will process and if successful save the result in
14153 self.out_data and the other parameters.
14157 rdict = serializer.Load(self.out_text)
14158 except Exception, err:
14159 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14161 if not isinstance(rdict, dict):
14162 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14164 # TODO: remove backwards compatibility in later versions
14165 if "nodes" in rdict and "result" not in rdict:
14166 rdict["result"] = rdict["nodes"]
14169 for key in "success", "info", "result":
14170 if key not in rdict:
14171 raise errors.OpExecError("Can't parse iallocator results:"
14172 " missing key '%s'" % key)
14173 setattr(self, key, rdict[key])
14175 if not self._result_check(self.result):
14176 raise errors.OpExecError("Iallocator returned invalid result,"
14177 " expected %s, got %s" %
14178 (self._result_check, self.result),
14179 errors.ECODE_INVAL)
14181 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14182 assert self.relocate_from is not None
14183 assert self.required_nodes == 1
14185 node2group = dict((name, ndata["group"])
14186 for (name, ndata) in self.in_data["nodes"].items())
14188 fn = compat.partial(self._NodesToGroups, node2group,
14189 self.in_data["nodegroups"])
14191 instance = self.cfg.GetInstanceInfo(self.name)
14192 request_groups = fn(self.relocate_from + [instance.primary_node])
14193 result_groups = fn(rdict["result"] + [instance.primary_node])
14195 if self.success and not set(result_groups).issubset(request_groups):
14196 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14197 " differ from original groups (%s)" %
14198 (utils.CommaJoin(result_groups),
14199 utils.CommaJoin(request_groups)))
14201 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14202 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14204 self.out_data = rdict
14207 def _NodesToGroups(node2group, groups, nodes):
14208 """Returns a list of unique group names for a list of nodes.
14210 @type node2group: dict
14211 @param node2group: Map from node name to group UUID
14213 @param groups: Group information
14215 @param nodes: Node names
14222 group_uuid = node2group[node]
14224 # Ignore unknown node
14228 group = groups[group_uuid]
14230 # Can't find group, let's use UUID
14231 group_name = group_uuid
14233 group_name = group["name"]
14235 result.add(group_name)
14237 return sorted(result)
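# Illustrative sketch (hypothetical data): how _NodesToGroups above resolves
# node names to a sorted list of unique group names, falling back to the group
# UUID when the group is unknown and silently skipping unknown nodes.
def _ExampleNodesToGroups():
  node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
  groups = {"uuid-a": {"name": "default"}}  # uuid-b intentionally missing

  result = set()
  for node in ["node1", "node3", "node4"]:   # node4 is unknown and ignored
    if node not in node2group:
      continue
    group_uuid = node2group[node]
    group = groups.get(group_uuid)
    result.add(group["name"] if group else group_uuid)
  return sorted(result)   # ["default", "uuid-b"]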
14240 class LUTestAllocator(NoHooksLU):
14241 """Run allocator tests.
14243 This LU runs the allocator tests
14246 def CheckPrereq(self):
14247 """Check prerequisites.
14249 This checks the opcode parameters depending on the direction and mode of the test.
14252 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14253 for attr in ["memory", "disks", "disk_template",
14254 "os", "tags", "nics", "vcpus"]:
14255 if not hasattr(self.op, attr):
14256 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14257 attr, errors.ECODE_INVAL)
14258 iname = self.cfg.ExpandInstanceName(self.op.name)
14259 if iname is not None:
14260 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14261 iname, errors.ECODE_EXISTS)
14262 if not isinstance(self.op.nics, list):
14263 raise errors.OpPrereqError("Invalid parameter 'nics'",
14264 errors.ECODE_INVAL)
14265 if not isinstance(self.op.disks, list):
14266 raise errors.OpPrereqError("Invalid parameter 'disks'",
14267 errors.ECODE_INVAL)
14268 for row in self.op.disks:
14269 if (not isinstance(row, dict) or
14270 constants.IDISK_SIZE not in row or
14271 not isinstance(row[constants.IDISK_SIZE], int) or
14272 constants.IDISK_MODE not in row or
14273 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14274 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14275 " parameter", errors.ECODE_INVAL)
14276 if self.op.hypervisor is None:
14277 self.op.hypervisor = self.cfg.GetHypervisorType()
14278 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14279 fname = _ExpandInstanceName(self.cfg, self.op.name)
14280 self.op.name = fname
14281 self.relocate_from = \
14282 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14283 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14284 constants.IALLOCATOR_MODE_NODE_EVAC):
14285 if not self.op.instances:
14286 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14287 self.op.instances = _GetWantedInstances(self, self.op.instances)
14289 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14290 self.op.mode, errors.ECODE_INVAL)
14292 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14293 if self.op.allocator is None:
14294 raise errors.OpPrereqError("Missing allocator name",
14295 errors.ECODE_INVAL)
14296 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14297 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14298 self.op.direction, errors.ECODE_INVAL)
14300 def Exec(self, feedback_fn):
14301 """Run the allocator test.
14304 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14305 ial = IAllocator(self.cfg, self.rpc,
14308 memory=self.op.memory,
14309 disks=self.op.disks,
14310 disk_template=self.op.disk_template,
14314 vcpus=self.op.vcpus,
14315 hypervisor=self.op.hypervisor,
14317 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14318 ial = IAllocator(self.cfg, self.rpc,
14321 relocate_from=list(self.relocate_from),
14323 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14324 ial = IAllocator(self.cfg, self.rpc,
14326 instances=self.op.instances,
14327 target_groups=self.op.target_groups)
14328 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14329 ial = IAllocator(self.cfg, self.rpc,
14331 instances=self.op.instances,
14332 evac_mode=self.op.evac_mode)
14334 raise errors.ProgrammerError("Uncatched mode %s in"
14335 " LUTestAllocator.Exec", self.op.mode)
14337 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14338 result = ial.in_text
14340 ial.Run(self.op.allocator, validate=False)
14341 result = ial.out_text
14345 #: Query type implementations
14347 constants.QR_INSTANCE: _InstanceQuery,
14348 constants.QR_NODE: _NodeQuery,
14349 constants.QR_GROUP: _GroupQuery,
14350 constants.QR_OS: _OsQuery,
14353 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14356 def _GetQueryImplementation(name):
14357 """Returns the implemtnation for a query type.
14359 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14363 return _QUERY_IMPL[name]
14365 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14366 errors.ECODE_INVAL)
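# Illustrative sketch (hypothetical mapping and resource name): the
# dictionary-based dispatch used by _GetQueryImplementation above, with an
# explicit error for unknown resource names instead of a bare KeyError.
def _ExampleDispatch(name, implementations):
  """Look up a query implementation, raising ValueError if unknown.

  @param name: query resource name
  @param implementations: dict of resource name -> implementation

  """
  try:
    return implementations[name]
  except KeyError:
    raise ValueError("Unknown query resource '%s'" % name)

# Example: _ExampleDispatch("instance", {"instance": object}) returns object;
# _ExampleDispatch("bogus", {}) raises ValueError.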