4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
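A minimal usage sketch (C{OpTestDelay} is used purely for illustration,
and C{custom_result} is a hypothetical extra return value)::

  return ResultWithJobs([[opcodes.OpTestDelay(duration=1)]],
                        custom_result="done")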
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
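A minimal concrete LU following the rules above might look like this
(an illustrative sketch only, modelled on the simple cluster LUs defined
later in this module)::

  class LUClusterExample(LogicalUnit):
    HPATH = "cluster-example"
    HTYPE = constants.HTYPE_CLUSTER

    def ExpandNames(self):
      self.needed_locks = {}

    def BuildHooksEnv(self):
      return {"OP_TARGET": self.cfg.GetClusterName()}

    def BuildHooksNodes(self):
      return ([], [self.cfg.GetMasterNode()])

    def CheckPrereq(self):
      pass

    def Exec(self, feedback_fn):
      return True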
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as a purely lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly
177 The function is allowed to change the self.op attribute so that
178 later methods need not worry about missing parameters.
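A typical implementation might normalize an optional parameter in place
(illustrative sketch only; the attribute shown is just an example)::

  def CheckArguments(self):
    if getattr(self.op, "debug_level", None) is None:
      self.op.debug_level = 0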
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
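A typical override looks like the following sketch (mirroring the
L{_LockInstancesNodes} example below)::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()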
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
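Example (sketch, as used by the simple cluster LUs)::

  return {
    "OP_TARGET": self.cfg.GetClusterName(),
    }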
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If no nodes are needed, an
309 empty list should be returned (and not None).
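Example (sketch; run the post hook only on the master node)::

  return ([], [self.cfg.GetMasterNode()])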
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
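An overriding implementation could, for instance, report on the post
phase (illustrative sketch only)::

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    if phase == constants.HOOKS_PHASE_POST:
      feedback_fn("Post hooks have run")
    return lu_result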
335 # API must be kept, thus we ignore the unused argument and "could
336 # be a function" warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primary or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check whether we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
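Example (illustrative; with the default flags, C{VALUE_DEFAULT} removes
a key)::

  >>> _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT})
  {'a': 1}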
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of a instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_ISPECS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 # FIXME: we assume all others are lists; this should be redone
745 if not value or value == [constants.VALUE_DEFAULT]:
749 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
750 " on the cluster'" % key,
753 ipolicy[key] = list(value)
755 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
756 except errors.ConfigurationError, err:
757 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
762 def _UpdateAndVerifySubDict(base, updates, type_check):
763 """Updates and verifies a dict with sub dicts of the same type.
765 @param base: The dict with the old data
766 @param updates: The dict with the new data
767 @param type_check: Dict suitable to ForceDictType to verify correct types
768 @return: A new dict with updated and verified values
772 new = _GetUpdatedParams(old, value)
773 utils.ForceDictType(new, type_check)
776 ret = copy.deepcopy(base)
777 ret.update(dict((key, fn(base.get(key, {}), value))
778 for key, value in updates.items()))
782 def _MergeAndVerifyHvState(op_input, obj_input):
783 """Combines the hv state from an opcode with the one of the object
785 @param op_input: The input dict from the opcode
786 @param obj_input: The input dict from the objects
787 @return: The verified and updated dict
791 invalid_hvs = set(op_input) - constants.HYPER_TYPES
793 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
794 " %s" % utils.CommaJoin(invalid_hvs),
796 if obj_input is None:
798 type_check = constants.HVSTS_PARAMETER_TYPES
799 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
804 def _MergeAndVerifyDiskState(op_input, obj_input):
805 """Combines the disk state from an opcode with the one of the object
807 @param op_input: The input dict from the opcode
808 @param obj_input: The input dict from the objects
809 @return: The verified and updated dict
812 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
814 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
815 utils.CommaJoin(invalid_dst),
817 type_check = constants.DSS_PARAMETER_TYPES
818 if obj_input is None:
820 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
822 for key, value in op_input.items())
827 def _ReleaseLocks(lu, level, names=None, keep=None):
828 """Releases locks owned by an LU.
830 @type lu: L{LogicalUnit}
831 @param level: Lock level
832 @type names: list or None
833 @param names: Names of locks to release
834 @type keep: list or None
835 @param keep: Names of locks to retain
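Example (sketch; keep only the lock on the instance's primary node)::

  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])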
838 assert not (keep is not None and names is not None), \
839 "Only one of the 'names' and the 'keep' parameters can be given"
841 if names is not None:
842 should_release = names.__contains__
844 should_release = lambda name: name not in keep
846 should_release = None
848 owned = lu.owned_locks(level)
850 # Not owning any lock at this level, do nothing
857 # Determine which locks to release
859 if should_release(name):
864 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
866 # Release just some locks
867 lu.glm.release(level, names=release)
869 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
872 lu.glm.release(level)
874 assert not lu.glm.is_owned(level), "No locks should be owned"
877 def _MapInstanceDisksToNodes(instances):
878 """Creates a map from (node, volume) to instance name.
880 @type instances: list of L{objects.Instance}
881 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
884 return dict(((node, vol), inst.name)
885 for inst in instances
886 for (node, vols) in inst.MapLVsByNode().items()
890 def _RunPostHook(lu, node_name):
891 """Runs the post-hook for an opcode on a single node.
894 hm = lu.proc.BuildHooksManager(lu)
896 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
898 # pylint: disable=W0702
899 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
902 def _CheckOutputFields(static, dynamic, selected):
903 """Checks whether all selected fields are valid.
905 @type static: L{utils.FieldSet}
906 @param static: static fields set
907 @type dynamic: L{utils.FieldSet}
908 @param dynamic: dynamic fields set
915 delta = f.NonMatching(selected)
917 raise errors.OpPrereqError("Unknown output fields selected: %s"
918 % ",".join(delta), errors.ECODE_INVAL)
921 def _CheckGlobalHvParams(params):
922 """Validates that given hypervisor params are not global ones.
924 This will ensure that instances don't get customised versions of
928 used_globals = constants.HVC_GLOBALS.intersection(params)
930 msg = ("The following hypervisor parameters are global and cannot"
931 " be customized at instance level, please modify them at"
932 " cluster level: %s" % utils.CommaJoin(used_globals))
933 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
936 def _CheckNodeOnline(lu, node, msg=None):
937 """Ensure that a given node is online.
939 @param lu: the LU on behalf of which we make the check
940 @param node: the node to check
941 @param msg: if passed, should be a message to replace the default one
942 @raise errors.OpPrereqError: if the node is offline
946 msg = "Can't use offline node"
947 if lu.cfg.GetNodeInfo(node).offline:
948 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
951 def _CheckNodeNotDrained(lu, node):
952 """Ensure that a given node is not drained.
954 @param lu: the LU on behalf of which we make the check
955 @param node: the node to check
956 @raise errors.OpPrereqError: if the node is drained
959 if lu.cfg.GetNodeInfo(node).drained:
960 raise errors.OpPrereqError("Can't use drained node %s" % node,
964 def _CheckNodeVmCapable(lu, node):
965 """Ensure that a given node is vm capable.
967 @param lu: the LU on behalf of which we make the check
968 @param node: the node to check
969 @raise errors.OpPrereqError: if the node is not vm capable
972 if not lu.cfg.GetNodeInfo(node).vm_capable:
973 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
977 def _CheckNodeHasOS(lu, node, os_name, force_variant):
978 """Ensure that a node supports a given OS.
980 @param lu: the LU on behalf of which we make the check
981 @param node: the node to check
982 @param os_name: the OS to query about
983 @param force_variant: whether to ignore variant errors
984 @raise errors.OpPrereqError: if the node is not supporting the OS
987 result = lu.rpc.call_os_get(node, os_name)
988 result.Raise("OS '%s' not in supported OS list for node %s" %
990 prereq=True, ecode=errors.ECODE_INVAL)
991 if not force_variant:
992 _CheckOSVariant(result.payload, os_name)
995 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
996 """Ensure that a node has the given secondary ip.
998 @type lu: L{LogicalUnit}
999 @param lu: the LU on behalf of which we make the check
1001 @param node: the node to check
1002 @type secondary_ip: string
1003 @param secondary_ip: the ip to check
1004 @type prereq: boolean
1005 @param prereq: whether to throw a prerequisite or an execute error
1006 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1007 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1010 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1011 result.Raise("Failure checking secondary ip on node %s" % node,
1012 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1013 if not result.payload:
1014 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1015 " please fix and re-run this command" % secondary_ip)
1017 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1019 raise errors.OpExecError(msg)
1022 def _GetClusterDomainSecret():
1023 """Reads the cluster domain secret.
1026 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1030 def _CheckInstanceState(lu, instance, req_states, msg=None):
1031 """Ensure that an instance is in one of the required states.
1033 @param lu: the LU on behalf of which we make the check
1034 @param instance: the instance to check
1035 @param msg: if passed, should be a message to replace the default one
1036 @raise errors.OpPrereqError: if the instance is not in the required state
1040 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1041 if instance.admin_state not in req_states:
1042 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1043 (instance.name, instance.admin_state, msg),
1046 if constants.ADMINST_UP not in req_states:
1047 pnode = instance.primary_node
1048 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1049 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1050 prereq=True, ecode=errors.ECODE_ENVIRON)
1052 if instance.name in ins_l.payload:
1053 raise errors.OpPrereqError("Instance %s is running, %s" %
1054 (instance.name, msg), errors.ECODE_STATE)
1057 def _ComputeMinMaxSpec(name, ipolicy, value):
1058 """Computes if value is in the desired range.
1060 @param name: name of the parameter for which we perform the check
1061 @param ipolicy: dictionary containing min, max and std values
1062 @param value: actual value that we want to use
1063 @return: None or element not meeting the criteria
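Example (illustrative)::

  policy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 1024},
    }
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, policy, 2048) # -> message
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, policy, 512)  # -> None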
1067 if value in [None, constants.VALUE_AUTO]:
1069 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1070 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1071 if value > max_v or min_v > value:
1072 return ("%s value %s is not in range [%s, %s]" %
1073 (name, value, min_v, max_v))
1077 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1078 nic_count, disk_sizes,
1079 _compute_fn=_ComputeMinMaxSpec):
1080 """Verifies ipolicy against provided specs.
1083 @param ipolicy: The ipolicy
1085 @param mem_size: The memory size
1086 @type cpu_count: int
1087 @param cpu_count: Used cpu cores
1088 @type disk_count: int
1089 @param disk_count: Number of disks used
1090 @type nic_count: int
1091 @param nic_count: Number of nics used
1092 @type disk_sizes: list of ints
1093 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1094 @param _compute_fn: The compute function (unittest only)
1095 @return: A list of violations, or an empty list if no violations are found
1098 assert disk_count == len(disk_sizes)
1101 (constants.ISPEC_MEM_SIZE, mem_size),
1102 (constants.ISPEC_CPU_COUNT, cpu_count),
1103 (constants.ISPEC_DISK_COUNT, disk_count),
1104 (constants.ISPEC_NIC_COUNT, nic_count),
1105 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1108 (_compute_fn(name, ipolicy, value)
1109 for (name, value) in test_settings))
1112 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1113 _compute_fn=_ComputeIPolicySpecViolation):
1114 """Compute if instance meets the specs of ipolicy.
1117 @param ipolicy: The ipolicy to verify against
1118 @type instance: L{objects.Instance}
1119 @param instance: The instance to verify
1120 @param _compute_fn: The function to verify ipolicy (unittest only)
1121 @see: L{_ComputeIPolicySpecViolation}
1124 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1125 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1126 disk_count = len(instance.disks)
1127 disk_sizes = [disk.size for disk in instance.disks]
1128 nic_count = len(instance.nics)
1130 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1134 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1135 _compute_fn=_ComputeIPolicySpecViolation):
1136 """Compute if instance specs meets the specs of ipolicy.
1139 @param ipolicy: The ipolicy to verify against
1140 @type instance_spec: dict
1141 @param instance_spec: The instance spec to verify
1142 @param _compute_fn: The function to verify ipolicy (unittest only)
1143 @see: L{_ComputeIPolicySpecViolation}
1146 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1147 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1148 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1149 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1150 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1156 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1158 _compute_fn=_ComputeIPolicyInstanceViolation):
1159 """Compute if instance meets the specs of the new target group.
1161 @param ipolicy: The ipolicy to verify
1162 @param instance: The instance object to verify
1163 @param current_group: The current group of the instance
1164 @param target_group: The new group of the instance
1165 @param _compute_fn: The function to verify ipolicy (unittest only)
1166 @see: L{_ComputeIPolicySpecViolation}
1169 if current_group == target_group:
1172 return _compute_fn(ipolicy, instance)
1175 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1176 _compute_fn=_ComputeIPolicyNodeViolation):
1177 """Checks that the target node is correct in terms of instance policy.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param node: The new node to relocate
1182 @param ignore: Ignore violations of the ipolicy
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1188 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1191 msg = ("Instance does not meet target node group's (%s) instance"
1192 " policy: %s") % (node.group, utils.CommaJoin(res))
1196 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1199 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1200 """Computes a set of any instances that would violate the new ipolicy.
1202 @param old_ipolicy: The current (still in-place) ipolicy
1203 @param new_ipolicy: The new (to become) ipolicy
1204 @param instances: List of instances to verify
1205 @return: A set of instances which violate the new ipolicy but did not before
1208 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1209 _ComputeViolatingInstances(old_ipolicy, instances))
1212 def _ExpandItemName(fn, name, kind):
1213 """Expand an item name.
1215 @param fn: the function to use for expansion
1216 @param name: requested item name
1217 @param kind: text description ('Node' or 'Instance')
1218 @return: the resolved (full) name
1219 @raise errors.OpPrereqError: if the item is not found
1222 full_name = fn(name)
1223 if full_name is None:
1224 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1229 def _ExpandNodeName(cfg, name):
1230 """Wrapper over L{_ExpandItemName} for nodes."""
1231 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1234 def _ExpandInstanceName(cfg, name):
1235 """Wrapper over L{_ExpandItemName} for instance."""
1236 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1239 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1240 minmem, maxmem, vcpus, nics, disk_template, disks,
1241 bep, hvp, hypervisor_name, tags):
1242 """Builds instance related env variables for hooks
1244 This builds the hook environment from individual variables.
1247 @param name: the name of the instance
1248 @type primary_node: string
1249 @param primary_node: the name of the instance's primary node
1250 @type secondary_nodes: list
1251 @param secondary_nodes: list of secondary nodes as strings
1252 @type os_type: string
1253 @param os_type: the name of the instance's OS
1254 @type status: string
1255 @param status: the desired status of the instance
1256 @type minmem: string
1257 @param minmem: the minimum memory size of the instance
1258 @type maxmem: string
1259 @param maxmem: the maximum memory size of the instance
1261 @param vcpus: the count of VCPUs the instance has
1263 @param nics: list of tuples (ip, mac, mode, link) representing
1264 the NICs the instance has
1265 @type disk_template: string
1266 @param disk_template: the disk template of the instance
1268 @param disks: the list of (size, mode) pairs
1270 @param bep: the backend parameters for the instance
1272 @param hvp: the hypervisor parameters for the instance
1273 @type hypervisor_name: string
1274 @param hypervisor_name: the hypervisor for the instance
1276 @param tags: list of instance tags as strings
1278 @return: the hook environment for this instance
1283 "INSTANCE_NAME": name,
1284 "INSTANCE_PRIMARY": primary_node,
1285 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1286 "INSTANCE_OS_TYPE": os_type,
1287 "INSTANCE_STATUS": status,
1288 "INSTANCE_MINMEM": minmem,
1289 "INSTANCE_MAXMEM": maxmem,
1290 # TODO(2.7) remove deprecated "memory" value
1291 "INSTANCE_MEMORY": maxmem,
1292 "INSTANCE_VCPUS": vcpus,
1293 "INSTANCE_DISK_TEMPLATE": disk_template,
1294 "INSTANCE_HYPERVISOR": hypervisor_name,
1297 nic_count = len(nics)
1298 for idx, (ip, mac, mode, link) in enumerate(nics):
1301 env["INSTANCE_NIC%d_IP" % idx] = ip
1302 env["INSTANCE_NIC%d_MAC" % idx] = mac
1303 env["INSTANCE_NIC%d_MODE" % idx] = mode
1304 env["INSTANCE_NIC%d_LINK" % idx] = link
1305 if mode == constants.NIC_MODE_BRIDGED:
1306 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1310 env["INSTANCE_NIC_COUNT"] = nic_count
1313 disk_count = len(disks)
1314 for idx, (size, mode) in enumerate(disks):
1315 env["INSTANCE_DISK%d_SIZE" % idx] = size
1316 env["INSTANCE_DISK%d_MODE" % idx] = mode
1320 env["INSTANCE_DISK_COUNT"] = disk_count
1325 env["INSTANCE_TAGS"] = " ".join(tags)
1327 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1328 for key, value in source.items():
1329 env["INSTANCE_%s_%s" % (kind, key)] = value
1334 def _NICListToTuple(lu, nics):
1335 """Build a list of nic information tuples.
1337 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1338 value in LUInstanceQueryData.
1340 @type lu: L{LogicalUnit}
1341 @param lu: the logical unit on whose behalf we execute
1342 @type nics: list of L{objects.NIC}
1343 @param nics: list of nics to convert to hooks tuples
1347 cluster = lu.cfg.GetClusterInfo()
1351 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1352 mode = filled_params[constants.NIC_MODE]
1353 link = filled_params[constants.NIC_LINK]
1354 hooks_nics.append((ip, mac, mode, link))
1358 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1359 """Builds instance related env variables for hooks from an object.
1361 @type lu: L{LogicalUnit}
1362 @param lu: the logical unit on whose behalf we execute
1363 @type instance: L{objects.Instance}
1364 @param instance: the instance for which we should build the
1366 @type override: dict
1367 @param override: dictionary with key/values that will override
1370 @return: the hook environment dictionary
1373 cluster = lu.cfg.GetClusterInfo()
1374 bep = cluster.FillBE(instance)
1375 hvp = cluster.FillHV(instance)
1377 "name": instance.name,
1378 "primary_node": instance.primary_node,
1379 "secondary_nodes": instance.secondary_nodes,
1380 "os_type": instance.os,
1381 "status": instance.admin_state,
1382 "maxmem": bep[constants.BE_MAXMEM],
1383 "minmem": bep[constants.BE_MINMEM],
1384 "vcpus": bep[constants.BE_VCPUS],
1385 "nics": _NICListToTuple(lu, instance.nics),
1386 "disk_template": instance.disk_template,
1387 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1390 "hypervisor_name": instance.hypervisor,
1391 "tags": instance.tags,
1394 args.update(override)
1395 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1398 def _AdjustCandidatePool(lu, exceptions):
1399 """Adjust the candidate pool after node operations.
1402 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1404 lu.LogInfo("Promoted nodes to master candidate role: %s",
1405 utils.CommaJoin(node.name for node in mod_list))
1406 for name in mod_list:
1407 lu.context.ReaddNode(name)
1408 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1410 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1414 def _DecideSelfPromotion(lu, exceptions=None):
1415 """Decide whether I should promote myself as a master candidate.
1418 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1419 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1420 # the new node will increase mc_max with one, so:
1421 mc_should = min(mc_should + 1, cp_size)
1422 return mc_now < mc_should
1425 def _CalculateGroupIPolicy(cluster, group):
1426 """Calculate instance policy for group.
1429 return cluster.SimpleFillIPolicy(group.ipolicy)
1432 def _ComputeViolatingInstances(ipolicy, instances):
1433 """Computes a set of instances who violates given ipolicy.
1435 @param ipolicy: The ipolicy to verify
1436 @type instances: list of L{objects.Instance}
1437 @param instances: List of instances to verify
1438 @return: A frozenset of instance names violating the ipolicy
1441 return frozenset([inst.name for inst in instances
1442 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1445 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1446 """Check that the brigdes needed by a list of nics exist.
1449 cluster = lu.cfg.GetClusterInfo()
1450 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1451 brlist = [params[constants.NIC_LINK] for params in paramslist
1452 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1454 result = lu.rpc.call_bridges_exist(target_node, brlist)
1455 result.Raise("Error checking bridges on destination node '%s'" %
1456 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1459 def _CheckInstanceBridgesExist(lu, instance, node=None):
1460 """Check that the brigdes needed by an instance exist.
1464 node = instance.primary_node
1465 _CheckNicsBridgesExist(lu, instance.nics, node)
1468 def _CheckOSVariant(os_obj, name):
1469 """Check whether an OS name conforms to the os variants specification.
1471 @type os_obj: L{objects.OS}
1472 @param os_obj: OS object to check
1474 @param name: OS name passed by the user, to check for validity
1477 variant = objects.OS.GetVariant(name)
1478 if not os_obj.supported_variants:
1480 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1481 " passed)" % (os_obj.name, variant),
1485 raise errors.OpPrereqError("OS name must include a variant",
1488 if variant not in os_obj.supported_variants:
1489 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1492 def _GetNodeInstancesInner(cfg, fn):
1493 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1496 def _GetNodeInstances(cfg, node_name):
1497 """Returns a list of all primary and secondary instances on a node.
1501 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1504 def _GetNodePrimaryInstances(cfg, node_name):
1505 """Returns primary instances on a node.
1508 return _GetNodeInstancesInner(cfg,
1509 lambda inst: node_name == inst.primary_node)
1512 def _GetNodeSecondaryInstances(cfg, node_name):
1513 """Returns secondary instances on a node.
1516 return _GetNodeInstancesInner(cfg,
1517 lambda inst: node_name in inst.secondary_nodes)
1520 def _GetStorageTypeArgs(cfg, storage_type):
1521 """Returns the arguments for a storage type.
1524 # Special case for file storage
1525 if storage_type == constants.ST_FILE:
1526 # storage.FileStorage wants a list of storage directories
1527 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1532 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1535 for dev in instance.disks:
1536 cfg.SetDiskID(dev, node_name)
1538 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1539 result.Raise("Failed to get disk status from node %s" % node_name,
1540 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1542 for idx, bdev_status in enumerate(result.payload):
1543 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1549 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1550 """Check the sanity of iallocator and node arguments and use the
1551 cluster-wide iallocator if appropriate.
1553 Check that at most one of (iallocator, node) is specified. If none is
1554 specified, then the LU's opcode's iallocator slot is filled with the
1555 cluster-wide default iallocator.
1557 @type iallocator_slot: string
1558 @param iallocator_slot: the name of the opcode iallocator slot
1559 @type node_slot: string
1560 @param node_slot: the name of the opcode target node slot
1563 node = getattr(lu.op, node_slot, None)
1564 iallocator = getattr(lu.op, iallocator_slot, None)
1566 if node is not None and iallocator is not None:
1567 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1569 elif node is None and iallocator is None:
1570 default_iallocator = lu.cfg.GetDefaultIAllocator()
1571 if default_iallocator:
1572 setattr(lu.op, iallocator_slot, default_iallocator)
1574 raise errors.OpPrereqError("No iallocator or node given and no"
1575 " cluster-wide default iallocator found;"
1576 " please specify either an iallocator or a"
1577 " node, or set a cluster-wide default"
1581 def _GetDefaultIAllocator(cfg, iallocator):
1582 """Decides on which iallocator to use.
1584 @type cfg: L{config.ConfigWriter}
1585 @param cfg: Cluster configuration object
1586 @type iallocator: string or None
1587 @param iallocator: Iallocator specified in opcode
1589 @return: Iallocator name
1593 # Use default iallocator
1594 iallocator = cfg.GetDefaultIAllocator()
1597 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1598 " opcode nor as a cluster-wide default",
1604 class LUClusterPostInit(LogicalUnit):
1605 """Logical unit for running hooks after cluster initialization.
1608 HPATH = "cluster-init"
1609 HTYPE = constants.HTYPE_CLUSTER
1611 def BuildHooksEnv(self):
1616 "OP_TARGET": self.cfg.GetClusterName(),
1619 def BuildHooksNodes(self):
1620 """Build hooks nodes.
1623 return ([], [self.cfg.GetMasterNode()])
1625 def Exec(self, feedback_fn):
1632 class LUClusterDestroy(LogicalUnit):
1633 """Logical unit for destroying the cluster.
1636 HPATH = "cluster-destroy"
1637 HTYPE = constants.HTYPE_CLUSTER
1639 def BuildHooksEnv(self):
1644 "OP_TARGET": self.cfg.GetClusterName(),
1647 def BuildHooksNodes(self):
1648 """Build hooks nodes.
1653 def CheckPrereq(self):
1654 """Check prerequisites.
1656 This checks whether the cluster is empty.
1658 Any errors are signaled by raising errors.OpPrereqError.
1661 master = self.cfg.GetMasterNode()
1663 nodelist = self.cfg.GetNodeList()
1664 if len(nodelist) != 1 or nodelist[0] != master:
1665 raise errors.OpPrereqError("There are still %d node(s) in"
1666 " this cluster." % (len(nodelist) - 1),
1668 instancelist = self.cfg.GetInstanceList()
1670 raise errors.OpPrereqError("There are still %d instance(s) in"
1671 " this cluster." % len(instancelist),
1674 def Exec(self, feedback_fn):
1675 """Destroys the cluster.
1678 master_params = self.cfg.GetMasterNetworkParameters()
1680 # Run post hooks on master node before it's removed
1681 _RunPostHook(self, master_params.name)
1683 ems = self.cfg.GetUseExternalMipScript()
1684 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1687 self.LogWarning("Error disabling the master IP address: %s",
1690 return master_params.name
1693 def _VerifyCertificate(filename):
1694 """Verifies a certificate for L{LUClusterVerifyConfig}.
1696 @type filename: string
1697 @param filename: Path to PEM file
1701 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1702 utils.ReadFile(filename))
1703 except Exception, err: # pylint: disable=W0703
1704 return (LUClusterVerifyConfig.ETYPE_ERROR,
1705 "Failed to load X509 certificate %s: %s" % (filename, err))
1708 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1709 constants.SSL_CERT_EXPIRATION_ERROR)
1712 fnamemsg = "While verifying %s: %s" % (filename, msg)
1717 return (None, fnamemsg)
1718 elif errcode == utils.CERT_WARNING:
1719 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1720 elif errcode == utils.CERT_ERROR:
1721 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1723 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1726 def _GetAllHypervisorParameters(cluster, instances):
1727 """Compute the set of all hypervisor parameters.
1729 @type cluster: L{objects.Cluster}
1730 @param cluster: the cluster object
1731 @type instances: list of L{objects.Instance}
1732 @param instances: additional instances from which to obtain parameters
1733 @rtype: list of (origin, hypervisor, parameters)
1734 @return: a list with all parameters found, indicating the hypervisor they
1735 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1740 for hv_name in cluster.enabled_hypervisors:
1741 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1743 for os_name, os_hvp in cluster.os_hvp.items():
1744 for hv_name, hv_params in os_hvp.items():
1746 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1747 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1749 # TODO: collapse identical parameter values in a single one
1750 for instance in instances:
1751 if instance.hvparams:
1752 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1753 cluster.FillHV(instance)))
1758 class _VerifyErrors(object):
1759 """Mix-in for cluster/group verify LUs.
1761 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1762 self.op and self._feedback_fn to be available.)
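LUs mixing this in typically report problems via (sketch, mirroring the
calls in the verification LUs below)::

  self._ErrorIf(test, constants.CV_ECLUSTERCFG, None, "bad config: %s", msg)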
1766 ETYPE_FIELD = "code"
1767 ETYPE_ERROR = "ERROR"
1768 ETYPE_WARNING = "WARNING"
1770 def _Error(self, ecode, item, msg, *args, **kwargs):
1771 """Format an error message.
1773 Based on the opcode's error_codes parameter, either format a
1774 parseable error code, or a simpler error string.
1776 This must be called only from Exec and functions called from Exec.
1779 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1780 itype, etxt, _ = ecode
1781 # first complete the msg
1784 # then format the whole message
1785 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1786 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1792 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1793 # and finally report it via the feedback_fn
1794 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1796 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1797 """Log an error message if the passed condition is True.
1801 or self.op.debug_simulate_errors) # pylint: disable=E1101
1803 # If the error code is in the list of ignored errors, demote the error to a
1805 (_, etxt, _) = ecode
1806 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1807 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1810 self._Error(ecode, *args, **kwargs)
1812 # do not mark the operation as failed for WARN cases only
1813 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1814 self.bad = self.bad or cond
1817 class LUClusterVerify(NoHooksLU):
1818 """Submits all jobs necessary to verify the cluster.
1823 def ExpandNames(self):
1824 self.needed_locks = {}
1826 def Exec(self, feedback_fn):
1829 if self.op.group_name:
1830 groups = [self.op.group_name]
1831 depends_fn = lambda: None
1833 groups = self.cfg.GetNodeGroupList()
1835 # Verify global configuration
1837 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1840 # Always depend on global verification
1841 depends_fn = lambda: [(-len(jobs), [])]
1843 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1844 ignore_errors=self.op.ignore_errors,
1845 depends=depends_fn())]
1846 for group in groups)
1848 # Fix up all parameters
1849 for op in itertools.chain(*jobs): # pylint: disable=W0142
1850 op.debug_simulate_errors = self.op.debug_simulate_errors
1851 op.verbose = self.op.verbose
1852 op.error_codes = self.op.error_codes
1854 op.skip_checks = self.op.skip_checks
1855 except AttributeError:
1856 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1858 return ResultWithJobs(jobs)
1861 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1862 """Verifies the cluster config.
1867 def _VerifyHVP(self, hvp_data):
1868 """Verifies locally the syntax of the hypervisor parameters.
1871 for item, hv_name, hv_params in hvp_data:
1872 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1875 hv_class = hypervisor.GetHypervisor(hv_name)
1876 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1877 hv_class.CheckParameterSyntax(hv_params)
1878 except errors.GenericError, err:
1879 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1881 def ExpandNames(self):
1882 # Information can be safely retrieved as the BGL is acquired in exclusive
1884 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1885 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1886 self.all_node_info = self.cfg.GetAllNodesInfo()
1887 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1888 self.needed_locks = {}
1890 def Exec(self, feedback_fn):
1891 """Verify integrity of cluster, performing various test on nodes.
1895 self._feedback_fn = feedback_fn
1897 feedback_fn("* Verifying cluster config")
1899 for msg in self.cfg.VerifyConfig():
1900 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1902 feedback_fn("* Verifying cluster certificate files")
1904 for cert_filename in constants.ALL_CERT_FILES:
1905 (errcode, msg) = _VerifyCertificate(cert_filename)
1906 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1908 feedback_fn("* Verifying hypervisor parameters")
1910 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1911 self.all_inst_info.values()))
1913 feedback_fn("* Verifying all nodes belong to an existing group")
1915 # We do this verification here because, should this bogus circumstance
1916 # occur, it would never be caught by VerifyGroup, which only acts on
1917 # nodes/instances reachable from existing node groups.
1919 dangling_nodes = set(node.name for node in self.all_node_info.values()
1920 if node.group not in self.all_group_info)
1922 dangling_instances = {}
1923 no_node_instances = []
1925 for inst in self.all_inst_info.values():
1926 if inst.primary_node in dangling_nodes:
1927 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1928 elif inst.primary_node not in self.all_node_info:
1929 no_node_instances.append(inst.name)
1934 utils.CommaJoin(dangling_instances.get(node.name,
1936 for node in dangling_nodes]
1938 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1940 "the following nodes (and their instances) belong to a non"
1941 " existing group: %s", utils.CommaJoin(pretty_dangling))
1943 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1945 "the following instances have a non-existing primary-node:"
1946 " %s", utils.CommaJoin(no_node_instances))
1951 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1952 """Verifies the status of a node group.
1955 HPATH = "cluster-verify"
1956 HTYPE = constants.HTYPE_CLUSTER
1959 _HOOKS_INDENT_RE = re.compile("^", re.M)
1961 class NodeImage(object):
1962 """A class representing the logical and physical status of a node.
1965 @ivar name: the node name to which this object refers
1966 @ivar volumes: a structure as returned from
1967 L{ganeti.backend.GetVolumeList} (runtime)
1968 @ivar instances: a list of running instances (runtime)
1969 @ivar pinst: list of configured primary instances (config)
1970 @ivar sinst: list of configured secondary instances (config)
1971 @ivar sbp: dictionary of {primary-node: list of instances} for all
1972 instances for which this node is secondary (config)
1973 @ivar mfree: free memory, as reported by hypervisor (runtime)
1974 @ivar dfree: free disk, as reported by the node (runtime)
1975 @ivar offline: the offline status (config)
1976 @type rpc_fail: boolean
1977 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1978 not whether the individual keys were correct) (runtime)
1979 @type lvm_fail: boolean
1980 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1981 @type hyp_fail: boolean
1982 @ivar hyp_fail: whether the RPC call didn't return the instance list
1983 @type ghost: boolean
1984 @ivar ghost: whether this node is unknown to the configuration, i.e. a ghost node (config)
1985 @type os_fail: boolean
1986 @ivar os_fail: whether the RPC call didn't return valid OS data
1988 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1989 @type vm_capable: boolean
1990 @ivar vm_capable: whether the node can host instances
1993 def __init__(self, offline=False, name=None, vm_capable=True):
2002 self.offline = offline
2003 self.vm_capable = vm_capable
2004 self.rpc_fail = False
2005 self.lvm_fail = False
2006 self.hyp_fail = False
2008 self.os_fail = False
2011 def ExpandNames(self):
2012 # This raises errors.OpPrereqError on its own:
2013 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2015 # Get instances in node group; this is unsafe and needs verification later
2016 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2018 self.needed_locks = {
2019 locking.LEVEL_INSTANCE: inst_names,
2020 locking.LEVEL_NODEGROUP: [self.group_uuid],
2021 locking.LEVEL_NODE: [],
2024 self.share_locks = _ShareAll()
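# All lock levels are requested in shared mode: group verification only
# reads configuration and node state, so it does not need to block
# concurrent jobs.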
2026 def DeclareLocks(self, level):
2027 if level == locking.LEVEL_NODE:
2028 # Get members of node group; this is unsafe and needs verification later
2029 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2031 all_inst_info = self.cfg.GetAllInstancesInfo()
2033 # In Exec(), we warn about mirrored instances that have primary and
2034 # secondary living in separate node groups. To fully verify that
2035 # volumes for these instances are healthy, we will need to do an
2036 # extra call to their secondaries. We ensure here those nodes will be locked.
2038 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2039 # Important: access only the instances whose lock is owned
2040 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2041 nodes.update(all_inst_info[inst].secondary_nodes)
2043 self.needed_locks[locking.LEVEL_NODE] = nodes
2045 def CheckPrereq(self):
2046 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2047 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2049 group_nodes = set(self.group_info.members)
2050 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2053 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2055 unlocked_instances = \
2056 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2059 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2060 utils.CommaJoin(unlocked_nodes))
2062 if unlocked_instances:
2063 raise errors.OpPrereqError("Missing lock for instances: %s" %
2064 utils.CommaJoin(unlocked_instances))
2066 self.all_node_info = self.cfg.GetAllNodesInfo()
2067 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2069 self.my_node_names = utils.NiceSort(group_nodes)
2070 self.my_inst_names = utils.NiceSort(group_instances)
2072 self.my_node_info = dict((name, self.all_node_info[name])
2073 for name in self.my_node_names)
2075 self.my_inst_info = dict((name, self.all_inst_info[name])
2076 for name in self.my_inst_names)
2078 # We detect here the nodes that will need the extra RPC calls for verifying
2079 # split LV volumes; they should be locked.
2080 extra_lv_nodes = set()
2082 for inst in self.my_inst_info.values():
2083 if inst.disk_template in constants.DTS_INT_MIRROR:
2084 group = self.my_node_info[inst.primary_node].group
2085 for nname in inst.secondary_nodes:
2086 if self.all_node_info[nname].group != group:
2087 extra_lv_nodes.add(nname)
2089 unlocked_lv_nodes = \
2090 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2092 if unlocked_lv_nodes:
2093 raise errors.OpPrereqError("Missing node locks for LV verification: %s" %
2094 utils.CommaJoin(unlocked_lv_nodes))
2095 self.extra_lv_nodes = list(extra_lv_nodes)
2097 def _VerifyNode(self, ninfo, nresult):
2098 """Perform some basic validation on data returned from a node.
2100 - check the result data structure is well formed and has all the mandatory fields
2102 - check ganeti version
2104 @type ninfo: L{objects.Node}
2105 @param ninfo: the node to check
2106 @param nresult: the results from the node
2108 @return: whether overall this call was successful (and we can expect
2109 reasonable values in the response)
2113 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2115 # main result, nresult should be a non-empty dict
2116 test = not nresult or not isinstance(nresult, dict)
2117 _ErrorIf(test, constants.CV_ENODERPC, node,
2118 "unable to verify node: no data returned")
2122 # compares ganeti version
2123 local_version = constants.PROTOCOL_VERSION
2124 remote_version = nresult.get("version", None)
2125 test = not (remote_version and
2126 isinstance(remote_version, (list, tuple)) and
2127 len(remote_version) == 2)
2128 _ErrorIf(test, constants.CV_ENODERPC, node,
2129 "connection to node returned invalid data")
2133 test = local_version != remote_version[0]
2134 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2135 "incompatible protocol versions: master %s,"
2136 " node %s", local_version, remote_version[0])
2140 # node seems compatible, we can actually try to look into its results
2142 # full package version
2143 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2144 constants.CV_ENODEVERSION, node,
2145 "software version mismatch: master %s, node %s",
2146 constants.RELEASE_VERSION, remote_version[1],
2147 code=self.ETYPE_WARNING)
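# Note the asymmetry: a PROTOCOL_VERSION mismatch above is a hard error,
# while a differing RELEASE_VERSION only produces a warning here,
# presumably because mixed package versions can still interoperate as long
# as the protocol matches.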
2149 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2150 if ninfo.vm_capable and isinstance(hyp_result, dict):
2151 for hv_name, hv_result in hyp_result.iteritems():
2152 test = hv_result is not None
2153 _ErrorIf(test, constants.CV_ENODEHV, node,
2154 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2156 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2157 if ninfo.vm_capable and isinstance(hvp_result, list):
2158 for item, hv_name, hv_result in hvp_result:
2159 _ErrorIf(True, constants.CV_ENODEHV, node,
2160 "hypervisor %s parameter verify failure (source %s): %s",
2161 hv_name, item, hv_result)
2163 test = nresult.get(constants.NV_NODESETUP,
2164 ["Missing NODESETUP results"])
2165 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2170 def _VerifyNodeTime(self, ninfo, nresult,
2171 nvinfo_starttime, nvinfo_endtime):
2172 """Check the node time.
2174 @type ninfo: L{objects.Node}
2175 @param ninfo: the node to check
2176 @param nresult: the remote results for the node
2177 @param nvinfo_starttime: the start time of the RPC call
2178 @param nvinfo_endtime: the end time of the RPC call
2182 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2184 ntime = nresult.get(constants.NV_TIME, None)
2186 ntime_merged = utils.MergeTime(ntime)
2187 except (ValueError, TypeError):
2188 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2191 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2192 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2193 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2194 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
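# When the node's clock falls inside the allowed window
# [start - NODE_MAX_CLOCK_SKEW, end + NODE_MAX_CLOCK_SKEW], ntime_diff is
# expected to remain None and the check below reports no CV_ENODETIME error.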
2198 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2199 "Node time diverges by at least %s from master node time",
2202 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2203 """Check the node LVM results.
2205 @type ninfo: L{objects.Node}
2206 @param ninfo: the node to check
2207 @param nresult: the remote results for the node
2208 @param vg_name: the configured VG name
2215 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2217 # checks vg existence and size > 20G
2218 vglist = nresult.get(constants.NV_VGLIST, None)
2220 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2222 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2223 constants.MIN_VG_SIZE)
2224 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2227 pvlist = nresult.get(constants.NV_PVLIST, None)
2228 test = pvlist is None
2229 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2231 # check that ':' is not present in PV names, since it's a
2232 # special character for lvcreate (denotes the range of PEs to use on the PV)
2234 for _, pvname, owner_vg in pvlist:
2235 test = ":" in pvname
2236 _ErrorIf(test, constants.CV_ENODELVM, node,
2237 "Invalid character ':' in PV '%s' of VG '%s'",
2240 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2241 """Check the node bridges.
2243 @type ninfo: L{objects.Node}
2244 @param ninfo: the node to check
2245 @param nresult: the remote results for the node
2246 @param bridges: the expected list of bridges
2253 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2255 missing = nresult.get(constants.NV_BRIDGES, None)
2256 test = not isinstance(missing, list)
2257 _ErrorIf(test, constants.CV_ENODENET, node,
2258 "did not return valid bridge information")
2260 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2261 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2263 def _VerifyNodeUserScripts(self, ninfo, nresult):
2264 """Check the presence and executability of user scripts on the node.
2266 @type ninfo: L{objects.Node}
2267 @param ninfo: the node to check
2268 @param nresult: the remote results for the node
2273 test = constants.NV_USERSCRIPTS not in nresult
2274 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2275 "did not return user scripts information")
2277 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2279 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2280 "user scripts not present or not executable: %s" %
2281 utils.CommaJoin(sorted(broken_scripts)))
2283 def _VerifyNodeNetwork(self, ninfo, nresult):
2284 """Check the node network connectivity results.
2286 @type ninfo: L{objects.Node}
2287 @param ninfo: the node to check
2288 @param nresult: the remote results for the node
2292 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294 test = constants.NV_NODELIST not in nresult
2295 _ErrorIf(test, constants.CV_ENODESSH, node,
2296 "node hasn't returned node ssh connectivity data")
2298 if nresult[constants.NV_NODELIST]:
2299 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2300 _ErrorIf(True, constants.CV_ENODESSH, node,
2301 "ssh communication with node '%s': %s", a_node, a_msg)
2303 test = constants.NV_NODENETTEST not in nresult
2304 _ErrorIf(test, constants.CV_ENODENET, node,
2305 "node hasn't returned node tcp connectivity data")
2307 if nresult[constants.NV_NODENETTEST]:
2308 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2310 _ErrorIf(True, constants.CV_ENODENET, node,
2311 "tcp communication with node '%s': %s",
2312 anode, nresult[constants.NV_NODENETTEST][anode])
2314 test = constants.NV_MASTERIP not in nresult
2315 _ErrorIf(test, constants.CV_ENODENET, node,
2316 "node hasn't returned node master IP reachability data")
2318 if not nresult[constants.NV_MASTERIP]:
2319 if node == self.master_node:
2320 msg = "the master node cannot reach the master IP (not configured?)"
2322 msg = "cannot reach the master IP"
2323 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2325 def _VerifyInstance(self, instance, instanceconfig, node_image,
2327 """Verify an instance.
2329 This function checks to see if the required block devices are
2330 available on the instance's node.
2333 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2334 node_current = instanceconfig.primary_node
2336 node_vol_should = {}
2337 instanceconfig.MapLVsByNode(node_vol_should)
2339 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2340 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2341 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2343 for node in node_vol_should:
2344 n_img = node_image[node]
2345 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2346 # ignore missing volumes on offline or broken nodes
2348 for volume in node_vol_should[node]:
2349 test = volume not in n_img.volumes
2350 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2351 "volume %s missing on node %s", volume, node)
2353 if instanceconfig.admin_state == constants.ADMINST_UP:
2354 pri_img = node_image[node_current]
2355 test = instance not in pri_img.instances and not pri_img.offline
2356 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2357 "instance not running on its primary node %s",
2360 diskdata = [(nname, success, status, idx)
2361 for (nname, disks) in diskstatus.items()
2362 for idx, (success, status) in enumerate(disks)]
2364 for nname, success, bdev_status, idx in diskdata:
2366 # the 'ghost node' construction in Exec() ensures that we have a node image entry here
2367 snode = node_image[nname]
2368 bad_snode = snode.ghost or snode.offline
2369 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2370 not success and not bad_snode,
2371 constants.CV_EINSTANCEFAULTYDISK, instance,
2372 "couldn't retrieve status for disk/%s on %s: %s",
2373 idx, nname, bdev_status)
2374 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2375 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2376 constants.CV_EINSTANCEFAULTYDISK, instance,
2377 "disk/%s on %s is faulty", idx, nname)
2379 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2380 """Verify if there are any unknown volumes in the cluster.
2382 The .os, .swap and backup volumes are ignored. All other volumes are
2383 reported as unknown.
2385 @type reserved: L{ganeti.utils.FieldSet}
2386 @param reserved: a FieldSet of reserved volume names
2389 for node, n_img in node_image.items():
2390 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2391 # skip non-healthy nodes
2393 for volume in n_img.volumes:
2394 test = ((node not in node_vol_should or
2395 volume not in node_vol_should[node]) and
2396 not reserved.Matches(volume))
2397 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2398 "volume %s is unknown", volume)
2400 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2401 """Verify N+1 Memory Resilience.
2403 Check that if one single node dies we can still start all the
2404 instances it was primary for.
2407 cluster_info = self.cfg.GetClusterInfo()
2408 for node, n_img in node_image.items():
2409 # This code checks that every node which is now listed as
2410 # secondary has enough memory to host all instances it is
2411 # supposed to, should a single other node in the cluster fail.
2412 # FIXME: not ready for failover to an arbitrary node
2413 # FIXME: does not support file-backed instances
2414 # WARNING: we currently take into account down instances as well
2415 # as up ones, considering that even if they're down someone
2416 # might want to start them even in the event of a node failure.
2418 # we're skipping offline nodes from the N+1 warning, since
2419 # most likely we don't have good memory information from them;
2420 # we already list instances living on such nodes, and that's enough warning
2423 #TODO(dynmem): use MINMEM for checking
2424 #TODO(dynmem): also consider ballooning out other instances
2425 for prinode, instances in n_img.sbp.items():
2427 for instance in instances:
2428 bep = cluster_info.FillBE(instance_cfg[instance])
2429 if bep[constants.BE_AUTO_BALANCE]:
2430 needed_mem += bep[constants.BE_MAXMEM]
2431 test = n_img.mfree < needed_mem
2432 self._ErrorIf(test, constants.CV_ENODEN1, node,
2433 "not enough memory to accommodate instance failovers"
2434 " should node %s fail (%dMiB needed, %dMiB available)",
2435 prinode, needed_mem, n_img.mfree)
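# Worked example (hypothetical numbers): if this node is secondary for
# instances whose primary is node B and their auto-balanced BE_MAXMEM
# values add up to 6144 MiB while the node reports mfree=4096 MiB, a
# CV_ENODEN1 error is raised for it with prinode B in the message.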
2438 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2439 (files_all, files_opt, files_mc, files_vm)):
2440 """Verifies file checksums collected from all nodes.
2442 @param errorif: Callback for reporting errors
2443 @param nodeinfo: List of L{objects.Node} objects
2444 @param master_node: Name of master node
2445 @param all_nvinfo: RPC results
2448 # Define functions determining which nodes to consider for a file
2451 (files_mc, lambda node: (node.master_candidate or
2452 node.name == master_node)),
2453 (files_vm, lambda node: node.vm_capable),
2456 # Build mapping from filename to list of nodes which should have the file
2458 for (files, fn) in files2nodefn:
2460 filenodes = nodeinfo
2462 filenodes = filter(fn, nodeinfo)
2463 nodefiles.update((filename,
2464 frozenset(map(operator.attrgetter("name"), filenodes)))
2465 for filename in files)
2467 assert set(nodefiles) == (files_all | files_mc | files_vm)
2469 fileinfo = dict((filename, {}) for filename in nodefiles)
2470 ignore_nodes = set()
2472 for node in nodeinfo:
2474 ignore_nodes.add(node.name)
2477 nresult = all_nvinfo[node.name]
2479 if nresult.fail_msg or not nresult.payload:
2482 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2484 test = not (node_files and isinstance(node_files, dict))
2485 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2486 "Node did not return file checksum data")
2488 ignore_nodes.add(node.name)
2491 # Build per-checksum mapping from filename to nodes having it
2492 for (filename, checksum) in node_files.items():
2493 assert filename in nodefiles
2494 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2496 for (filename, checksums) in fileinfo.items():
2497 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2499 # Nodes having the file
2500 with_file = frozenset(node_name
2501 for nodes in fileinfo[filename].values()
2502 for node_name in nodes) - ignore_nodes
2504 expected_nodes = nodefiles[filename] - ignore_nodes
2506 # Nodes missing file
2507 missing_file = expected_nodes - with_file
2509 if filename in files_opt:
2511 errorif(missing_file and missing_file != expected_nodes,
2512 constants.CV_ECLUSTERFILECHECK, None,
2513 "File %s is optional, but it must exist on all or no"
2514 " nodes (not found on %s)",
2515 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2517 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2518 "File %s is missing from node(s) %s", filename,
2519 utils.CommaJoin(utils.NiceSort(missing_file)))
2521 # Warn if a node has a file it shouldn't
2522 unexpected = with_file - expected_nodes
2524 constants.CV_ECLUSTERFILECHECK, None,
2525 "File %s should not exist on node(s) %s",
2526 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2528 # See if there are multiple versions of the file
2529 test = len(checksums) > 1
2531 variants = ["variant %s on %s" %
2532 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2533 for (idx, (checksum, nodes)) in
2534 enumerate(sorted(checksums.items()))]
2538 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2539 "File %s found with %s different checksums (%s)",
2540 filename, len(checksums), "; ".join(variants))
2542 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2544 """Verifies the node DRBD status.
2546 @type ninfo: L{objects.Node}
2547 @param ninfo: the node to check
2548 @param nresult: the remote results for the node
2549 @param instanceinfo: the dict of instances
2550 @param drbd_helper: the configured DRBD usermode helper
2551 @param drbd_map: the DRBD map as returned by
2552 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2556 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2559 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2560 test = (helper_result is None)
2561 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2562 "no drbd usermode helper returned")
2564 status, payload = helper_result
2566 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2567 "drbd usermode helper check unsuccessful: %s", payload)
2568 test = status and (payload != drbd_helper)
2569 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2570 "wrong drbd usermode helper: %s", payload)
2572 # compute the DRBD minors
2574 for minor, instance in drbd_map[node].items():
2575 test = instance not in instanceinfo
2576 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2577 "ghost instance '%s' in temporary DRBD map", instance)
2578 # ghost instance should not be running, but otherwise we
2579 # don't give double warnings (both ghost instance and
2580 # unallocated minor in use)
2582 node_drbd[minor] = (instance, False)
2584 instance = instanceinfo[instance]
2585 node_drbd[minor] = (instance.name,
2586 instance.admin_state == constants.ADMINST_UP)
2588 # and now check them
2589 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2590 test = not isinstance(used_minors, (tuple, list))
2591 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2592 "cannot parse drbd status file: %s", str(used_minors))
2594 # we cannot check drbd status
2597 for minor, (iname, must_exist) in node_drbd.items():
2598 test = minor not in used_minors and must_exist
2599 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2600 "drbd minor %d of instance %s is not active", minor, iname)
2601 for minor in used_minors:
2602 test = minor not in node_drbd
2603 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2604 "unallocated drbd minor %d is in use", minor)
2606 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2607 """Builds the node OS structures.
2609 @type ninfo: L{objects.Node}
2610 @param ninfo: the node to check
2611 @param nresult: the remote results for the node
2612 @param nimg: the node image object
2616 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2618 remote_os = nresult.get(constants.NV_OSLIST, None)
2619 test = (not isinstance(remote_os, list) or
2620 not compat.all(isinstance(v, list) and len(v) == 7
2621 for v in remote_os))
2623 _ErrorIf(test, constants.CV_ENODEOS, node,
2624 "node hasn't returned valid OS data")
2633 for (name, os_path, status, diagnose,
2634 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2636 if name not in os_dict:
2639 # parameters is a list of lists instead of list of tuples due to
2640 # JSON lacking a real tuple type, fix it:
2641 parameters = [tuple(v) for v in parameters]
2642 os_dict[name].append((os_path, status, diagnose,
2643 set(variants), set(parameters), set(api_ver)))
2645 nimg.oslist = os_dict
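# Illustrative shape of nimg.oslist (hypothetical OS name and path):
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions); more than one entry means shadowed OS directories.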
2647 def _VerifyNodeOS(self, ninfo, nimg, base):
2648 """Verifies the node OS list.
2650 @type ninfo: L{objects.Node}
2651 @param ninfo: the node to check
2652 @param nimg: the node image object
2653 @param base: the 'template' node we match against (e.g. from the master)
2657 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2659 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2661 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2662 for os_name, os_data in nimg.oslist.items():
2663 assert os_data, "Empty OS status for OS %s?!" % os_name
2664 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2665 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2666 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2667 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2668 "OS '%s' has multiple entries (first one shadows the rest): %s",
2669 os_name, utils.CommaJoin([v[0] for v in os_data]))
2670 # comparisons with the 'base' image
2671 test = os_name not in base.oslist
2672 _ErrorIf(test, constants.CV_ENODEOS, node,
2673 "Extra OS %s not present on reference node (%s)",
2677 assert base.oslist[os_name], "Base node has empty OS status?"
2678 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2680 # base OS is invalid, skipping
2682 for kind, a, b in [("API version", f_api, b_api),
2683 ("variants list", f_var, b_var),
2684 ("parameters", beautify_params(f_param),
2685 beautify_params(b_param))]:
2686 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2687 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2688 kind, os_name, base.name,
2689 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2691 # check any missing OSes
2692 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2693 _ErrorIf(missing, constants.CV_ENODEOS, node,
2694 "OSes present on reference node %s but missing on this node: %s",
2695 base.name, utils.CommaJoin(missing))
2697 def _VerifyOob(self, ninfo, nresult):
2698 """Verifies out of band functionality of a node.
2700 @type ninfo: L{objects.Node}
2701 @param ninfo: the node to check
2702 @param nresult: the remote results for the node
2706 # We just have to verify the paths on master and/or master candidates
2707 # as the oob helper is invoked on the master
2708 if ((ninfo.master_candidate or ninfo.master_capable) and
2709 constants.NV_OOB_PATHS in nresult):
2710 for path_result in nresult[constants.NV_OOB_PATHS]:
2711 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2713 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2714 """Verifies and updates the node volume data.
2716 This function will update a L{NodeImage}'s internal structures
2717 with data from the remote call.
2719 @type ninfo: L{objects.Node}
2720 @param ninfo: the node to check
2721 @param nresult: the remote results for the node
2722 @param nimg: the node image object
2723 @param vg_name: the configured VG name
2727 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2729 nimg.lvm_fail = True
2730 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2733 elif isinstance(lvdata, basestring):
2734 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2735 utils.SafeEncode(lvdata))
2736 elif not isinstance(lvdata, dict):
2737 _ErrorIf(True, constants.CV_ENODELVM, node,
2738 "rpc call to node failed (lvlist)")
2740 nimg.volumes = lvdata
2741 nimg.lvm_fail = False
2743 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2744 """Verifies and updates the node instance list.
2746 If the listing was successful, then updates this node's instance
2747 list. Otherwise, it marks the RPC call as failed for the instance list key.
2750 @type ninfo: L{objects.Node}
2751 @param ninfo: the node to check
2752 @param nresult: the remote results for the node
2753 @param nimg: the node image object
2756 idata = nresult.get(constants.NV_INSTANCELIST, None)
2757 test = not isinstance(idata, list)
2758 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2759 "rpc call to node failed (instancelist): %s",
2760 utils.SafeEncode(str(idata)))
2762 nimg.hyp_fail = True
2764 nimg.instances = idata
2766 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2767 """Verifies and computes a node information map
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2772 @param nimg: the node image object
2773 @param vg_name: the configured VG name
2777 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2779 # try to read free memory (from the hypervisor)
2780 hv_info = nresult.get(constants.NV_HVINFO, None)
2781 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2782 _ErrorIf(test, constants.CV_ENODEHV, node,
2783 "rpc call to node failed (hvinfo)")
2786 nimg.mfree = int(hv_info["memory_free"])
2787 except (ValueError, TypeError):
2788 _ErrorIf(True, constants.CV_ENODERPC, node,
2789 "node returned invalid nodeinfo, check hypervisor")
2791 # FIXME: devise a free space model for file based instances as well
2792 if vg_name is not None:
2793 test = (constants.NV_VGLIST not in nresult or
2794 vg_name not in nresult[constants.NV_VGLIST])
2795 _ErrorIf(test, constants.CV_ENODELVM, node,
2796 "node didn't return data for the volume group '%s'"
2797 " - it is either missing or broken", vg_name)
2800 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2801 except (ValueError, TypeError):
2802 _ErrorIf(True, constants.CV_ENODERPC, node,
2803 "node returned invalid LVM info, check LVM status")
2805 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2806 """Gets per-disk status information for all instances.
2808 @type nodelist: list of strings
2809 @param nodelist: Node names
2810 @type node_image: dict of (name, L{NodeImage})
2811 @param node_image: Node objects
2812 @type instanceinfo: dict of (name, L{objects.Instance})
2813 @param instanceinfo: Instance objects
2814 @rtype: {instance: {node: [(success, payload)]}}
2815 @return: a dictionary of per-instance dictionaries with nodes as
2816 keys and disk information as values; the disk information is a
2817 list of tuples (success, payload)
2820 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2823 node_disks_devonly = {}
2824 diskless_instances = set()
2825 diskless = constants.DT_DISKLESS
2827 for nname in nodelist:
2828 node_instances = list(itertools.chain(node_image[nname].pinst,
2829 node_image[nname].sinst))
2830 diskless_instances.update(inst for inst in node_instances
2831 if instanceinfo[inst].disk_template == diskless)
2832 disks = [(inst, disk)
2833 for inst in node_instances
2834 for disk in instanceinfo[inst].disks]
2837 # No need to collect data
2840 node_disks[nname] = disks
2842 # Creating copies as SetDiskID below will modify the objects and that can
2843 # lead to incorrect data returned from nodes
2844 devonly = [dev.Copy() for (_, dev) in disks]
2847 self.cfg.SetDiskID(dev, nname)
2849 node_disks_devonly[nname] = devonly
2851 assert len(node_disks) == len(node_disks_devonly)
2853 # Collect data from all nodes with disks
2854 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2857 assert len(result) == len(node_disks)
2861 for (nname, nres) in result.items():
2862 disks = node_disks[nname]
2865 # No data from this node
2866 data = len(disks) * [(False, "node offline")]
2869 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2870 "while getting disk information: %s", msg)
2872 # No data from this node
2873 data = len(disks) * [(False, msg)]
2876 for idx, i in enumerate(nres.payload):
2877 if isinstance(i, (tuple, list)) and len(i) == 2:
2880 logging.warning("Invalid result from node %s, entry %d: %s",
2882 data.append((False, "Invalid result from the remote node"))
2884 for ((inst, _), status) in zip(disks, data):
2885 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2887 # Add empty entries for diskless instances.
2888 for inst in diskless_instances:
2889 assert inst not in instdisk
2892 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2893 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2894 compat.all(isinstance(s, (tuple, list)) and
2895 len(s) == 2 for s in statuses)
2896 for inst, nnames in instdisk.items()
2897 for nname, statuses in nnames.items())
2898 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
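# instdisk, which this method hands back to Exec(), is shaped like
# (hypothetical names):
#   {"inst1": {"nodeA": [(True, status0), (True, status1)]},
#    "inst2": {"nodeB": [(False, "node offline")]}}
# i.e. instance -> node -> list of (success, payload) entries, one per disk.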
2903 def _SshNodeSelector(group_uuid, all_nodes):
2904 """Create endless iterators for all potential SSH check hosts.
2907 nodes = [node for node in all_nodes
2908 if (node.group != group_uuid and
2910 keyfunc = operator.attrgetter("group")
2912 return map(itertools.cycle,
2913 [sorted(map(operator.attrgetter("name"), names))
2914 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2918 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2919 """Choose which nodes should talk to which other nodes.
2921 We will make nodes contact all nodes in their group, and one node from each other node group.
2924 @warning: This algorithm has a known issue if one node group is much
2925 smaller than others (e.g. just one node). In such a case all other
2926 nodes will talk to the single node.
2929 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2930 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2932 return (online_nodes,
2933 dict((name, sorted([i.next() for i in sel]))
2934 for name in online_nodes))
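# Illustrative result for three node groups (hypothetical names):
#   (["g1-n1", "g1-n2"],
#    {"g1-n1": ["g2-n1", "g3-n1"], "g1-n2": ["g2-n2", "g3-n2"]})
# i.e. every online node of the verified group is paired with one node from
# each other group, cycling through them so the cross-group checks spread out.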
2936 def BuildHooksEnv(self):
2939 Cluster-Verify hooks run only in the post phase; if they fail, their
2940 output is logged in the verify output and the verification fails.
2944 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2947 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2948 for node in self.my_node_info.values())
2952 def BuildHooksNodes(self):
2953 """Build hooks nodes.
2956 return ([], self.my_node_names)
2958 def Exec(self, feedback_fn):
2959 """Verify integrity of the node group, performing various tests on nodes.
2962 # This method has too many local variables. pylint: disable=R0914
2963 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2965 if not self.my_node_names:
2967 feedback_fn("* Empty node group, skipping verification")
2971 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2972 verbose = self.op.verbose
2973 self._feedback_fn = feedback_fn
2975 vg_name = self.cfg.GetVGName()
2976 drbd_helper = self.cfg.GetDRBDHelper()
2977 cluster = self.cfg.GetClusterInfo()
2978 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2979 hypervisors = cluster.enabled_hypervisors
2980 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2982 i_non_redundant = [] # Non redundant instances
2983 i_non_a_balanced = [] # Non auto-balanced instances
2984 i_offline = 0 # Count of offline instances
2985 n_offline = 0 # Count of offline nodes
2986 n_drained = 0 # Count of nodes being drained
2987 node_vol_should = {}
2989 # FIXME: verify OS list
2992 filemap = _ComputeAncillaryFiles(cluster, False)
2994 # do local checksums
2995 master_node = self.master_node = self.cfg.GetMasterNode()
2996 master_ip = self.cfg.GetMasterIP()
2998 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3001 if self.cfg.GetUseExternalMipScript():
3002 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3004 node_verify_param = {
3005 constants.NV_FILELIST:
3006 utils.UniqueSequence(filename
3007 for files in filemap
3008 for filename in files),
3009 constants.NV_NODELIST:
3010 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3011 self.all_node_info.values()),
3012 constants.NV_HYPERVISOR: hypervisors,
3013 constants.NV_HVPARAMS:
3014 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3015 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3016 for node in node_data_list
3017 if not node.offline],
3018 constants.NV_INSTANCELIST: hypervisors,
3019 constants.NV_VERSION: None,
3020 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3021 constants.NV_NODESETUP: None,
3022 constants.NV_TIME: None,
3023 constants.NV_MASTERIP: (master_node, master_ip),
3024 constants.NV_OSLIST: None,
3025 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3026 constants.NV_USERSCRIPTS: user_scripts,
3029 if vg_name is not None:
3030 node_verify_param[constants.NV_VGLIST] = None
3031 node_verify_param[constants.NV_LVLIST] = vg_name
3032 node_verify_param[constants.NV_PVLIST] = [vg_name]
3033 node_verify_param[constants.NV_DRBDLIST] = None
3036 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3039 # FIXME: this needs to be changed per node-group, not cluster-wide
3041 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3042 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3043 bridges.add(default_nicpp[constants.NIC_LINK])
3044 for instance in self.my_inst_info.values():
3045 for nic in instance.nics:
3046 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3047 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3048 bridges.add(full_nic[constants.NIC_LINK])
3051 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3053 # Build our expected cluster state
3054 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3056 vm_capable=node.vm_capable))
3057 for node in node_data_list)
3061 for node in self.all_node_info.values():
3062 path = _SupportsOob(self.cfg, node)
3063 if path and path not in oob_paths:
3064 oob_paths.append(path)
3067 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3069 for instance in self.my_inst_names:
3070 inst_config = self.my_inst_info[instance]
3072 for nname in inst_config.all_nodes:
3073 if nname not in node_image:
3074 gnode = self.NodeImage(name=nname)
3075 gnode.ghost = (nname not in self.all_node_info)
3076 node_image[nname] = gnode
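# Such a "ghost" image is a placeholder for a node referenced by an
# instance but not covered by this group's data (possibly absent from the
# configuration entirely); it keeps the per-node lookups below from failing
# and is reported separately through the ghost attribute.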
3078 inst_config.MapLVsByNode(node_vol_should)
3080 pnode = inst_config.primary_node
3081 node_image[pnode].pinst.append(instance)
3083 for snode in inst_config.secondary_nodes:
3084 nimg = node_image[snode]
3085 nimg.sinst.append(instance)
3086 if pnode not in nimg.sbp:
3087 nimg.sbp[pnode] = []
3088 nimg.sbp[pnode].append(instance)
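# After this loop nimg.sbp maps, for every node acting as secondary,
# primary node -> list of instances, e.g. (hypothetical)
# {"pnodeA": ["inst1", "inst2"]}; this is the layout consumed by
# _VerifyNPlusOneMemory().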
3090 # At this point, we have the in-memory data structures complete,
3091 # except for the runtime information, which we'll gather next
3093 # Due to the way our RPC system works, exact response times cannot be
3094 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3095 # time before and after executing the request, we can at least have a time window.
3097 nvinfo_starttime = time.time()
3098 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3100 self.cfg.GetClusterName())
3101 nvinfo_endtime = time.time()
3103 if self.extra_lv_nodes and vg_name is not None:
3105 self.rpc.call_node_verify(self.extra_lv_nodes,
3106 {constants.NV_LVLIST: vg_name},
3107 self.cfg.GetClusterName())
3109 extra_lv_nvinfo = {}
3111 all_drbd_map = self.cfg.ComputeDRBDMap()
3113 feedback_fn("* Gathering disk information (%s nodes)" %
3114 len(self.my_node_names))
3115 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3118 feedback_fn("* Verifying configuration file consistency")
3120 # If not all nodes are being checked, we need to make sure the master node
3121 # and a non-checked vm_capable node are in the list.
3122 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3124 vf_nvinfo = all_nvinfo.copy()
3125 vf_node_info = list(self.my_node_info.values())
3126 additional_nodes = []
3127 if master_node not in self.my_node_info:
3128 additional_nodes.append(master_node)
3129 vf_node_info.append(self.all_node_info[master_node])
3130 # Add the first vm_capable node we find which is not included
3131 for node in absent_nodes:
3132 nodeinfo = self.all_node_info[node]
3133 if nodeinfo.vm_capable and not nodeinfo.offline:
3134 additional_nodes.append(node)
3135 vf_node_info.append(self.all_node_info[node])
3137 key = constants.NV_FILELIST
3138 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3139 {key: node_verify_param[key]},
3140 self.cfg.GetClusterName()))
3142 vf_nvinfo = all_nvinfo
3143 vf_node_info = self.my_node_info.values()
3145 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3147 feedback_fn("* Verifying node status")
3151 for node_i in node_data_list:
3153 nimg = node_image[node]
3157 feedback_fn("* Skipping offline node %s" % (node,))
3161 if node == master_node:
3163 elif node_i.master_candidate:
3164 ntype = "master candidate"
3165 elif node_i.drained:
3171 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3173 msg = all_nvinfo[node].fail_msg
3174 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3177 nimg.rpc_fail = True
3180 nresult = all_nvinfo[node].payload
3182 nimg.call_ok = self._VerifyNode(node_i, nresult)
3183 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3184 self._VerifyNodeNetwork(node_i, nresult)
3185 self._VerifyNodeUserScripts(node_i, nresult)
3186 self._VerifyOob(node_i, nresult)
3189 self._VerifyNodeLVM(node_i, nresult, vg_name)
3190 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3193 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3194 self._UpdateNodeInstances(node_i, nresult, nimg)
3195 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3196 self._UpdateNodeOS(node_i, nresult, nimg)
3198 if not nimg.os_fail:
3199 if refos_img is None:
3201 self._VerifyNodeOS(node_i, nimg, refos_img)
3202 self._VerifyNodeBridges(node_i, nresult, bridges)
3204 # Check whether all running instances are primary for the node. (This
3205 # can no longer be done from _VerifyInstance below, since some of the
3206 # wrong instances could be from other node groups.)
3207 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3209 for inst in non_primary_inst:
3210 # FIXME: investigate best way to handle offline insts
3211 if inst.admin_state == constants.ADMINST_OFFLINE:
3213 feedback_fn("* Skipping offline instance %s" % inst.name)
3216 test = inst in self.all_inst_info
3217 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3218 "instance should not run on node %s", node_i.name)
3219 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3220 "node is running unknown instance %s", inst)
3222 for node, result in extra_lv_nvinfo.items():
3223 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3224 node_image[node], vg_name)
3226 feedback_fn("* Verifying instance status")
3227 for instance in self.my_inst_names:
3229 feedback_fn("* Verifying instance %s" % instance)
3230 inst_config = self.my_inst_info[instance]
3231 self._VerifyInstance(instance, inst_config, node_image,
3233 inst_nodes_offline = []
3235 pnode = inst_config.primary_node
3236 pnode_img = node_image[pnode]
3237 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3238 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3239 " primary node failed", instance)
3241 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3243 constants.CV_EINSTANCEBADNODE, instance,
3244 "instance is marked as running and lives on offline node %s",
3245 inst_config.primary_node)
3247 # If the instance is non-redundant we cannot survive losing its primary
3248 # node, so we are not N+1 compliant. On the other hand we have no disk
3249 # templates with more than one secondary so that situation is not well supported either.
3251 # FIXME: does not support file-backed instances
3252 if not inst_config.secondary_nodes:
3253 i_non_redundant.append(instance)
3255 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3256 constants.CV_EINSTANCELAYOUT,
3257 instance, "instance has multiple secondary nodes: %s",
3258 utils.CommaJoin(inst_config.secondary_nodes),
3259 code=self.ETYPE_WARNING)
3261 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3262 pnode = inst_config.primary_node
3263 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3264 instance_groups = {}
3266 for node in instance_nodes:
3267 instance_groups.setdefault(self.all_node_info[node].group,
3271 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3272 # Sort so that we always list the primary node first.
3273 for group, nodes in sorted(instance_groups.items(),
3274 key=lambda (_, nodes): pnode in nodes,
3277 self._ErrorIf(len(instance_groups) > 1,
3278 constants.CV_EINSTANCESPLITGROUPS,
3279 instance, "instance has primary and secondary nodes in"
3280 " different groups: %s", utils.CommaJoin(pretty_list),
3281 code=self.ETYPE_WARNING)
3283 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3284 i_non_a_balanced.append(instance)
3286 for snode in inst_config.secondary_nodes:
3287 s_img = node_image[snode]
3288 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3289 snode, "instance %s, connection to secondary node failed",
3293 inst_nodes_offline.append(snode)
3295 # warn that the instance lives on offline nodes
3296 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3297 "instance has offline secondary node(s) %s",
3298 utils.CommaJoin(inst_nodes_offline))
3299 # ... or ghost/non-vm_capable nodes
3300 for node in inst_config.all_nodes:
3301 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3302 instance, "instance lives on ghost node %s", node)
3303 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3304 instance, "instance lives on non-vm_capable node %s", node)
3306 feedback_fn("* Verifying orphan volumes")
3307 reserved = utils.FieldSet(*cluster.reserved_lvs)
3309 # We will get spurious "unknown volume" warnings if any node of this group
3310 # is secondary for an instance whose primary is in another group. To avoid
3311 # them, we find these instances and add their volumes to node_vol_should.
3312 for inst in self.all_inst_info.values():
3313 for secondary in inst.secondary_nodes:
3314 if (secondary in self.my_node_info
3315 and inst.name not in self.my_inst_info):
3316 inst.MapLVsByNode(node_vol_should)
3319 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3321 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3322 feedback_fn("* Verifying N+1 Memory redundancy")
3323 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3325 feedback_fn("* Other Notes")
3327 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3328 % len(i_non_redundant))
3330 if i_non_a_balanced:
3331 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3332 % len(i_non_a_balanced))
3335 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3338 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3341 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3345 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3346 """Analyze the post-hooks' result
3348 This method analyses the hook result, handles it, and sends some
3349 nicely-formatted feedback back to the user.
3351 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3352 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3353 @param hooks_results: the results of the multi-node hooks rpc call
3354 @param feedback_fn: function used to send feedback back to the caller
3355 @param lu_result: previous Exec result
3356 @return: the new Exec result, based on the previous result
3360 # We only really run POST phase hooks, only for non-empty groups,
3361 # and are only interested in their results
3362 if not self.my_node_names:
3365 elif phase == constants.HOOKS_PHASE_POST:
3366 # Used to change hooks' output to proper indentation
3367 feedback_fn("* Hooks Results")
3368 assert hooks_results, "invalid result from hooks"
3370 for node_name in hooks_results:
3371 res = hooks_results[node_name]
3373 test = msg and not res.offline
3374 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3375 "Communication failure in hooks execution: %s", msg)
3376 if res.offline or msg:
3377 # No need to investigate payload if node is offline or gave an error
3380 for script, hkr, output in res.payload:
3381 test = hkr == constants.HKR_FAIL
3382 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3383 "Script %s failed, output:", script)
3385 output = self._HOOKS_INDENT_RE.sub(" ", output)
3386 feedback_fn("%s" % output)
3392 class LUClusterVerifyDisks(NoHooksLU):
3393 """Verifies the cluster disks status.
3398 def ExpandNames(self):
3399 self.share_locks = _ShareAll()
3400 self.needed_locks = {
3401 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3404 def Exec(self, feedback_fn):
3405 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3407 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3408 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3409 for group in group_names])
3412 class LUGroupVerifyDisks(NoHooksLU):
3413 """Verifies the status of all disks in a node group.
3418 def ExpandNames(self):
3419 # Raises errors.OpPrereqError on its own if group can't be found
3420 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3422 self.share_locks = _ShareAll()
3423 self.needed_locks = {
3424 locking.LEVEL_INSTANCE: [],
3425 locking.LEVEL_NODEGROUP: [],
3426 locking.LEVEL_NODE: [],
3429 def DeclareLocks(self, level):
3430 if level == locking.LEVEL_INSTANCE:
3431 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3433 # Lock instances optimistically, needs verification once node and group
3434 # locks have been acquired
3435 self.needed_locks[locking.LEVEL_INSTANCE] = \
3436 self.cfg.GetNodeGroupInstances(self.group_uuid)
3438 elif level == locking.LEVEL_NODEGROUP:
3439 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3441 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3442 set([self.group_uuid] +
3443 # Lock all groups used by instances optimistically; this requires
3444 # going via the node before it's locked, requiring verification
3447 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3448 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3450 elif level == locking.LEVEL_NODE:
3451 # This will only lock the nodes in the group to be verified which contain actual instances
3453 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3454 self._LockInstancesNodes()
3456 # Lock all nodes in group to be verified
3457 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3458 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3459 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3461 def CheckPrereq(self):
3462 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3463 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3464 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3466 assert self.group_uuid in owned_groups
3468 # Check if locked instances are still correct
3469 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3471 # Get instance information
3472 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3474 # Check if node groups for locked instances are still correct
3475 for (instance_name, inst) in self.instances.items():
3476 assert owned_nodes.issuperset(inst.all_nodes), \
3477 "Instance %s's nodes changed while we kept the lock" % instance_name
3479 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3482 assert self.group_uuid in inst_groups, \
3483 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3485 def Exec(self, feedback_fn):
3486 """Verify integrity of cluster disks.
3488 @rtype: tuple of three items
3489 @return: a tuple of (dict of node-to-node_error, list of instances
3490 which need activate-disks, dict of instance: (node, volume) for
3495 res_instances = set()
3498 nv_dict = _MapInstanceDisksToNodes([inst
3499 for inst in self.instances.values()
3500 if inst.admin_state == constants.ADMINST_UP])
3503 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3504 set(self.cfg.GetVmCapableNodeList()))
3506 node_lvs = self.rpc.call_lv_list(nodes, [])
3508 for (node, node_res) in node_lvs.items():
3509 if node_res.offline:
3512 msg = node_res.fail_msg
3514 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3515 res_nodes[node] = msg
3518 for lv_name, (_, _, lv_online) in node_res.payload.items():
3519 inst = nv_dict.pop((node, lv_name), None)
3520 if not (lv_online or inst is None):
3521 res_instances.add(inst)
3523 # any leftover items in nv_dict are missing LVs, let's arrange the data
3525 for key, inst in nv_dict.iteritems():
3526 res_missing.setdefault(inst, []).append(list(key))
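# Illustrative return value (hypothetical names):
#   ({"node3": "Error enumerating LVs ..."},      # per-node errors
#    ["inst2"],                                   # need activate-disks
#    {"inst5": [["node1", "xenvg/disk0_data"]]})  # missing LVs per instance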
3528 return (res_nodes, list(res_instances), res_missing)
3531 class LUClusterRepairDiskSizes(NoHooksLU):
3532 """Verifies the cluster disk sizes.
3537 def ExpandNames(self):
3538 if self.op.instances:
3539 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3540 self.needed_locks = {
3541 locking.LEVEL_NODE_RES: [],
3542 locking.LEVEL_INSTANCE: self.wanted_names,
3544 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3546 self.wanted_names = None
3547 self.needed_locks = {
3548 locking.LEVEL_NODE_RES: locking.ALL_SET,
3549 locking.LEVEL_INSTANCE: locking.ALL_SET,
3551 self.share_locks = {
3552 locking.LEVEL_NODE_RES: 1,
3553 locking.LEVEL_INSTANCE: 0,
3556 def DeclareLocks(self, level):
3557 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3558 self._LockInstancesNodes(primary_only=True, level=level)
3560 def CheckPrereq(self):
3561 """Check prerequisites.
3563 This only checks the optional instance list against the existing names.
3566 if self.wanted_names is None:
3567 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3569 self.wanted_instances = \
3570 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3572 def _EnsureChildSizes(self, disk):
3573 """Ensure children of the disk have the needed disk size.
3575 This is valid mainly for DRBD8 and fixes an issue where the
3576 children have a smaller disk size.
3578 @param disk: an L{ganeti.objects.Disk} object
3581 if disk.dev_type == constants.LD_DRBD8:
3582 assert disk.children, "Empty children for DRBD8?"
3583 fchild = disk.children[0]
3584 mismatch = fchild.size < disk.size
3586 self.LogInfo("Child disk has size %d, parent %d, fixing",
3587 fchild.size, disk.size)
3588 fchild.size = disk.size
3590 # and we recurse on this child only, not on the metadev
3591 return self._EnsureChildSizes(fchild) or mismatch
3595 def Exec(self, feedback_fn):
3596 """Verify the size of cluster disks.
3599 # TODO: check child disks too
3600 # TODO: check differences in size between primary/secondary nodes
3602 for instance in self.wanted_instances:
3603 pnode = instance.primary_node
3604 if pnode not in per_node_disks:
3605 per_node_disks[pnode] = []
3606 for idx, disk in enumerate(instance.disks):
3607 per_node_disks[pnode].append((instance, idx, disk))
3609 assert not (frozenset(per_node_disks.keys()) -
3610 self.owned_locks(locking.LEVEL_NODE_RES)), \
3611 "Not owning correct locks"
3612 assert not self.owned_locks(locking.LEVEL_NODE)
3615 for node, dskl in per_node_disks.items():
3616 newl = [v[2].Copy() for v in dskl]
3618 self.cfg.SetDiskID(dsk, node)
3619 result = self.rpc.call_blockdev_getsize(node, newl)
3621 self.LogWarning("Failure in blockdev_getsize call to node"
3622 " %s, ignoring", node)
3624 if len(result.payload) != len(dskl):
3625 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3626 " result.payload=%s", node, len(dskl), result.payload)
3627 self.LogWarning("Invalid result from node %s, ignoring node results",
3630 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3632 self.LogWarning("Disk %d of instance %s did not return size"
3633 " information, ignoring", idx, instance.name)
3635 if not isinstance(size, (int, long)):
3636 self.LogWarning("Disk %d of instance %s did not return valid"
3637 " size information, ignoring", idx, instance.name)
3640 if size != disk.size:
3641 self.LogInfo("Disk %d of instance %s has mismatched size,"
3642 " correcting: recorded %d, actual %d", idx,
3643 instance.name, disk.size, size)
3645 self.cfg.Update(instance, feedback_fn)
3646 changed.append((instance.name, idx, size))
3647 if self._EnsureChildSizes(disk):
3648 self.cfg.Update(instance, feedback_fn)
3649 changed.append((instance.name, idx, disk.size))
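# Each entry appended to 'changed' is an (instance_name, disk_index, size)
# tuple describing a correction written back to the configuration; the
# accumulated list is what this LU ultimately returns to the caller.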
3653 class LUClusterRename(LogicalUnit):
3654 """Rename the cluster.
3657 HPATH = "cluster-rename"
3658 HTYPE = constants.HTYPE_CLUSTER
3660 def BuildHooksEnv(self):
3665 "OP_TARGET": self.cfg.GetClusterName(),
3666 "NEW_NAME": self.op.name,
3669 def BuildHooksNodes(self):
3670 """Build hooks nodes.
3673 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3675 def CheckPrereq(self):
3676 """Verify that the passed name is a valid one.
3679 hostname = netutils.GetHostname(name=self.op.name,
3680 family=self.cfg.GetPrimaryIPFamily())
3682 new_name = hostname.name
3683 self.ip = new_ip = hostname.ip
3684 old_name = self.cfg.GetClusterName()
3685 old_ip = self.cfg.GetMasterIP()
3686 if new_name == old_name and new_ip == old_ip:
3687 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3688 " cluster has changed",
3690 if new_ip != old_ip:
3691 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3692 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3693 " reachable on the network" %
3694 new_ip, errors.ECODE_NOTUNIQUE)
3696 self.op.name = new_name
3698 def Exec(self, feedback_fn):
3699 """Rename the cluster.
3702 clustername = self.op.name
3705 # shutdown the master IP
3706 master_params = self.cfg.GetMasterNetworkParameters()
3707 ems = self.cfg.GetUseExternalMipScript()
3708 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3710 result.Raise("Could not disable the master role")
3713 cluster = self.cfg.GetClusterInfo()
3714 cluster.cluster_name = clustername
3715 cluster.master_ip = new_ip
3716 self.cfg.Update(cluster, feedback_fn)
3718 # update the known hosts file
3719 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3720 node_list = self.cfg.GetOnlineNodeList()
3722 node_list.remove(master_params.name)
3725 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3727 master_params.ip = new_ip
3728 result = self.rpc.call_node_activate_master_ip(master_params.name,
3730 msg = result.fail_msg
3732 self.LogWarning("Could not re-enable the master role on"
3733 " the master, please restart manually: %s", msg)
3738 def _ValidateNetmask(cfg, netmask):
3739 """Checks if a netmask is valid.
3741 @type cfg: L{config.ConfigWriter}
3742 @param cfg: The cluster configuration
3744 @param netmask: the netmask to be verified
3745 @raise errors.OpPrereqError: if the validation fails
3748 ip_family = cfg.GetPrimaryIPFamily()
3750 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3751 except errors.ProgrammerError:
3752 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3754 if not ipcls.ValidateNetmask(netmask):
3755 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
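
# Illustrative sketch only (not part of the original module): how the netmask
# helper above is meant to be used from an LU's CheckArguments; the wrapper
# name is hypothetical and simply turns the exception into a boolean.
def _ExampleNetmaskIsValid(cfg, netmask):
  """Returns whether NETMASK would be accepted by L{_ValidateNetmask}.

  """
  try:
    _ValidateNetmask(cfg, netmask)
  except errors.OpPrereqError:
    return False
  return True
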
3759 class LUClusterSetParams(LogicalUnit):
3760 """Change the parameters of the cluster.
3763 HPATH = "cluster-modify"
3764 HTYPE = constants.HTYPE_CLUSTER
3767 def CheckArguments(self):
3771 if self.op.uid_pool:
3772 uidpool.CheckUidPool(self.op.uid_pool)
3774 if self.op.add_uids:
3775 uidpool.CheckUidPool(self.op.add_uids)
3777 if self.op.remove_uids:
3778 uidpool.CheckUidPool(self.op.remove_uids)
3780 if self.op.master_netmask is not None:
3781 _ValidateNetmask(self.cfg, self.op.master_netmask)
3783 if self.op.diskparams:
3784 for dt_params in self.op.diskparams.values():
3785 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3787 def ExpandNames(self):
3788 # FIXME: in the future maybe other cluster params won't require checking on
3789 # all nodes to be modified.
3790 self.needed_locks = {
3791 locking.LEVEL_NODE: locking.ALL_SET,
3792 locking.LEVEL_INSTANCE: locking.ALL_SET,
3793 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3795 self.share_locks = {
3796 locking.LEVEL_NODE: 1,
3797 locking.LEVEL_INSTANCE: 1,
3798 locking.LEVEL_NODEGROUP: 1,
3801 def BuildHooksEnv(self):
3806 "OP_TARGET": self.cfg.GetClusterName(),
3807 "NEW_VG_NAME": self.op.vg_name,
3810 def BuildHooksNodes(self):
3811 """Build hooks nodes.
3814 mn = self.cfg.GetMasterNode()
3817 def CheckPrereq(self):
3818 """Check prerequisites.
3820 This checks that the given parameters do not conflict and that
3821 the given volume group is valid.
3824 if self.op.vg_name is not None and not self.op.vg_name:
3825 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3826 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3827 " instances exist", errors.ECODE_INVAL)
3829 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3830 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3831 raise errors.OpPrereqError("Cannot disable drbd helper while"
3832 " drbd-based instances exist",
3835 node_list = self.owned_locks(locking.LEVEL_NODE)
3837 # if vg_name not None, checks given volume group on all nodes
3839 vglist = self.rpc.call_vg_list(node_list)
3840 for node in node_list:
3841 msg = vglist[node].fail_msg
3843 # ignoring down node
3844 self.LogWarning("Error while gathering data on node %s"
3845 " (ignoring node): %s", node, msg)
3847 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3849 constants.MIN_VG_SIZE)
3851 raise errors.OpPrereqError("Error on node '%s': %s" %
3852 (node, vgstatus), errors.ECODE_ENVIRON)
3854 if self.op.drbd_helper:
3855 # checks given drbd helper on all nodes
3856 helpers = self.rpc.call_drbd_helper(node_list)
3857 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3859 self.LogInfo("Not checking drbd helper on offline node %s", node)
3861 msg = helpers[node].fail_msg
3863 raise errors.OpPrereqError("Error checking drbd helper on node"
3864 " '%s': %s" % (node, msg),
3865 errors.ECODE_ENVIRON)
3866 node_helper = helpers[node].payload
3867 if node_helper != self.op.drbd_helper:
3868 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3869 (node, node_helper), errors.ECODE_ENVIRON)
3871 self.cluster = cluster = self.cfg.GetClusterInfo()
3872 # validate params changes
3873 if self.op.beparams:
3874 objects.UpgradeBeParams(self.op.beparams)
3875 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3876 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3878 if self.op.ndparams:
3879 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3880 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3882 # TODO: we need a more general way to handle resetting
3883 # cluster-level parameters to default values
3884 if self.new_ndparams["oob_program"] == "":
3885 self.new_ndparams["oob_program"] = \
3886 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3888 if self.op.hv_state:
3889 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3890 self.cluster.hv_state_static)
3891 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3892 for hv, values in new_hv_state.items())
3894 if self.op.disk_state:
3895 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3896 self.cluster.disk_state_static)
3897 self.new_disk_state = \
3898 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3899 for name, values in svalues.items()))
3900 for storage, svalues in new_disk_state.items())
3903 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3906 all_instances = self.cfg.GetAllInstancesInfo().values()
3908 for group in self.cfg.GetAllNodeGroupsInfo().values():
3909 instances = frozenset([inst for inst in all_instances
3910 if compat.any(node in group.members
3911 for node in inst.all_nodes)])
3912 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3913 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3915 new_ipolicy, instances)
3917 violations.update(new)
3920 self.LogWarning("After the ipolicy change the following instances"
3921 " violate them: %s",
3922 utils.CommaJoin(violations))
3924 if self.op.nicparams:
3925 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3926 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3927 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3930 # check all instances for consistency
3931 for instance in self.cfg.GetAllInstancesInfo().values():
3932 for nic_idx, nic in enumerate(instance.nics):
3933 params_copy = copy.deepcopy(nic.nicparams)
3934 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3936 # check parameter syntax
3938 objects.NIC.CheckParameterSyntax(params_filled)
3939 except errors.ConfigurationError, err:
3940 nic_errors.append("Instance %s, nic/%d: %s" %
3941 (instance.name, nic_idx, err))
3943 # if we're moving instances to routed, check that they have an ip
3944 target_mode = params_filled[constants.NIC_MODE]
3945 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3946 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3947 " address" % (instance.name, nic_idx))
3949 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3950 "\n".join(nic_errors))
3952 # hypervisor list/parameters
3953 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3954 if self.op.hvparams:
3955 for hv_name, hv_dict in self.op.hvparams.items():
3956 if hv_name not in self.new_hvparams:
3957 self.new_hvparams[hv_name] = hv_dict
3959 self.new_hvparams[hv_name].update(hv_dict)
3961 # disk template parameters
3962 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3963 if self.op.diskparams:
3964 for dt_name, dt_params in self.op.diskparams.items():
3965 if dt_name not in self.new_diskparams:
3966 self.new_diskparams[dt_name] = dt_params
3968 self.new_diskparams[dt_name].update(dt_params)
3970 # os hypervisor parameters
3971 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3973 for os_name, hvs in self.op.os_hvp.items():
3974 if os_name not in self.new_os_hvp:
3975 self.new_os_hvp[os_name] = hvs
3977 for hv_name, hv_dict in hvs.items():
3978 if hv_name not in self.new_os_hvp[os_name]:
3979 self.new_os_hvp[os_name][hv_name] = hv_dict
3981 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3984 self.new_osp = objects.FillDict(cluster.osparams, {})
3985 if self.op.osparams:
3986 for os_name, osp in self.op.osparams.items():
3987 if os_name not in self.new_osp:
3988 self.new_osp[os_name] = {}
3990 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3993 if not self.new_osp[os_name]:
3994 # we removed all parameters
3995 del self.new_osp[os_name]
3997 # check the parameter validity (remote check)
3998 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3999 os_name, self.new_osp[os_name])
4001 # changes to the hypervisor list
4002 if self.op.enabled_hypervisors is not None:
4003 self.hv_list = self.op.enabled_hypervisors
4004 for hv in self.hv_list:
4005 # if the hypervisor doesn't already exist in the cluster
4006 # hvparams, we initialize it to empty, and then (in both
4007 # cases) we make sure to fill the defaults, as we might not
4008 # have a complete defaults list if the hypervisor wasn't enabled before
4010 if hv not in new_hvp:
4011 new_hvp[hv] = {}
4012 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4013 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4015 self.hv_list = cluster.enabled_hypervisors
4017 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4018 # either the enabled list has changed, or the parameters have, validate
4019 for hv_name, hv_params in self.new_hvparams.items():
4020 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4021 (self.op.enabled_hypervisors and
4022 hv_name in self.op.enabled_hypervisors)):
4023 # either this is a new hypervisor, or its parameters have changed
4024 hv_class = hypervisor.GetHypervisor(hv_name)
4025 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4026 hv_class.CheckParameterSyntax(hv_params)
4027 _CheckHVParams(self, node_list, hv_name, hv_params)
4030 # no need to check any newly-enabled hypervisors, since the
4031 # defaults have already been checked in the above code-block
4032 for os_name, os_hvp in self.new_os_hvp.items():
4033 for hv_name, hv_params in os_hvp.items():
4034 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4035 # we need to fill in the new os_hvp on top of the actual hv_p
4036 cluster_defaults = self.new_hvparams.get(hv_name, {})
4037 new_osp = objects.FillDict(cluster_defaults, hv_params)
4038 hv_class = hypervisor.GetHypervisor(hv_name)
4039 hv_class.CheckParameterSyntax(new_osp)
4040 _CheckHVParams(self, node_list, hv_name, new_osp)
4042 if self.op.default_iallocator:
4043 alloc_script = utils.FindFile(self.op.default_iallocator,
4044 constants.IALLOCATOR_SEARCH_PATH,
4046 if alloc_script is None:
4047 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4048 " specified" % self.op.default_iallocator,
4051 def Exec(self, feedback_fn):
4052 """Change the parameters of the cluster.
4055 if self.op.vg_name is not None:
4056 new_volume = self.op.vg_name
4059 if new_volume != self.cfg.GetVGName():
4060 self.cfg.SetVGName(new_volume)
4062 feedback_fn("Cluster LVM configuration already in desired"
4063 " state, not changing")
4064 if self.op.drbd_helper is not None:
4065 new_helper = self.op.drbd_helper
4068 if new_helper != self.cfg.GetDRBDHelper():
4069 self.cfg.SetDRBDHelper(new_helper)
4071 feedback_fn("Cluster DRBD helper already in desired state,"
4073 if self.op.hvparams:
4074 self.cluster.hvparams = self.new_hvparams
4076 self.cluster.os_hvp = self.new_os_hvp
4077 if self.op.enabled_hypervisors is not None:
4078 self.cluster.hvparams = self.new_hvparams
4079 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4080 if self.op.beparams:
4081 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4082 if self.op.nicparams:
4083 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4085 self.cluster.ipolicy = self.new_ipolicy
4086 if self.op.osparams:
4087 self.cluster.osparams = self.new_osp
4088 if self.op.ndparams:
4089 self.cluster.ndparams = self.new_ndparams
4090 if self.op.diskparams:
4091 self.cluster.diskparams = self.new_diskparams
4092 if self.op.hv_state:
4093 self.cluster.hv_state_static = self.new_hv_state
4094 if self.op.disk_state:
4095 self.cluster.disk_state_static = self.new_disk_state
4097 if self.op.candidate_pool_size is not None:
4098 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4099 # we need to update the pool size here, otherwise the save will fail
4100 _AdjustCandidatePool(self, [])
4102 if self.op.maintain_node_health is not None:
4103 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4104 feedback_fn("Note: CONFD was disabled at build time, node health"
4105 " maintenance is not useful (still enabling it)")
4106 self.cluster.maintain_node_health = self.op.maintain_node_health
4108 if self.op.prealloc_wipe_disks is not None:
4109 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4111 if self.op.add_uids is not None:
4112 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4114 if self.op.remove_uids is not None:
4115 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4117 if self.op.uid_pool is not None:
4118 self.cluster.uid_pool = self.op.uid_pool
4120 if self.op.default_iallocator is not None:
4121 self.cluster.default_iallocator = self.op.default_iallocator
4123 if self.op.reserved_lvs is not None:
4124 self.cluster.reserved_lvs = self.op.reserved_lvs
4126 if self.op.use_external_mip_script is not None:
4127 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4129 def helper_os(aname, mods, desc):
4131 lst = getattr(self.cluster, aname)
4132 for key, val in mods:
4133 if key == constants.DDM_ADD:
4135 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4138 elif key == constants.DDM_REMOVE:
4142 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4144 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4146 if self.op.hidden_os:
4147 helper_os("hidden_os", self.op.hidden_os, "hidden")
4149 if self.op.blacklisted_os:
4150 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4152 if self.op.master_netdev:
4153 master_params = self.cfg.GetMasterNetworkParameters()
4154 ems = self.cfg.GetUseExternalMipScript()
4155 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4156 self.cluster.master_netdev)
4157 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4159 result.Raise("Could not disable the master ip")
4160 feedback_fn("Changing master_netdev from %s to %s" %
4161 (master_params.netdev, self.op.master_netdev))
4162 self.cluster.master_netdev = self.op.master_netdev
4164 if self.op.master_netmask:
4165 master_params = self.cfg.GetMasterNetworkParameters()
4166 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4167 result = self.rpc.call_node_change_master_netmask(master_params.name,
4168 master_params.netmask,
4169 self.op.master_netmask,
4171 master_params.netdev)
4173 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4176 self.cluster.master_netmask = self.op.master_netmask
4178 self.cfg.Update(self.cluster, feedback_fn)
4180 if self.op.master_netdev:
4181 master_params = self.cfg.GetMasterNetworkParameters()
4182 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4183 self.op.master_netdev)
4184 ems = self.cfg.GetUseExternalMipScript()
4185 result = self.rpc.call_node_activate_master_ip(master_params.name,
4188 self.LogWarning("Could not re-enable the master ip on"
4189 " the master, please restart manually: %s",
4193 def _UploadHelper(lu, nodes, fname):
4194 """Helper for uploading a file and showing warnings.
4197 if os.path.exists(fname):
4198 result = lu.rpc.call_upload_file(nodes, fname)
4199 for to_node, to_result in result.items():
4200 msg = to_result.fail_msg
4202 msg = ("Copy of file %s to node %s failed: %s" %
4203 (fname, to_node, msg))
4204 lu.proc.LogWarning(msg)
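
# Illustrative sketch only (not part of the original module): the usual calling
# pattern for _UploadHelper, mirroring how LUClusterRename pushes the
# known_hosts file to every online node except the master.  The helper name is
# hypothetical.
def _ExamplePushKnownHosts(lu):
  """Uploads the cluster known_hosts file to all online non-master nodes.

  """
  node_list = lu.cfg.GetOnlineNodeList()
  master_name = lu.cfg.GetMasterNode()
  if master_name in node_list:
    node_list.remove(master_name)
  _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)
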
4207 def _ComputeAncillaryFiles(cluster, redist):
4208 """Compute files external to Ganeti which need to be consistent.
4210 @type redist: boolean
4211 @param redist: Whether to include files which need to be redistributed
4214 # Compute files for all nodes
4216 constants.SSH_KNOWN_HOSTS_FILE,
4217 constants.CONFD_HMAC_KEY,
4218 constants.CLUSTER_DOMAIN_SECRET_FILE,
4219 constants.SPICE_CERT_FILE,
4220 constants.SPICE_CACERT_FILE,
4221 constants.RAPI_USERS_FILE,
4225 files_all.update(constants.ALL_CERT_FILES)
4226 files_all.update(ssconf.SimpleStore().GetFileList())
4228 # we need to ship at least the RAPI certificate
4229 files_all.add(constants.RAPI_CERT_FILE)
4231 if cluster.modify_etc_hosts:
4232 files_all.add(constants.ETC_HOSTS)
4234 # Files which are optional; these must:
4235 # - be present in one other category as well
4236 # - either exist or not exist on all nodes of that category (mc, vm all)
4238 constants.RAPI_USERS_FILE,
4241 # Files which should only be on master candidates
4245 files_mc.add(constants.CLUSTER_CONF_FILE)
4247 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4249 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4251 # Files which should only be on VM-capable nodes
4252 files_vm = set(filename
4253 for hv_name in cluster.enabled_hypervisors
4254 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4256 files_opt |= set(filename
4257 for hv_name in cluster.enabled_hypervisors
4258 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4260 # Filenames in each category must be unique
4261 all_files_set = files_all | files_mc | files_vm
4262 assert (len(all_files_set) ==
4263 sum(map(len, [files_all, files_mc, files_vm]))), \
4264 "Found file listed in more than one file list"
4266 # Optional files must be present in one other category
4267 assert all_files_set.issuperset(files_opt), \
4268 "Optional file not in a different required list"
4270 return (files_all, files_opt, files_mc, files_vm)
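
# Illustrative sketch only (not part of the original module): how the file
# categories computed above are typically consumed.  Optional files are always
# a subset of another category, so a flat list of everything that may be
# shipped is just the union of the three required sets.  The helper name is
# hypothetical.
def _ExampleAllDistributedFiles(cluster):
  """Returns the set of all files that redistribution may touch.

  """
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
  return files_all | files_mc | files_vm
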
4273 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4274 """Distribute additional files which are part of the cluster configuration.
4276 ConfigWriter takes care of distributing the config and ssconf files, but
4277 there are more files which should be distributed to all nodes. This function
4278 makes sure those are copied.
4280 @param lu: calling logical unit
4281 @param additional_nodes: list of nodes not in the config to distribute to
4282 @type additional_vm: boolean
4283 @param additional_vm: whether the additional nodes are vm-capable or not
4286 # Gather target nodes
4287 cluster = lu.cfg.GetClusterInfo()
4288 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4290 online_nodes = lu.cfg.GetOnlineNodeList()
4291 vm_nodes = lu.cfg.GetVmCapableNodeList()
4293 if additional_nodes is not None:
4294 online_nodes.extend(additional_nodes)
4296 vm_nodes.extend(additional_nodes)
4298 # Never distribute to master node
4299 for nodelist in [online_nodes, vm_nodes]:
4300 if master_info.name in nodelist:
4301 nodelist.remove(master_info.name)
4304 (files_all, _, files_mc, files_vm) = \
4305 _ComputeAncillaryFiles(cluster, True)
4307 # Never re-distribute configuration file from here
4308 assert not (constants.CLUSTER_CONF_FILE in files_all or
4309 constants.CLUSTER_CONF_FILE in files_vm)
4310 assert not files_mc, "Master candidates not handled in this function"
4313 (online_nodes, files_all),
4314 (vm_nodes, files_vm),
4318 for (node_list, files) in filemap:
4320 _UploadHelper(lu, node_list, fname)
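
# Illustrative note (not part of the original module): typical call sites of
# the helper above.  A plain redistribution calls it without arguments (see
# LUClusterRedistConf below), while node addition lists the new node
# explicitly, e.g.:
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)
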
4323 class LUClusterRedistConf(NoHooksLU):
4324 """Force the redistribution of cluster configuration.
4326 This is a very simple LU.
4331 def ExpandNames(self):
4332 self.needed_locks = {
4333 locking.LEVEL_NODE: locking.ALL_SET,
4335 self.share_locks[locking.LEVEL_NODE] = 1
4337 def Exec(self, feedback_fn):
4338 """Redistribute the configuration.
4341 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4342 _RedistributeAncillaryFiles(self)
4345 class LUClusterActivateMasterIp(NoHooksLU):
4346 """Activate the master IP on the master node.
4349 def Exec(self, feedback_fn):
4350 """Activate the master IP.
4353 master_params = self.cfg.GetMasterNetworkParameters()
4354 ems = self.cfg.GetUseExternalMipScript()
4355 result = self.rpc.call_node_activate_master_ip(master_params.name,
4357 result.Raise("Could not activate the master IP")
4360 class LUClusterDeactivateMasterIp(NoHooksLU):
4361 """Deactivate the master IP on the master node.
4364 def Exec(self, feedback_fn):
4365 """Deactivate the master IP.
4368 master_params = self.cfg.GetMasterNetworkParameters()
4369 ems = self.cfg.GetUseExternalMipScript()
4370 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4372 result.Raise("Could not deactivate the master IP")
4375 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4376 """Sleep and poll for an instance's disks to sync.
4379 if not instance.disks or disks is not None and not disks:
4382 disks = _ExpandCheckDisks(instance, disks)
4385 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4387 node = instance.primary_node
4390 lu.cfg.SetDiskID(dev, node)
4392 # TODO: Convert to utils.Retry
4395 degr_retries = 10 # in seconds, as we sleep 1 second each time
4399 cumul_degraded = False
4400 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4401 msg = rstats.fail_msg
4403 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4406 raise errors.RemoteError("Can't contact node %s for mirror data,"
4407 " aborting." % node)
4410 rstats = rstats.payload
4412 for i, mstat in enumerate(rstats):
4414 lu.LogWarning("Can't compute data for node %s/%s",
4415 node, disks[i].iv_name)
4418 cumul_degraded = (cumul_degraded or
4419 (mstat.is_degraded and mstat.sync_percent is None))
4420 if mstat.sync_percent is not None:
4422 if mstat.estimated_time is not None:
4423 rem_time = ("%s remaining (estimated)" %
4424 utils.FormatSeconds(mstat.estimated_time))
4425 max_time = mstat.estimated_time
4427 rem_time = "no time estimate"
4428 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4429 (disks[i].iv_name, mstat.sync_percent, rem_time))
4431 # if we're done but degraded, let's do a few small retries, to
4432 # make sure we see a stable and not transient situation; therefore
4433 # we force restart of the loop
4434 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4435 logging.info("Degraded disks found, %d retries left", degr_retries)
4443 time.sleep(min(60, max_time))
4446 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4447 return not cumul_degraded
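
# Illustrative sketch only (not part of the original module): the
# "done but still degraded" retry used by _WaitForSync above, shown in
# isolation with a caller-supplied probe callable.  All names are hypothetical.
def _ExampleWaitStable(probe_fn, retries=10, delay=1):
  """Polls PROBE_FN until it reports a stable, non-degraded state.

  @param probe_fn: callable returning a (done, degraded) tuple of booleans
  @return: True if the final state was not degraded

  """
  while True:
    (done, degraded) = probe_fn()
    if not done:
      time.sleep(delay)
    elif degraded and retries > 0:
      # done but degraded: retry a few times to rule out a transient state
      retries -= 1
      time.sleep(delay)
    else:
      return not degraded
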
4450 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4451 """Check that mirrors are not degraded.
4453 The ldisk parameter, if True, will change the test from the
4454 is_degraded attribute (which represents overall non-ok status for
4455 the device(s)) to the ldisk (representing the local storage status).
4458 lu.cfg.SetDiskID(dev, node)
4462 if on_primary or dev.AssembleOnSecondary():
4463 rstats = lu.rpc.call_blockdev_find(node, dev)
4464 msg = rstats.fail_msg
4466 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4468 elif not rstats.payload:
4469 lu.LogWarning("Can't find disk on node %s", node)
4473 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4475 result = result and not rstats.payload.is_degraded
4478 for child in dev.children:
4479 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
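
# Illustrative sketch only (not part of the original module): checking every
# disk of an instance on its primary node with the helper above.  The helper
# name is hypothetical.
def _ExampleInstanceDisksConsistent(lu, instance):
  """Returns True if all disks of INSTANCE look healthy on its primary node.

  """
  node = instance.primary_node
  return all(_CheckDiskConsistency(lu, dev, node, True)
             for dev in instance.disks)
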
4484 class LUOobCommand(NoHooksLU):
4485 """Logical unit for OOB handling.
4489 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4491 def ExpandNames(self):
4492 """Gather locks we need.
4495 if self.op.node_names:
4496 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4497 lock_names = self.op.node_names
4499 lock_names = locking.ALL_SET
4501 self.needed_locks = {
4502 locking.LEVEL_NODE: lock_names,
4505 def CheckPrereq(self):
4506 """Check prerequisites.
4509 - the node exists in the configuration
4512 Any errors are signaled by raising errors.OpPrereqError.
4516 self.master_node = self.cfg.GetMasterNode()
4518 assert self.op.power_delay >= 0.0
4520 if self.op.node_names:
4521 if (self.op.command in self._SKIP_MASTER and
4522 self.master_node in self.op.node_names):
4523 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4524 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4526 if master_oob_handler:
4527 additional_text = ("run '%s %s %s' if you want to operate on the"
4528 " master regardless") % (master_oob_handler,
4532 additional_text = "it does not support out-of-band operations"
4534 raise errors.OpPrereqError(("Operating on the master node %s is not"
4535 " allowed for %s; %s") %
4536 (self.master_node, self.op.command,
4537 additional_text), errors.ECODE_INVAL)
4539 self.op.node_names = self.cfg.GetNodeList()
4540 if self.op.command in self._SKIP_MASTER:
4541 self.op.node_names.remove(self.master_node)
4543 if self.op.command in self._SKIP_MASTER:
4544 assert self.master_node not in self.op.node_names
4546 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4548 raise errors.OpPrereqError("Node %s not found" % node_name,
4551 self.nodes.append(node)
4553 if (not self.op.ignore_status and
4554 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4555 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4556 " not marked offline") % node_name,
4559 def Exec(self, feedback_fn):
4560 """Execute OOB and return result if we expect any.
4563 master_node = self.master_node
4566 for idx, node in enumerate(utils.NiceSort(self.nodes,
4567 key=lambda node: node.name)):
4568 node_entry = [(constants.RS_NORMAL, node.name)]
4569 ret.append(node_entry)
4571 oob_program = _SupportsOob(self.cfg, node)
4574 node_entry.append((constants.RS_UNAVAIL, None))
4577 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4578 self.op.command, oob_program, node.name)
4579 result = self.rpc.call_run_oob(master_node, oob_program,
4580 self.op.command, node.name,
4584 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4585 node.name, result.fail_msg)
4586 node_entry.append((constants.RS_NODATA, None))
4589 self._CheckPayload(result)
4590 except errors.OpExecError, err:
4591 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4593 node_entry.append((constants.RS_NODATA, None))
4595 if self.op.command == constants.OOB_HEALTH:
4596 # For health we should log important events
4597 for item, status in result.payload:
4598 if status in [constants.OOB_STATUS_WARNING,
4599 constants.OOB_STATUS_CRITICAL]:
4600 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4601 item, node.name, status)
4603 if self.op.command == constants.OOB_POWER_ON:
4605 elif self.op.command == constants.OOB_POWER_OFF:
4606 node.powered = False
4607 elif self.op.command == constants.OOB_POWER_STATUS:
4608 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4609 if powered != node.powered:
4610 logging.warning(("Recorded power state (%s) of node '%s' does not"
4611 " match actual power state (%s)"), node.powered,
4614 # For configuration changing commands we should update the node
4615 if self.op.command in (constants.OOB_POWER_ON,
4616 constants.OOB_POWER_OFF):
4617 self.cfg.Update(node, feedback_fn)
4619 node_entry.append((constants.RS_NORMAL, result.payload))
4621 if (self.op.command == constants.OOB_POWER_ON and
4622 idx < len(self.nodes) - 1):
4623 time.sleep(self.op.power_delay)
4627 def _CheckPayload(self, result):
4628 """Checks if the payload is valid.
4630 @param result: RPC result
4631 @raises errors.OpExecError: If payload is not valid
4635 if self.op.command == constants.OOB_HEALTH:
4636 if not isinstance(result.payload, list):
4637 errs.append("command 'health' is expected to return a list but got %s" %
4638 type(result.payload))
4640 for item, status in result.payload:
4641 if status not in constants.OOB_STATUSES:
4642 errs.append("health item '%s' has invalid status '%s'" %
4645 if self.op.command == constants.OOB_POWER_STATUS:
4646 if not isinstance(result.payload, dict):
4647 errs.append("power-status is expected to return a dict but got %s" %
4648 type(result.payload))
4650 if self.op.command in [
4651 constants.OOB_POWER_ON,
4652 constants.OOB_POWER_OFF,
4653 constants.OOB_POWER_CYCLE,
4655 if result.payload is not None:
4656 errs.append("%s is expected to not return payload but got '%s'" %
4657 (self.op.command, result.payload))
4660 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4661 utils.CommaJoin(errs))
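
# Illustrative note (not part of the original module): the value returned by
# LUOobCommand.Exec above is a list with one entry per node; each entry starts
# with the node name and is followed by the command result, each wrapped in a
# (status, data) tuple, roughly:
#
#   [[(constants.RS_NORMAL, "node1.example.com"),
#     (constants.RS_NORMAL, <payload>)],
#    [(constants.RS_NORMAL, "node2.example.com"),
#     (constants.RS_NODATA, None)]]
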
4664 class _OsQuery(_QueryBase):
4665 FIELDS = query.OS_FIELDS
4667 def ExpandNames(self, lu):
4668 # Lock all nodes in shared mode
4669 # Temporary removal of locks, should be reverted later
4670 # TODO: reintroduce locks when they are lighter-weight
4671 lu.needed_locks = {}
4672 #self.share_locks[locking.LEVEL_NODE] = 1
4673 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4675 # The following variables interact with _QueryBase._GetNames
4677 self.wanted = self.names
4679 self.wanted = locking.ALL_SET
4681 self.do_locking = self.use_locking
4683 def DeclareLocks(self, lu, level):
4687 def _DiagnoseByOS(rlist):
4688 """Remaps a per-node return list into a per-os per-node dictionary
4690 @param rlist: a map with node names as keys and OS objects as values
4693 @return: a dictionary with osnames as keys and as value another
4694 map, with nodes as keys and tuples of (path, status, diagnose,
4695 variants, parameters, api_versions) as values, eg::
4697 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4698 (/srv/..., False, "invalid api")],
4699 "node2": [(/srv/..., True, "", [], [])]}
4704 # we build here the list of nodes that didn't fail the RPC (at RPC
4705 # level), so that nodes with a non-responding node daemon don't
4706 # make all OSes invalid
4707 good_nodes = [node_name for node_name in rlist
4708 if not rlist[node_name].fail_msg]
4709 for node_name, nr in rlist.items():
4710 if nr.fail_msg or not nr.payload:
4712 for (name, path, status, diagnose, variants,
4713 params, api_versions) in nr.payload:
4714 if name not in all_os:
4715 # build a list of nodes for this os containing empty lists
4716 # for each node in node_list
4718 for nname in good_nodes:
4719 all_os[name][nname] = []
4720 # convert params from [name, help] to (name, help)
4721 params = [tuple(v) for v in params]
4722 all_os[name][node_name].append((path, status, diagnose,
4723 variants, params, api_versions))
4726 def _GetQueryData(self, lu):
4727 """Computes the list of nodes and their attributes.
4730 # Locking is not used
4731 assert not (compat.any(lu.glm.is_owned(level)
4732 for level in locking.LEVELS
4733 if level != locking.LEVEL_CLUSTER) or
4734 self.do_locking or self.use_locking)
4736 valid_nodes = [node.name
4737 for node in lu.cfg.GetAllNodesInfo().values()
4738 if not node.offline and node.vm_capable]
4739 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4740 cluster = lu.cfg.GetClusterInfo()
4744 for (os_name, os_data) in pol.items():
4745 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4746 hidden=(os_name in cluster.hidden_os),
4747 blacklisted=(os_name in cluster.blacklisted_os))
4751 api_versions = set()
4753 for idx, osl in enumerate(os_data.values()):
4754 info.valid = bool(info.valid and osl and osl[0][1])
4758 (node_variants, node_params, node_api) = osl[0][3:6]
4761 variants.update(node_variants)
4762 parameters.update(node_params)
4763 api_versions.update(node_api)
4765 # Filter out inconsistent values
4766 variants.intersection_update(node_variants)
4767 parameters.intersection_update(node_params)
4768 api_versions.intersection_update(node_api)
4770 info.variants = list(variants)
4771 info.parameters = list(parameters)
4772 info.api_versions = list(api_versions)
4774 data[os_name] = info
4776 # Prepare data in requested order
4777 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
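
# Illustrative sketch only (not part of the original module): the
# "take the first node's values, then intersect with every other node" pattern
# used by _GetQueryData above to keep only variants/parameters/API versions
# that all nodes agree on.  The helper name is hypothetical.
def _ExampleCommonValues(per_node_values):
  """Returns only the values reported by every entry in PER_NODE_VALUES.

  @param per_node_values: sequence of iterables, one per node

  """
  common = set()
  for idx, values in enumerate(per_node_values):
    if idx == 0:
      common.update(values)
    else:
      common.intersection_update(values)
  return common
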
4781 class LUOsDiagnose(NoHooksLU):
4782 """Logical unit for OS diagnose/query.
4788 def _BuildFilter(fields, names):
4789 """Builds a filter for querying OSes.
4792 name_filter = qlang.MakeSimpleFilter("name", names)
4794 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4795 # respective field is not requested
4796 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4797 for fname in ["hidden", "blacklisted"]
4798 if fname not in fields]
4799 if "valid" not in fields:
4800 status_filter.append([qlang.OP_TRUE, "valid"])
4803 status_filter.insert(0, qlang.OP_AND)
4805 status_filter = None
4807 if name_filter and status_filter:
4808 return [qlang.OP_AND, name_filter, status_filter]
4812 return status_filter
4814 def CheckArguments(self):
4815 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4816 self.op.output_fields, False)
4818 def ExpandNames(self):
4819 self.oq.ExpandNames(self)
4821 def Exec(self, feedback_fn):
4822 return self.oq.OldStyleQuery(self)
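
# Illustrative note (not part of the original module): for a query that
# requests only ["name", "variants"] and gives no names, _BuildFilter above
# yields (approximately) the following qlang filter, hiding hidden,
# blacklisted and invalid OSes:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
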
4825 class LUNodeRemove(LogicalUnit):
4826 """Logical unit for removing a node.
4829 HPATH = "node-remove"
4830 HTYPE = constants.HTYPE_NODE
4832 def BuildHooksEnv(self):
4835 This doesn't run on the target node in the pre phase as a failed
4836 node would then be impossible to remove.
4840 "OP_TARGET": self.op.node_name,
4841 "NODE_NAME": self.op.node_name,
4844 def BuildHooksNodes(self):
4845 """Build hooks nodes.
4848 all_nodes = self.cfg.GetNodeList()
4850 all_nodes.remove(self.op.node_name)
4852 logging.warning("Node '%s', which is about to be removed, was not found"
4853 " in the list of all nodes", self.op.node_name)
4854 return (all_nodes, all_nodes)
4856 def CheckPrereq(self):
4857 """Check prerequisites.
4860 - the node exists in the configuration
4861 - it does not have primary or secondary instances
4862 - it's not the master
4864 Any errors are signaled by raising errors.OpPrereqError.
4867 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4868 node = self.cfg.GetNodeInfo(self.op.node_name)
4869 assert node is not None
4871 masternode = self.cfg.GetMasterNode()
4872 if node.name == masternode:
4873 raise errors.OpPrereqError("Node is the master node, failover to another"
4874 " node is required", errors.ECODE_INVAL)
4876 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4877 if node.name in instance.all_nodes:
4878 raise errors.OpPrereqError("Instance %s is still running on the node,"
4879 " please remove first" % instance_name,
4881 self.op.node_name = node.name
4884 def Exec(self, feedback_fn):
4885 """Removes the node from the cluster.
4889 logging.info("Stopping the node daemon and removing configs from node %s",
4892 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4894 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4897 # Promote nodes to master candidate as needed
4898 _AdjustCandidatePool(self, exceptions=[node.name])
4899 self.context.RemoveNode(node.name)
4901 # Run post hooks on the node before it's removed
4902 _RunPostHook(self, node.name)
4904 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4905 msg = result.fail_msg
4907 self.LogWarning("Errors encountered on the remote node while leaving"
4908 " the cluster: %s", msg)
4910 # Remove node from our /etc/hosts
4911 if self.cfg.GetClusterInfo().modify_etc_hosts:
4912 master_node = self.cfg.GetMasterNode()
4913 result = self.rpc.call_etc_hosts_modify(master_node,
4914 constants.ETC_HOSTS_REMOVE,
4916 result.Raise("Can't update hosts file with new host data")
4917 _RedistributeAncillaryFiles(self)
4920 class _NodeQuery(_QueryBase):
4921 FIELDS = query.NODE_FIELDS
4923 def ExpandNames(self, lu):
4924 lu.needed_locks = {}
4925 lu.share_locks = _ShareAll()
4928 self.wanted = _GetWantedNodes(lu, self.names)
4930 self.wanted = locking.ALL_SET
4932 self.do_locking = (self.use_locking and
4933 query.NQ_LIVE in self.requested_data)
4936 # If any non-static field is requested we need to lock the nodes
4937 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4939 def DeclareLocks(self, lu, level):
4942 def _GetQueryData(self, lu):
4943 """Computes the list of nodes and their attributes.
4946 all_info = lu.cfg.GetAllNodesInfo()
4948 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4950 # Gather data as requested
4951 if query.NQ_LIVE in self.requested_data:
4952 # filter out non-vm_capable nodes
4953 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4955 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4956 [lu.cfg.GetHypervisorType()])
4957 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4958 for (name, nresult) in node_data.items()
4959 if not nresult.fail_msg and nresult.payload)
4963 if query.NQ_INST in self.requested_data:
4964 node_to_primary = dict([(name, set()) for name in nodenames])
4965 node_to_secondary = dict([(name, set()) for name in nodenames])
4967 inst_data = lu.cfg.GetAllInstancesInfo()
4969 for inst in inst_data.values():
4970 if inst.primary_node in node_to_primary:
4971 node_to_primary[inst.primary_node].add(inst.name)
4972 for secnode in inst.secondary_nodes:
4973 if secnode in node_to_secondary:
4974 node_to_secondary[secnode].add(inst.name)
4976 node_to_primary = None
4977 node_to_secondary = None
4979 if query.NQ_OOB in self.requested_data:
4980 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4981 for name, node in all_info.iteritems())
4985 if query.NQ_GROUP in self.requested_data:
4986 groups = lu.cfg.GetAllNodeGroupsInfo()
4990 return query.NodeQueryData([all_info[name] for name in nodenames],
4991 live_data, lu.cfg.GetMasterNode(),
4992 node_to_primary, node_to_secondary, groups,
4993 oob_support, lu.cfg.GetClusterInfo())
4996 class LUNodeQuery(NoHooksLU):
4997 """Logical unit for querying nodes.
5000 # pylint: disable=W0142
5003 def CheckArguments(self):
5004 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5005 self.op.output_fields, self.op.use_locking)
5007 def ExpandNames(self):
5008 self.nq.ExpandNames(self)
5010 def DeclareLocks(self, level):
5011 self.nq.DeclareLocks(self, level)
5013 def Exec(self, feedback_fn):
5014 return self.nq.OldStyleQuery(self)
5017 class LUNodeQueryvols(NoHooksLU):
5018 """Logical unit for getting volumes on node(s).
5022 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5023 _FIELDS_STATIC = utils.FieldSet("node")
5025 def CheckArguments(self):
5026 _CheckOutputFields(static=self._FIELDS_STATIC,
5027 dynamic=self._FIELDS_DYNAMIC,
5028 selected=self.op.output_fields)
5030 def ExpandNames(self):
5031 self.share_locks = _ShareAll()
5032 self.needed_locks = {}
5034 if not self.op.nodes:
5035 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5037 self.needed_locks[locking.LEVEL_NODE] = \
5038 _GetWantedNodes(self, self.op.nodes)
5040 def Exec(self, feedback_fn):
5041 """Computes the list of nodes and their attributes.
5044 nodenames = self.owned_locks(locking.LEVEL_NODE)
5045 volumes = self.rpc.call_node_volumes(nodenames)
5047 ilist = self.cfg.GetAllInstancesInfo()
5048 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5051 for node in nodenames:
5052 nresult = volumes[node]
5055 msg = nresult.fail_msg
5057 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5060 node_vols = sorted(nresult.payload,
5061 key=operator.itemgetter("dev"))
5063 for vol in node_vols:
5065 for field in self.op.output_fields:
5068 elif field == "phys":
5072 elif field == "name":
5074 elif field == "size":
5075 val = int(float(vol["size"]))
5076 elif field == "instance":
5077 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5079 raise errors.ParameterError(field)
5080 node_output.append(str(val))
5082 output.append(node_output)
5087 class LUNodeQueryStorage(NoHooksLU):
5088 """Logical unit for getting information on storage units on node(s).
5091 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5094 def CheckArguments(self):
5095 _CheckOutputFields(static=self._FIELDS_STATIC,
5096 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5097 selected=self.op.output_fields)
5099 def ExpandNames(self):
5100 self.share_locks = _ShareAll()
5101 self.needed_locks = {}
5104 self.needed_locks[locking.LEVEL_NODE] = \
5105 _GetWantedNodes(self, self.op.nodes)
5107 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5109 def Exec(self, feedback_fn):
5110 """Computes the list of nodes and their attributes.
5113 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5115 # Always get name to sort by
5116 if constants.SF_NAME in self.op.output_fields:
5117 fields = self.op.output_fields[:]
5119 fields = [constants.SF_NAME] + self.op.output_fields
5121 # Never ask for node or type as it's only known to the LU
5122 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5123 while extra in fields:
5124 fields.remove(extra)
5126 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5127 name_idx = field_idx[constants.SF_NAME]
5129 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5130 data = self.rpc.call_storage_list(self.nodes,
5131 self.op.storage_type, st_args,
5132 self.op.name, fields)
5136 for node in utils.NiceSort(self.nodes):
5137 nresult = data[node]
5141 msg = nresult.fail_msg
5143 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5146 rows = dict([(row[name_idx], row) for row in nresult.payload])
5148 for name in utils.NiceSort(rows.keys()):
5153 for field in self.op.output_fields:
5154 if field == constants.SF_NODE:
5156 elif field == constants.SF_TYPE:
5157 val = self.op.storage_type
5158 elif field in field_idx:
5159 val = row[field_idx[field]]
5161 raise errors.ParameterError(field)
5170 class _InstanceQuery(_QueryBase):
5171 FIELDS = query.INSTANCE_FIELDS
5173 def ExpandNames(self, lu):
5174 lu.needed_locks = {}
5175 lu.share_locks = _ShareAll()
5178 self.wanted = _GetWantedInstances(lu, self.names)
5180 self.wanted = locking.ALL_SET
5182 self.do_locking = (self.use_locking and
5183 query.IQ_LIVE in self.requested_data)
5185 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5186 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5187 lu.needed_locks[locking.LEVEL_NODE] = []
5188 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5190 self.do_grouplocks = (self.do_locking and
5191 query.IQ_NODES in self.requested_data)
5193 def DeclareLocks(self, lu, level):
5195 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5196 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5198 # Lock all groups used by instances optimistically; this requires going
5199 # via the node before it's locked, requiring verification later on
5200 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5202 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5203 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5204 elif level == locking.LEVEL_NODE:
5205 lu._LockInstancesNodes() # pylint: disable=W0212
5208 def _CheckGroupLocks(lu):
5209 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5210 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5212 # Check if node groups for locked instances are still correct
5213 for instance_name in owned_instances:
5214 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5216 def _GetQueryData(self, lu):
5217 """Computes the list of instances and their attributes.
5220 if self.do_grouplocks:
5221 self._CheckGroupLocks(lu)
5223 cluster = lu.cfg.GetClusterInfo()
5224 all_info = lu.cfg.GetAllInstancesInfo()
5226 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5228 instance_list = [all_info[name] for name in instance_names]
5229 nodes = frozenset(itertools.chain(*(inst.all_nodes
5230 for inst in instance_list)))
5231 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5234 wrongnode_inst = set()
5236 # Gather data as requested
5237 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5239 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5241 result = node_data[name]
5243 # offline nodes will be in both lists
5244 assert result.fail_msg
5245 offline_nodes.append(name)
5247 bad_nodes.append(name)
5248 elif result.payload:
5249 for inst in result.payload:
5250 if inst in all_info:
5251 if all_info[inst].primary_node == name:
5252 live_data.update(result.payload)
5254 wrongnode_inst.add(inst)
5256 # orphan instance; we don't list it here as we don't
5257 # handle this case yet in the output of instance listing
5258 logging.warning("Orphan instance '%s' found on node %s",
5260 # else no instance is alive
5264 if query.IQ_DISKUSAGE in self.requested_data:
5265 disk_usage = dict((inst.name,
5266 _ComputeDiskSize(inst.disk_template,
5267 [{constants.IDISK_SIZE: disk.size}
5268 for disk in inst.disks]))
5269 for inst in instance_list)
5273 if query.IQ_CONSOLE in self.requested_data:
5275 for inst in instance_list:
5276 if inst.name in live_data:
5277 # Instance is running
5278 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5280 consinfo[inst.name] = None
5281 assert set(consinfo.keys()) == set(instance_names)
5285 if query.IQ_NODES in self.requested_data:
5286 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5288 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5289 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5290 for uuid in set(map(operator.attrgetter("group"),
5296 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5297 disk_usage, offline_nodes, bad_nodes,
5298 live_data, wrongnode_inst, consinfo,
5302 class LUQuery(NoHooksLU):
5303 """Query for resources/items of a certain kind.
5306 # pylint: disable=W0142
5309 def CheckArguments(self):
5310 qcls = _GetQueryImplementation(self.op.what)
5312 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5314 def ExpandNames(self):
5315 self.impl.ExpandNames(self)
5317 def DeclareLocks(self, level):
5318 self.impl.DeclareLocks(self, level)
5320 def Exec(self, feedback_fn):
5321 return self.impl.NewStyleQuery(self)
5324 class LUQueryFields(NoHooksLU):
5325 """Query for resources/items of a certain kind.
5328 # pylint: disable=W0142
5331 def CheckArguments(self):
5332 self.qcls = _GetQueryImplementation(self.op.what)
5334 def ExpandNames(self):
5335 self.needed_locks = {}
5337 def Exec(self, feedback_fn):
5338 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5341 class LUNodeModifyStorage(NoHooksLU):
5342 """Logical unit for modifying a storage volume on a node.
5347 def CheckArguments(self):
5348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350 storage_type = self.op.storage_type
5353 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5355 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5356 " modified" % storage_type,
5359 diff = set(self.op.changes.keys()) - modifiable
5361 raise errors.OpPrereqError("The following fields can not be modified for"
5362 " storage units of type '%s': %r" %
5363 (storage_type, list(diff)),
5366 def ExpandNames(self):
5367 self.needed_locks = {
5368 locking.LEVEL_NODE: self.op.node_name,
5371 def Exec(self, feedback_fn):
5372 """Computes the list of nodes and their attributes.
5375 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5376 result = self.rpc.call_storage_modify(self.op.node_name,
5377 self.op.storage_type, st_args,
5378 self.op.name, self.op.changes)
5379 result.Raise("Failed to modify storage unit '%s' on %s" %
5380 (self.op.name, self.op.node_name))
5383 class LUNodeAdd(LogicalUnit):
5384 """Logical unit for adding node to the cluster.
5388 HTYPE = constants.HTYPE_NODE
5389 _NFLAGS = ["master_capable", "vm_capable"]
5391 def CheckArguments(self):
5392 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5393 # validate/normalize the node name
5394 self.hostname = netutils.GetHostname(name=self.op.node_name,
5395 family=self.primary_ip_family)
5396 self.op.node_name = self.hostname.name
5398 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5399 raise errors.OpPrereqError("Cannot readd the master node",
5402 if self.op.readd and self.op.group:
5403 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5404 " being readded", errors.ECODE_INVAL)
5406 def BuildHooksEnv(self):
5409 This will run on all nodes before, and on all nodes + the new node after.
5413 "OP_TARGET": self.op.node_name,
5414 "NODE_NAME": self.op.node_name,
5415 "NODE_PIP": self.op.primary_ip,
5416 "NODE_SIP": self.op.secondary_ip,
5417 "MASTER_CAPABLE": str(self.op.master_capable),
5418 "VM_CAPABLE": str(self.op.vm_capable),
5421 def BuildHooksNodes(self):
5422 """Build hooks nodes.
5425 # Exclude added node
5426 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5427 post_nodes = pre_nodes + [self.op.node_name, ]
5429 return (pre_nodes, post_nodes)
5431 def CheckPrereq(self):
5432 """Check prerequisites.
5435 - the new node is not already in the config
5437 - its parameters (single/dual homed) match the cluster
5439 Any errors are signaled by raising errors.OpPrereqError.
5443 hostname = self.hostname
5444 node = hostname.name
5445 primary_ip = self.op.primary_ip = hostname.ip
5446 if self.op.secondary_ip is None:
5447 if self.primary_ip_family == netutils.IP6Address.family:
5448 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5449 " IPv4 address must be given as secondary",
5451 self.op.secondary_ip = primary_ip
5453 secondary_ip = self.op.secondary_ip
5454 if not netutils.IP4Address.IsValid(secondary_ip):
5455 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5456 " address" % secondary_ip, errors.ECODE_INVAL)
5458 node_list = cfg.GetNodeList()
5459 if not self.op.readd and node in node_list:
5460 raise errors.OpPrereqError("Node %s is already in the configuration" %
5461 node, errors.ECODE_EXISTS)
5462 elif self.op.readd and node not in node_list:
5463 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5466 self.changed_primary_ip = False
5468 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5469 if self.op.readd and node == existing_node_name:
5470 if existing_node.secondary_ip != secondary_ip:
5471 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5472 " address configuration as before",
5474 if existing_node.primary_ip != primary_ip:
5475 self.changed_primary_ip = True
5479 if (existing_node.primary_ip == primary_ip or
5480 existing_node.secondary_ip == primary_ip or
5481 existing_node.primary_ip == secondary_ip or
5482 existing_node.secondary_ip == secondary_ip):
5483 raise errors.OpPrereqError("New node ip address(es) conflict with"
5484 " existing node %s" % existing_node.name,
5485 errors.ECODE_NOTUNIQUE)
5487 # After this 'if' block, None is no longer a valid value for the
5488 # _capable op attributes
5490 old_node = self.cfg.GetNodeInfo(node)
5491 assert old_node is not None, "Can't retrieve locked node %s" % node
5492 for attr in self._NFLAGS:
5493 if getattr(self.op, attr) is None:
5494 setattr(self.op, attr, getattr(old_node, attr))
5496 for attr in self._NFLAGS:
5497 if getattr(self.op, attr) is None:
5498 setattr(self.op, attr, True)
5500 if self.op.readd and not self.op.vm_capable:
5501 pri, sec = cfg.GetNodeInstances(node)
5503 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5504 " flag set to false, but it already holds"
5505 " instances" % node,
5508 # check that the type of the node (single versus dual homed) is the
5509 # same as for the master
5510 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5511 master_singlehomed = myself.secondary_ip == myself.primary_ip
5512 newbie_singlehomed = secondary_ip == primary_ip
5513 if master_singlehomed != newbie_singlehomed:
5514 if master_singlehomed:
5515 raise errors.OpPrereqError("The master has no secondary ip but the"
5516 " new node has one",
5519 raise errors.OpPrereqError("The master has a secondary ip but the"
5520 " new node doesn't have one",
5523 # checks reachability
5524 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5525 raise errors.OpPrereqError("Node not reachable by ping",
5526 errors.ECODE_ENVIRON)
5528 if not newbie_singlehomed:
5529 # check reachability from my secondary ip to newbie's secondary ip
5530 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5531 source=myself.secondary_ip):
5532 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5533 " based ping to node daemon port",
5534 errors.ECODE_ENVIRON)
5541 if self.op.master_capable:
5542 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5544 self.master_candidate = False
5547 self.new_node = old_node
5549 node_group = cfg.LookupNodeGroup(self.op.group)
5550 self.new_node = objects.Node(name=node,
5551 primary_ip=primary_ip,
5552 secondary_ip=secondary_ip,
5553 master_candidate=self.master_candidate,
5554 offline=False, drained=False,
5557 if self.op.ndparams:
5558 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5560 if self.op.hv_state:
5561 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5563 if self.op.disk_state:
5564 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5566 def Exec(self, feedback_fn):
5567 """Adds the new node to the cluster.
5570 new_node = self.new_node
5571 node = new_node.name
5573 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5576 # We are adding a new node, so we assume it's powered
5577 new_node.powered = True
5579 # for re-adds, reset the offline/drained/master-candidate flags;
5580 # we need to reset here, otherwise offline would prevent RPC calls
5581 # later in the procedure; this also means that if the re-add
5582 # fails, we are left with a non-offlined, broken node
5584 new_node.drained = new_node.offline = False # pylint: disable=W0201
5585 self.LogInfo("Readding a node, the offline/drained flags were reset")
5586 # if we demote the node, we do cleanup later in the procedure
5587 new_node.master_candidate = self.master_candidate
5588 if self.changed_primary_ip:
5589 new_node.primary_ip = self.op.primary_ip
5591 # copy the master/vm_capable flags
5592 for attr in self._NFLAGS:
5593 setattr(new_node, attr, getattr(self.op, attr))
5595 # notify the user about any possible mc promotion
5596 if new_node.master_candidate:
5597 self.LogInfo("Node will be a master candidate")
5599 if self.op.ndparams:
5600 new_node.ndparams = self.op.ndparams
5602 new_node.ndparams = {}
5604 if self.op.hv_state:
5605 new_node.hv_state_static = self.new_hv_state
5607 if self.op.disk_state:
5608 new_node.disk_state_static = self.new_disk_state
5610 # check connectivity
5611 result = self.rpc.call_version([node])[node]
5612 result.Raise("Can't get version information from node %s" % node)
5613 if constants.PROTOCOL_VERSION == result.payload:
5614 logging.info("Communication to node %s fine, sw version %s match",
5615 node, result.payload)
5617 raise errors.OpExecError("Version mismatch master version %s,"
5618 " node version %s" %
5619 (constants.PROTOCOL_VERSION, result.payload))
5621 # Add node to our /etc/hosts, and add key to known_hosts
5622 if self.cfg.GetClusterInfo().modify_etc_hosts:
5623 master_node = self.cfg.GetMasterNode()
5624 result = self.rpc.call_etc_hosts_modify(master_node,
5625 constants.ETC_HOSTS_ADD,
5628 result.Raise("Can't update hosts file with new host data")
5630 if new_node.secondary_ip != new_node.primary_ip:
5631 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5634 node_verify_list = [self.cfg.GetMasterNode()]
5635 node_verify_param = {
5636 constants.NV_NODELIST: ([node], {}),
5637 # TODO: do a node-net-test as well?
5640 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5641 self.cfg.GetClusterName())
5642 for verifier in node_verify_list:
5643 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5644 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5646 for failed in nl_payload:
5647 feedback_fn("ssh/hostname verification failed"
5648 " (checking from %s): %s" %
5649 (verifier, nl_payload[failed]))
5650 raise errors.OpExecError("ssh/hostname verification failed")
5653 _RedistributeAncillaryFiles(self)
5654 self.context.ReaddNode(new_node)
5655 # make sure we redistribute the config
5656 self.cfg.Update(new_node, feedback_fn)
5657 # and make sure the new node will not have old files around
5658 if not new_node.master_candidate:
5659 result = self.rpc.call_node_demote_from_mc(new_node.name)
5660 msg = result.fail_msg
5662 self.LogWarning("Node failed to demote itself from master"
5663 " candidate status: %s" % msg)
5665 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5666 additional_vm=self.op.vm_capable)
5667 self.context.AddNode(new_node, self.proc.GetECId())
5670 class LUNodeSetParams(LogicalUnit):
5671 """Modifies the parameters of a node.
5673 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5674 to the node role (as _ROLE_*)
5675 @cvar _R2F: a dictionary from node role to tuples of flags
5676 @cvar _FLAGS: a list of attribute names corresponding to the flags
5679 HPATH = "node-modify"
5680 HTYPE = constants.HTYPE_NODE
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
5690 _FLAGS = ["master_candidate", "drained", "offline"]
5692 def CheckArguments(self):
5693 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5694 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5695 self.op.master_capable, self.op.vm_capable,
5696 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5698 if all_mods.count(None) == len(all_mods):
5699 raise errors.OpPrereqError("Please pass at least one modification",
5701 if all_mods.count(True) > 1:
5702 raise errors.OpPrereqError("Can't set the node into more than one"
5703 " state at the same time",
5706 # Boolean value that tells us whether we might be demoting from MC
5707 self.might_demote = (self.op.master_candidate == False or
5708 self.op.offline == True or
5709 self.op.drained == True or
5710 self.op.master_capable == False)
5712 if self.op.secondary_ip:
5713 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5714 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5715 " address" % self.op.secondary_ip,
5718 self.lock_all = self.op.auto_promote and self.might_demote
5719 self.lock_instances = self.op.secondary_ip is not None
5721 def _InstanceFilter(self, instance):
5722 """Filter for getting affected instances.
5725 return (instance.disk_template in constants.DTS_INT_MIRROR and
5726 self.op.node_name in instance.all_nodes)
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5734 # Since modifying a node can have severe effects on currently running
5735 # operations the resource lock is at least acquired in shared mode
5736 self.needed_locks[locking.LEVEL_NODE_RES] = \
5737 self.needed_locks[locking.LEVEL_NODE]
5739 # Get node resource and instance locks in shared mode; they are not used
5740 # for anything but read-only access
5741 self.share_locks[locking.LEVEL_NODE_RES] = 1
5742 self.share_locks[locking.LEVEL_INSTANCE] = 1
5744 if self.lock_instances:
5745 self.needed_locks[locking.LEVEL_INSTANCE] = \
5746 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5748 def BuildHooksEnv(self):
5751 This runs on the master node.
5755 "OP_TARGET": self.op.node_name,
5756 "MASTER_CANDIDATE": str(self.op.master_candidate),
5757 "OFFLINE": str(self.op.offline),
5758 "DRAINED": str(self.op.drained),
5759 "MASTER_CAPABLE": str(self.op.master_capable),
5760 "VM_CAPABLE": str(self.op.vm_capable),
5763 def BuildHooksNodes(self):
5764 """Build hooks nodes.
5767 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5770 def CheckPrereq(self):
5771 """Check prerequisites.
5773 This only checks the instance list against the existing names.
5776 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5778 if self.lock_instances:
5779 affected_instances = \
5780 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5782 # Verify instance locks
5783 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5784 wanted_instances = frozenset(affected_instances.keys())
5785 if wanted_instances - owned_instances:
5786 raise errors.OpPrereqError("Instances affected by changing node %s's"
5787 " secondary IP address have changed since"
5788 " locks were acquired, wanted '%s', have"
5789 " '%s'; retry the operation" %
5791 utils.CommaJoin(wanted_instances),
5792 utils.CommaJoin(owned_instances)),
5795 affected_instances = None
5797 if (self.op.master_candidate is not None or
5798 self.op.drained is not None or
5799 self.op.offline is not None):
5800 # we can't change the master's node flags
5801 if self.op.node_name == self.cfg.GetMasterNode():
5802 raise errors.OpPrereqError("The master role can be changed"
5803 " only via master-failover",
5806 if self.op.master_candidate and not node.master_capable:
5807 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5808 " it a master candidate" % node.name,
5811 if self.op.vm_capable == False:
5812 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5814 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5815 " the vm_capable flag" % node.name,
5818 if node.master_candidate and self.might_demote and not self.lock_all:
5819 assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
5822 (mc_remaining, mc_should, _) = \
5823 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5824 if mc_remaining < mc_should:
5825 raise errors.OpPrereqError("Not enough master candidates, please"
5826 " pass auto promote option to allow"
5827 " promotion", errors.ECODE_STATE)
5829 self.old_flags = old_flags = (node.master_candidate,
5830 node.drained, node.offline)
5831 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5832 self.old_role = old_role = self._F2R[old_flags]
5834 # Check for ineffective changes
5835 for attr in self._FLAGS:
5836 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5837 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5838 setattr(self.op, attr, None)
5840 # Past this point, any flag change to False means a transition
5841 # away from the respective state, as only real changes are kept
5843 # TODO: We might query the real power state if it supports OOB
5844 if _SupportsOob(self.cfg, node):
5845 if self.op.offline is False and not (node.powered or
5846 self.op.powered == True):
5847 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5848 " offline status can be reset") %
5850 elif self.op.powered is not None:
5851 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5852 " as it does not support out-of-band"
5853 " handling") % self.op.node_name)
5855 # If we're being deofflined/drained, we'll MC ourself if needed
5856 if (self.op.drained == False or self.op.offline == False or
5857 (self.op.master_capable and not node.master_capable)):
5858 if _DecideSelfPromotion(self):
5859 self.op.master_candidate = True
5860 self.LogInfo("Auto-promoting node to master candidate")
5862 # If we're no longer master capable, we'll demote ourselves from MC
5863 if self.op.master_capable == False and node.master_candidate:
5864 self.LogInfo("Demoting from master candidate")
5865 self.op.master_candidate = False
5868 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5869 if self.op.master_candidate:
5870 new_role = self._ROLE_CANDIDATE
5871 elif self.op.drained:
5872 new_role = self._ROLE_DRAINED
5873 elif self.op.offline:
5874 new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # previously-set flag), so the node goes back to the regular role
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role
5882 self.new_role = new_role
5884 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5885 # Trying to transition out of offline status
5886 # TODO: Use standard RPC runner, but make sure it works when the node is
5887 # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5891 " to report its version: %s" %
5892 (node.name, result.fail_msg),
5895 self.LogWarning("Transitioning node from offline to online state"
5896 " without using re-add. Please make sure the node"
5899 if self.op.secondary_ip:
5900 # Ok even without locking, because this can't be changed by any LU
5901 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5902 master_singlehomed = master.secondary_ip == master.primary_ip
5903 if master_singlehomed and self.op.secondary_ip:
5904 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5905 " homed cluster", errors.ECODE_INVAL)
5907 assert not (frozenset(affected_instances) -
5908 self.owned_locks(locking.LEVEL_INSTANCE))
      if node.offline:
        if affected_instances:
5912 raise errors.OpPrereqError("Cannot change secondary IP address:"
5913 " offline node has instances (%s)"
5914 " configured to use it" %
5915 utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
5918 # the node has the new ip and we can reach it.
5919 for instance in affected_instances.values():
5920 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5921 msg="cannot change secondary ip")
5923 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5924 if master.name != node.name:
5925 # check reachability from master secondary ip to new secondary ip
5926 if not netutils.TcpPing(self.op.secondary_ip,
5927 constants.DEFAULT_NODED_PORT,
5928 source=master.secondary_ip):
5929 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5930 " based ping to node daemon port",
5931 errors.ECODE_ENVIRON)
5933 if self.op.ndparams:
5934 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5935 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5936 self.new_ndparams = new_ndparams
5938 if self.op.hv_state:
5939 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5940 self.node.hv_state_static)
5942 if self.op.disk_state:
5943 self.new_disk_state = \
5944 _MergeAndVerifyDiskState(self.op.disk_state,
5945 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
5958 node.ndparams = self.new_ndparams
5960 if self.op.powered is not None:
5961 node.powered = self.op.powered
5963 if self.op.hv_state:
5964 node.hv_state_static = self.new_hv_state
5966 if self.op.disk_state:
5967 node.disk_state_static = self.new_disk_state
    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
5975 if new_role != old_role:
5976 # Tell the node to demote itself, if no longer MC and not offline
5977 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5978 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5980 self.LogWarning("Node failed to demote itself: %s", msg)
5982 new_flags = self._R2F[new_role]
5983 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5985 result.append((desc, str(nf)))
5986 (node.master_candidate, node.drained, node.offline) = new_flags
5988 # we locked all nodes, we adjust the CP before updating this node
5990 _AdjustCandidatePool(self, [node.name])
5992 if self.op.secondary_ip:
5993 node.secondary_ip = self.op.secondary_ip
5994 result.append(("secondary_ip", self.op.secondary_ip))
5996 # this will trigger configuration file update, if needed
5997 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
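
# Illustrative sketch only: the _F2R/_R2F tables of LUNodeSetParams above map
# the (master_candidate, drained, offline) flag triple to a single role and
# back. The hypothetical module-level copies and helpers below show the same
# round trip on plain tuples, independent of the class and the opcode
# machinery; the role names used here are assumptions for the sketch.
_EXAMPLE_F2R = {
  (True, False, False): "candidate",
  (False, True, False): "drained",
  (False, False, True): "offline",
  (False, False, False): "regular",
  }
_EXAMPLE_R2F = dict((v, k) for k, v in _EXAMPLE_F2R.items())


def _ExampleFlagsToRole(master_candidate, drained, offline):
  """Map a flag triple to a role name (sketch of the _F2R lookup)."""
  return _EXAMPLE_F2R[(master_candidate, drained, offline)]


def _ExampleRoleToFlags(role):
  """Map a role name back to its flag triple (sketch of the _R2F lookup)."""
  return _EXAMPLE_R2F[role]
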
6007 class LUNodePowercycle(NoHooksLU):
6008 """Powercycles a node.
6013 def CheckArguments(self):
6014 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6015 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6016 raise errors.OpPrereqError("The node is the master and the force"
6017 " parameter was not set",
6020 def ExpandNames(self):
6021 """Locking for PowercycleNode.
6023 This is a last-resort option and shouldn't block on other
6024 jobs. Therefore, we grab no locks.
6027 self.needed_locks = {}
6029 def Exec(self, feedback_fn):
6033 result = self.rpc.call_node_powercycle(self.op.node_name,
6034 self.cfg.GetHypervisorType())
6035 result.Raise("Failed to schedule the reboot")
6036 return result.payload
6039 class LUClusterQuery(NoHooksLU):
6040 """Query cluster configuration.
6045 def ExpandNames(self):
6046 self.needed_locks = {}
6048 def Exec(self, feedback_fn):
6049 """Return cluster config.
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
6056 for os_name, hv_dict in cluster.os_hvp.items():
6057 os_hvp[os_name] = {}
6058 for hv_name, hv_params in hv_dict.items():
6059 if hv_name in cluster.enabled_hypervisors:
6060 os_hvp[os_name][hv_name] = hv_params
6062 # Convert ip_family to ip_version
6063 primary_ip_version = constants.IP4_VERSION
6064 if cluster.primary_ip_family == netutils.IP6Address.family:
6065 primary_ip_version = constants.IP6_VERSION
6068 "software_version": constants.RELEASE_VERSION,
6069 "protocol_version": constants.PROTOCOL_VERSION,
6070 "config_version": constants.CONFIG_VERSION,
6071 "os_api_version": max(constants.OS_API_VERSIONS),
6072 "export_version": constants.EXPORT_VERSION,
6073 "architecture": (platform.architecture()[0], platform.machine()),
6074 "name": cluster.cluster_name,
6075 "master": cluster.master_node,
6076 "default_hypervisor": cluster.primary_hypervisor,
6077 "enabled_hypervisors": cluster.enabled_hypervisors,
6078 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6079 for hypervisor_name in cluster.enabled_hypervisors]),
6081 "beparams": cluster.beparams,
6082 "osparams": cluster.osparams,
6083 "ipolicy": cluster.ipolicy,
6084 "nicparams": cluster.nicparams,
6085 "ndparams": cluster.ndparams,
6086 "candidate_pool_size": cluster.candidate_pool_size,
6087 "master_netdev": cluster.master_netdev,
6088 "master_netmask": cluster.master_netmask,
6089 "use_external_mip_script": cluster.use_external_mip_script,
6090 "volume_group_name": cluster.volume_group_name,
6091 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6092 "file_storage_dir": cluster.file_storage_dir,
6093 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6094 "maintain_node_health": cluster.maintain_node_health,
6095 "ctime": cluster.ctime,
6096 "mtime": cluster.mtime,
6097 "uuid": cluster.uuid,
6098 "tags": list(cluster.GetTags()),
6099 "uid_pool": cluster.uid_pool,
6100 "default_iallocator": cluster.default_iallocator,
6101 "reserved_lvs": cluster.reserved_lvs,
6102 "primary_ip_version": primary_ip_version,
6103 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6104 "hidden_os": cluster.hidden_os,
6105 "blacklisted_os": cluster.blacklisted_os,
6111 class LUClusterConfigQuery(NoHooksLU):
6112 """Return configuration values.
6116 _FIELDS_DYNAMIC = utils.FieldSet()
6117 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6118 "watcher_pause", "volume_group_name")
6120 def CheckArguments(self):
6121 _CheckOutputFields(static=self._FIELDS_STATIC,
6122 dynamic=self._FIELDS_DYNAMIC,
6123 selected=self.op.output_fields)
6125 def ExpandNames(self):
6126 self.needed_locks = {}
6128 def Exec(self, feedback_fn):
6129 """Dump a representation of the cluster config to the standard output.
    values = []
    for field in self.op.output_fields:
6134 if field == "cluster_name":
6135 entry = self.cfg.GetClusterName()
6136 elif field == "master_node":
6137 entry = self.cfg.GetMasterNode()
6138 elif field == "drain_flag":
6139 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6140 elif field == "watcher_pause":
6141 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6142 elif field == "volume_group_name":
6143 entry = self.cfg.GetVGName()
6145 raise errors.ParameterError(field)
      values.append(entry)

    return values
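
# Illustrative sketch only: LUClusterConfigQuery.Exec above resolves each
# requested output field with an if/elif chain and rejects unknown fields.
# The hypothetical helper below expresses the same dispatch as a lookup table
# of callables; "getters" is assumed to map field names to zero-argument
# functions (e.g. a bound self.cfg.GetClusterName), which is not how the LU
# itself is written.
def _ExampleQueryFields(getters, output_fields):
  """Resolve output fields via a lookup table, mimicking the LU above."""
  values = []
  for field in output_fields:
    try:
      getter = getters[field]
    except KeyError:
      raise errors.ParameterError(field)
    values.append(getter())
  return values
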
6150 class LUInstanceActivateDisks(NoHooksLU):
6151 """Bring up an instance's disks.
6156 def ExpandNames(self):
6157 self._ExpandAndLockInstance()
6158 self.needed_locks[locking.LEVEL_NODE] = []
6159 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6161 def DeclareLocks(self, level):
6162 if level == locking.LEVEL_NODE:
6163 self._LockInstancesNodes()
6165 def CheckPrereq(self):
6166 """Check prerequisites.
6168 This checks that the instance is in the cluster.
6171 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6172 assert self.instance is not None, \
6173 "Cannot retrieve locked instance %s" % self.op.instance_name
6174 _CheckNodeOnline(self, self.instance.primary_node)
6176 def Exec(self, feedback_fn):
6177 """Activate the disks.
6180 disks_ok, disks_info = \
6181 _AssembleInstanceDisks(self, self.instance,
6182 ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
6191 """Prepare the block devices for an instance.
6193 This sets up the block devices on all nodes.
6195 @type lu: L{LogicalUnit}
6196 @param lu: the logical unit on whose behalf we execute
6197 @type instance: L{objects.Instance}
6198 @param instance: the instance for whose disks we assemble
6199 @type disks: list of L{objects.Disk} or None
6200 @param disks: which disks to assemble (or all, if None)
6201 @type ignore_secondaries: boolean
6202 @param ignore_secondaries: if true, errors on secondary nodes
6203 won't result in an error return from the function
6204 @type ignore_size: boolean
6205 @param ignore_size: if true, the current known size of the disk
6206 will not be used during the disk activation, useful for cases
6207 when the size is wrong
6208 @return: False if the operation failed, otherwise a list of
6209 (host, instance_visible_name, node_visible_name)
6210 with the mapping from node devices to instance devices
  device_info = []
  disks_ok = True
  iname = instance.name
6216 disks = _ExpandCheckDisks(instance, disks)
6218 # With the two passes mechanism we try to reduce the window of
6219 # opportunity for the race condition of switching DRBD to primary
6220 # before handshaking occured, but we do not eliminate it
6222 # The proper fix would be to wait (with some limits) until the
6223 # connection has been made and drbd transitions from WFConnection
6224 # into any other network-connected state (Connected, SyncTarget,
6227 # 1st pass, assemble on all nodes in secondary mode
6228 for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False
6243 # FIXME: race condition on drbd migration to primary
6245 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload
6266 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6268 # leave the disks configured for the primary node
6269 # this is a workaround that would be fixed better by
6270 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
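
# Illustrative sketch only: _AssembleInstanceDisks above activates every disk
# twice, first on all nodes in secondary mode and only afterwards on the
# primary node, to narrow the DRBD WFConnection race described in the comments.
# The hypothetical helper below shows just that ordering for a generic
# "assemble" callback; assemble_fn(node, as_primary) is an assumption for the
# sketch, not a real Ganeti RPC.
def _ExampleTwoPassAssemble(primary_node, all_nodes, assemble_fn):
  """Call assemble_fn in secondary-first, primary-last order."""
  # 1st pass: every node (including the primary) in secondary mode
  for node in all_nodes:
    assemble_fn(node, False)
  # 2nd pass: only the primary node, now as primary
  assemble_fn(primary_node, True)
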
6277 def _StartInstanceDisks(lu, instance, force):
6278 """Start the disks of an instance.
6281 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6282 ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
6292 class LUInstanceDeactivateDisks(NoHooksLU):
6293 """Shutdown an instance's disks.
6298 def ExpandNames(self):
6299 self._ExpandAndLockInstance()
6300 self.needed_locks[locking.LEVEL_NODE] = []
6301 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6303 def DeclareLocks(self, level):
6304 if level == locking.LEVEL_NODE:
6305 self._LockInstancesNodes()
6307 def CheckPrereq(self):
6308 """Check prerequisites.
6310 This checks that the instance is in the cluster.
6313 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314 assert self.instance is not None, \
6315 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 def Exec(self, feedback_fn):
6318 """Deactivate the disks
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
6328 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6329 """Shutdown block devices of an instance.
6331 This function checks if an instance is running, before calling
6332 _ShutdownInstanceDisks.
6335 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6336 _ShutdownInstanceDisks(lu, instance, disks=disks)
6339 def _ExpandCheckDisks(instance, disks):
6340 """Return the instance disks selected by the disks list
6342 @type disks: list of L{objects.Disk} or None
6343 @param disks: selected disks
6344 @rtype: list of L{objects.Disk}
6345 @return: selected instance disks to act on
  if not disks:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " owned instance")
    return disks
6357 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6358 """Shutdown block devices of an instance.
6360 This does the shutdown on all nodes of the instance.
  If the ignore_primary is false, errors on the primary node are not
  ignored (they cause the function to return False).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6371 lu.cfg.SetDiskID(top_disk, node)
6372 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
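
# Illustrative sketch only: when _ShutdownInstanceDisks above hits an error,
# that error only makes the overall result False if it happened on the primary
# node (and ignore_primary is unset) or on an online secondary node. The
# hypothetical predicate below restates that rule for a single node result;
# "node_offline" stands in for the offline flag carried by the RPC result.
def _ExampleShutdownErrorIsFatal(node, primary_node, node_offline,
                                 ignore_primary):
  """Decide whether a per-node shutdown failure should fail the operation."""
  if node == primary_node:
    return not ignore_primary
  # failures on offline secondaries are only logged, not counted
  return not node_offline
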
6383 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6384 """Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6391 @type lu: C{LogicalUnit}
6392 @param lu: a logical unit from which we get configuration data
6394 @param node: the node to check
6395 @type reason: C{str}
6396 @param reason: string to use in the error message
6397 @type requested: C{int}
6398 @param requested: the amount of memory in MiB to check for
6399 @type hypervisor_name: C{str}
6400 @param hypervisor_name: the hypervisor to ask for memory stats
6401 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6402 we cannot check the node
6405 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6406 nodeinfo[node].Raise("Can't get data from node %s" % node,
6407 prereq=True, ecode=errors.ECODE_ENVIRON)
6408 (_, _, (hv_info, )) = nodeinfo[node].payload
6410 free_mem = hv_info.get("memory_free", None)
6411 if not isinstance(free_mem, int):
6412 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6413 " was '%s'" % (node, free_mem),
6414 errors.ECODE_ENVIRON)
6415 if requested > free_mem:
6416 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6417 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
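
# Illustrative sketch only: _CheckNodeFreeMemory above asks the node for its
# hypervisor memory statistics and rejects the operation when "memory_free"
# is missing, malformed or too small. The hypothetical helper below performs
# the same validation on an already-fetched hv_info dictionary, so no RPC is
# involved; the payload layout is assumed to match the code above.
def _ExampleCheckFreeMemory(node, hv_info, requested, reason):
  """Validate a memory_free value the way _CheckNodeFreeMemory does."""
  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
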
6422 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6423 """Checks if nodes have enough free disk space in the all VGs.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6430 @type lu: C{LogicalUnit}
6431 @param lu: a logical unit from which we get configuration data
6432 @type nodenames: C{list}
6433 @param nodenames: the list of node names to check
6434 @type req_sizes: C{dict}
6435 @param req_sizes: the hash of vg and corresponding amount of disk in
6437 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6438 or we cannot check the node
6441 for vg, req_size in req_sizes.items():
6442 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6445 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6446 """Checks if nodes have enough free disk space in the specified VG.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6453 @type lu: C{LogicalUnit}
6454 @param lu: a logical unit from which we get configuration data
6455 @type nodenames: C{list}
6456 @param nodenames: the list of node names to check
6458 @param vg: the volume group to check
6459 @type requested: C{int}
6460 @param requested: the amount of disk in MiB to check for
6461 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6462 or we cannot check the node
6465 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6466 for node in nodenames:
6467 info = nodeinfo[node]
6468 info.Raise("Cannot get current information from node %s" % node,
6469 prereq=True, ecode=errors.ECODE_ENVIRON)
6470 (_, (vg_info, ), _) = info.payload
6471 vg_free = vg_info.get("vg_free", None)
6472 if not isinstance(vg_free, int):
6473 raise errors.OpPrereqError("Can't compute free disk space on node"
6474 " %s for vg %s, result was '%s'" %
6475 (node, vg, vg_free), errors.ECODE_ENVIRON)
6476 if requested > vg_free:
6477 raise errors.OpPrereqError("Not enough disk space on target node %s"
6478 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
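
# Illustrative sketch only: _CheckNodesFreeDiskPerVG above simply fans out one
# _CheckNodesFreeDiskOnVG call per (vg, size) pair. The hypothetical helper
# below shows the complementary step of aggregating per-VG requirements from a
# list of (vg, size_in_mib) disk requests, producing a dict shaped like the
# req_sizes argument used above; the input format is an assumption.
def _ExampleSumDiskRequestsPerVG(disk_requests):
  """Aggregate per-VG disk requirements from (vg, size) pairs."""
  req_sizes = {}
  for vg, size in disk_requests:
    req_sizes[vg] = req_sizes.get(vg, 0) + size
  return req_sizes
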
6483 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6484 """Checks if nodes have enough physical CPUs
  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6491 @type lu: C{LogicalUnit}
6492 @param lu: a logical unit from which we get configuration data
6493 @type nodenames: C{list}
6494 @param nodenames: the list of node names to check
6495 @type requested: C{int}
6496 @param requested: the minimum acceptable number of physical CPUs
6497 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6498 or we cannot check the node
6501 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6502 for node in nodenames:
6503 info = nodeinfo[node]
6504 info.Raise("Cannot get current information from node %s" % node,
6505 prereq=True, ecode=errors.ECODE_ENVIRON)
6506 (_, _, (hv_info, )) = info.payload
6507 num_cpus = hv_info.get("cpu_total", None)
6508 if not isinstance(num_cpus, int):
6509 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6510 " on node %s, result was '%s'" %
6511 (node, num_cpus), errors.ECODE_ENVIRON)
6512 if requested > num_cpus:
6513 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6514 "required" % (node, num_cpus, requested),
6518 class LUInstanceStartup(LogicalUnit):
6519 """Starts an instance.
6522 HPATH = "instance-start"
6523 HTYPE = constants.HTYPE_INSTANCE
6526 def CheckArguments(self):
6528 if self.op.beparams:
6529 # fill the beparams dict
6530 objects.UpgradeBeParams(self.op.beparams)
6531 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6533 def ExpandNames(self):
6534 self._ExpandAndLockInstance()
6535 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6537 def DeclareLocks(self, level):
6538 if level == locking.LEVEL_NODE_RES:
6539 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6541 def BuildHooksEnv(self):
6544 This runs on master, primary and secondary nodes of the instance.
6548 "FORCE": self.op.force,
6551 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6555 def BuildHooksNodes(self):
6556 """Build hooks nodes.
6559 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6562 def CheckPrereq(self):
6563 """Check prerequisites.
6565 This checks that the instance is in the cluster.
6568 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6569 assert self.instance is not None, \
6570 "Cannot retrieve locked instance %s" % self.op.instance_name
6573 if self.op.hvparams:
6574 # check hypervisor parameter syntax (locally)
6575 cluster = self.cfg.GetClusterInfo()
6576 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6577 filled_hvp = cluster.FillHV(instance)
6578 filled_hvp.update(self.op.hvparams)
6579 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6580 hv_type.CheckParameterSyntax(filled_hvp)
6581 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6583 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6585 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6587 if self.primary_offline and self.op.ignore_offline_nodes:
6588 self.proc.LogWarning("Ignoring offline primary node")
6590 if self.op.hvparams or self.op.beparams:
6591 self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)
6595 bep = self.cfg.GetClusterInfo().FillBE(instance)
6596 bep.update(self.op.beparams)
6598 # check bridges existence
6599 _CheckInstanceBridgesExist(self, instance)
6601 remote_info = self.rpc.call_instance_info(instance.primary_node,
6603 instance.hypervisor)
6604 remote_info.Raise("Error checking node %s" % instance.primary_node,
6605 prereq=True, ecode=errors.ECODE_ENVIRON)
6606 if not remote_info.payload: # not running already
6607 _CheckNodeFreeMemory(self, instance.primary_node,
6608 "starting instance %s" % instance.name,
6609 bep[constants.BE_MINMEM], instance.hypervisor)
6611 def Exec(self, feedback_fn):
6612 """Start the instance.
6615 instance = self.instance
6616 force = self.op.force
6618 if not self.op.no_remember:
6619 self.cfg.MarkInstanceUp(instance.name)
6621 if self.primary_offline:
6622 assert self.op.ignore_offline_nodes
6623 self.proc.LogInfo("Primary node offline, marked instance as started")
6625 node_current = instance.primary_node
6627 _StartInstanceDisks(self, instance, force)
6630 self.rpc.call_instance_start(node_current,
6631 (instance, self.op.hvparams,
6633 self.op.startup_paused)
6634 msg = result.fail_msg
6636 _ShutdownInstanceDisks(self, instance)
6637 raise errors.OpExecError("Could not start instance: %s" % msg)
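
# Illustrative sketch only: LUInstanceStartup above fills the instance's
# hypervisor parameters from the cluster defaults (cluster.FillHV) and then
# layers the one-off overrides from the opcode on top before validating them.
# The hypothetical helper below shows that layering as plain dict merging;
# "cluster_defaults", "instance_params" and "overrides" are assumed plain
# dicts, not Ganeti objects.
def _ExampleFillStartupParams(cluster_defaults, instance_params, overrides):
  """Merge parameter dicts with increasing priority, last one wins."""
  filled = {}
  for source in (cluster_defaults, instance_params, overrides):
    if source:
      filled.update(source)
  return filled
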
6640 class LUInstanceReboot(LogicalUnit):
6641 """Reboot an instance.
6644 HPATH = "instance-reboot"
6645 HTYPE = constants.HTYPE_INSTANCE
6648 def ExpandNames(self):
6649 self._ExpandAndLockInstance()
6651 def BuildHooksEnv(self):
6654 This runs on master, primary and secondary nodes of the instance.
6658 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6659 "REBOOT_TYPE": self.op.reboot_type,
6660 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6663 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6667 def BuildHooksNodes(self):
6668 """Build hooks nodes.
6671 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6674 def CheckPrereq(self):
6675 """Check prerequisites.
6677 This checks that the instance is in the cluster.
6680 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6681 assert self.instance is not None, \
6682 "Cannot retrieve locked instance %s" % self.op.instance_name
6683 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6684 _CheckNodeOnline(self, instance.primary_node)
6686 # check bridges existence
6687 _CheckInstanceBridgesExist(self, instance)
6689 def Exec(self, feedback_fn):
6690 """Reboot the instance.
6693 instance = self.instance
6694 ignore_secondaries = self.op.ignore_secondaries
6695 reboot_type = self.op.reboot_type
6697 remote_info = self.rpc.call_instance_info(instance.primary_node,
6699 instance.hypervisor)
6700 remote_info.Raise("Error checking node %s" % instance.primary_node)
6701 instance_running = bool(remote_info.payload)
6703 node_current = instance.primary_node
6705 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6706 constants.INSTANCE_REBOOT_HARD]:
6707 for disk in instance.disks:
6708 self.cfg.SetDiskID(disk, node_current)
6709 result = self.rpc.call_instance_reboot(node_current, instance,
6711 self.op.shutdown_timeout)
6712 result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)
6731 self.cfg.MarkInstanceUp(instance.name)
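
# Illustrative sketch only: LUInstanceReboot.Exec above performs an in-place
# reboot only for a running instance with a soft or hard reboot type; in every
# other case it falls back to a full stop/start cycle. The hypothetical
# predicate below captures that decision; the reboot type constants are the
# ones already used in the code above.
def _ExampleNeedsFullReboot(instance_running, reboot_type):
  """Return True when the reboot must be done as shutdown + startup."""
  in_place_types = frozenset([constants.INSTANCE_REBOOT_SOFT,
                              constants.INSTANCE_REBOOT_HARD])
  return not (instance_running and reboot_type in in_place_types)
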
6734 class LUInstanceShutdown(LogicalUnit):
6735 """Shutdown an instance.
6738 HPATH = "instance-stop"
6739 HTYPE = constants.HTYPE_INSTANCE
6742 def ExpandNames(self):
6743 self._ExpandAndLockInstance()
6745 def BuildHooksEnv(self):
6748 This runs on master, primary and secondary nodes of the instance.
6751 env = _BuildInstanceHookEnvByObject(self, self.instance)
6752 env["TIMEOUT"] = self.op.timeout
6755 def BuildHooksNodes(self):
6756 """Build hooks nodes.
6759 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6762 def CheckPrereq(self):
6763 """Check prerequisites.
6765 This checks that the instance is in the cluster.
6768 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6769 assert self.instance is not None, \
6770 "Cannot retrieve locked instance %s" % self.op.instance_name
6772 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6774 self.primary_offline = \
6775 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6777 if self.primary_offline and self.op.ignore_offline_nodes:
6778 self.proc.LogWarning("Ignoring offline primary node")
6780 _CheckNodeOnline(self, self.instance.primary_node)
6782 def Exec(self, feedback_fn):
6783 """Shutdown the instance.
6786 instance = self.instance
6787 node_current = instance.primary_node
6788 timeout = self.op.timeout
6790 if not self.op.no_remember:
6791 self.cfg.MarkInstanceDown(instance.name)
6793 if self.primary_offline:
6794 assert self.op.ignore_offline_nodes
6795 self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
6805 class LUInstanceReinstall(LogicalUnit):
6806 """Reinstall an instance.
6809 HPATH = "instance-reinstall"
6810 HTYPE = constants.HTYPE_INSTANCE
6813 def ExpandNames(self):
6814 self._ExpandAndLockInstance()
6816 def BuildHooksEnv(self):
6819 This runs on master, primary and secondary nodes of the instance.
6822 return _BuildInstanceHookEnvByObject(self, self.instance)
6824 def BuildHooksNodes(self):
6825 """Build hooks nodes.
6828 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6831 def CheckPrereq(self):
6832 """Check prerequisites.
6834 This checks that the instance is in the cluster and is not running.
6837 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6838 assert instance is not None, \
6839 "Cannot retrieve locked instance %s" % self.op.instance_name
6840 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6841 " offline, cannot reinstall")
6842 for node in instance.secondary_nodes:
6843 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6844 " cannot reinstall")
6846 if instance.disk_template == constants.DT_DISKLESS:
6847 raise errors.OpPrereqError("Instance '%s' has no disks" %
6848 self.op.instance_name,
6850 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6852 if self.op.os_type is not None:
6854 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6855 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6856 instance_os = self.op.os_type
6858 instance_os = instance.os
6860 nodelist = list(instance.all_nodes)
6862 if self.op.osparams:
6863 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6864 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6865 self.os_inst = i_osdict # the new dict (without defaults)
6869 self.instance = instance
6871 def Exec(self, feedback_fn):
6872 """Reinstall the instance.
6875 inst = self.instance
6877 if self.op.os_type is not None:
6878 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6879 inst.os = self.op.os_type
6880 # Write to configuration
6881 self.cfg.Update(inst, feedback_fn)
6883 _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
6886 # FIXME: pass debug option from opcode to backend
6887 result = self.rpc.call_instance_os_add(inst.primary_node,
6888 (inst, self.os_inst), True,
6889 self.op.debug_level)
6890 result.Raise("Could not install OS for instance %s on node %s" %
6891 (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
6896 class LUInstanceRecreateDisks(LogicalUnit):
6897 """Recreate an instance's missing disks.
6900 HPATH = "instance-recreate-disks"
6901 HTYPE = constants.HTYPE_INSTANCE
6904 _MODIFYABLE = frozenset([
6905 constants.IDISK_SIZE,
6906 constants.IDISK_MODE,
6909 # New or changed disk parameters may have different semantics
6910 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6911 constants.IDISK_ADOPT,
6913 # TODO: Implement support changing VG while recreating
6915 constants.IDISK_METAVG,
6918 def CheckArguments(self):
6919 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6920 # Normalize and convert deprecated list of disk indices
6921 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6923 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
6926 " once: %s" % utils.CommaJoin(duplicates),
6929 for (idx, params) in self.op.disks:
6930 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6931 unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
6935 (idx, utils.CommaJoin(unsupported)),
6938 def ExpandNames(self):
6939 self._ExpandAndLockInstance()
6940 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
6946 self.needed_locks[locking.LEVEL_NODE_RES] = []
6948 def DeclareLocks(self, level):
6949 if level == locking.LEVEL_NODE:
6950 # if we replace the nodes, we only need to lock the old primary,
6951 # otherwise we need to lock all nodes for disk re-creation
6952 primary_only = bool(self.op.nodes)
6953 self._LockInstancesNodes(primary_only=primary_only)
6954 elif level == locking.LEVEL_NODE_RES:
6956 self.needed_locks[locking.LEVEL_NODE_RES] = \
6957 self.needed_locks[locking.LEVEL_NODE][:]
6959 def BuildHooksEnv(self):
6962 This runs on master, primary and secondary nodes of the instance.
6965 return _BuildInstanceHookEnvByObject(self, self.instance)
6967 def BuildHooksNodes(self):
6968 """Build hooks nodes.
6971 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6974 def CheckPrereq(self):
6975 """Check prerequisites.
6977 This checks that the instance is in the cluster and is not running.
6980 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6981 assert instance is not None, \
6982 "Cannot retrieve locked instance %s" % self.op.instance_name
6984 if len(self.op.nodes) != len(instance.all_nodes):
6985 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6986 " %d replacement nodes were specified" %
6987 (instance.name, len(instance.all_nodes),
6988 len(self.op.nodes)),
6990 assert instance.disk_template != constants.DT_DRBD8 or \
6991 len(self.op.nodes) == 2
6992 assert instance.disk_template != constants.DT_PLAIN or \
6993 len(self.op.nodes) == 1
6994 primary_node = self.op.nodes[0]
6996 primary_node = instance.primary_node
6997 _CheckNodeOnline(self, primary_node)
6999 if instance.disk_template == constants.DT_DISKLESS:
7000 raise errors.OpPrereqError("Instance '%s' has no disks" %
7001 self.op.instance_name, errors.ECODE_INVAL)
7003 # if we replace nodes *and* the old primary is offline, we don't
7005 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7006 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7007 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7008 if not (self.op.nodes and old_pnode.offline):
7009 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7010 msg="cannot recreate disks")
7013 self.disks = dict(self.op.disks)
7015 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7017 maxidx = max(self.disks.keys())
7018 if maxidx >= len(instance.disks):
7019 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7022 if (self.op.nodes and
7023 sorted(self.disks.keys()) != range(len(instance.disks))):
7024 raise errors.OpPrereqError("Can't recreate disks partially and"
7025 " change the nodes at the same time",
7028 self.instance = instance
7030 def Exec(self, feedback_fn):
7031 """Recreate the disks.
7034 instance = self.instance
7036 assert (self.owned_locks(locking.LEVEL_NODE) ==
7037 self.owned_locks(locking.LEVEL_NODE_RES))
    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue
7050 # update secondaries for disks, if needed
7051 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7052 # need to update the nodes and minors
7053 assert len(self.op.nodes) == 2
7054 assert len(disk.logical_id) == 6 # otherwise disk internals
7056 (_, _, old_port, _, _, old_secret) = disk.logical_id
7057 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7058 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7059 new_minors[0], new_minors[1], old_secret)
7060 assert len(disk.logical_id) == len(new_id)
7064 mods.append((idx, new_id, changes))
7066 # now that we have passed all asserts above, we can apply the mods
7067 # in a single run (to avoid partial changes)
7068 for idx, new_id, changes in mods:
7069 disk = instance.disks[idx]
7070 if new_id is not None:
7071 assert disk.dev_type == constants.LD_DRBD8
7072 disk.logical_id = new_id
7074 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7075 mode=changes.get(constants.IDISK_MODE, None))
7077 # change primary node, if needed
7079 instance.primary_node = self.op.nodes[0]
7080 self.LogWarning("Changing the instance's nodes, you will have to"
7081 " remove any disks left on the older nodes manually")
7084 self.cfg.Update(instance, feedback_fn)
7086 _CreateDisks(self, instance, to_skip=to_skip)
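
# Illustrative sketch only: LUInstanceRecreateDisks.CheckArguments above
# accepts either the deprecated flat list of disk indices or a list of
# (index, params) pairs, and rejects duplicate indices. The hypothetical
# helper below normalizes such input the same way for plain integers and
# dicts, without the opcode/ht type machinery used by the LU.
def _ExampleNormalizeDiskSpecs(disks):
  """Normalize a disk argument into sorted, unique (index, params) pairs."""
  if disks and isinstance(disks[0], int):
    # deprecated form: plain list of indices
    return [(idx, {}) for idx in sorted(frozenset(disks))]
  indices = [idx for (idx, _) in disks]
  duplicates = utils.FindDuplicates(indices)
  if duplicates:
    raise errors.OpPrereqError("Some disks have been specified more than"
                               " once: %s" % utils.CommaJoin(duplicates),
                               errors.ECODE_INVAL)
  return list(disks)
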
7089 class LUInstanceRename(LogicalUnit):
7090 """Rename an instance.
7093 HPATH = "instance-rename"
7094 HTYPE = constants.HTYPE_INSTANCE
7096 def CheckArguments(self):
7100 if self.op.ip_check and not self.op.name_check:
7101 # TODO: make the ip check more flexible and not depend on the name check
7102 raise errors.OpPrereqError("IP address check requires a name check",
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 env = _BuildInstanceHookEnvByObject(self, self.instance)
7112 env["INSTANCE_NEW_NAME"] = self.op.new_name
7115 def BuildHooksNodes(self):
7116 """Build hooks nodes.
7119 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7122 def CheckPrereq(self):
7123 """Check prerequisites.
7125 This checks that the instance is in the cluster and is not running.
7128 self.op.instance_name = _ExpandInstanceName(self.cfg,
7129 self.op.instance_name)
7130 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7131 assert instance is not None
7132 _CheckNodeOnline(self, instance.primary_node)
7133 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7134 msg="cannot rename")
7135 self.instance = instance
7137 new_name = self.op.new_name
7138 if self.op.name_check:
7139 hostname = netutils.GetHostname(name=new_name)
7140 if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
7143 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7144 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7145 " same as given hostname '%s'") %
7146 (hostname.name, self.op.new_name),
7148 new_name = self.op.new_name = hostname.name
7149 if (self.op.ip_check and
7150 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7151 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7152 (hostname.ip, new_name),
7153 errors.ECODE_NOTUNIQUE)
7155 instance_list = self.cfg.GetInstanceList()
7156 if new_name in instance_list and new_name != instance.name:
7157 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7158 new_name, errors.ECODE_EXISTS)
7160 def Exec(self, feedback_fn):
7161 """Rename the instance.
7164 inst = self.instance
7165 old_name = inst.name
7167 rename_file_storage = False
7168 if (inst.disk_template in constants.DTS_FILEBASED and
7169 self.op.new_name != inst.name):
7170 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7171 rename_file_storage = True
7173 self.cfg.RenameInstance(inst.name, self.op.new_name)
7174 # Change the instance lock. This is definitely safe while we hold the BGL.
7175 # Otherwise the new lock would have to be added in acquired mode.
7177 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7178 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7180 # re-read the instance from the configuration after rename
7181 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7183 if rename_file_storage:
7184 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7185 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7186 old_file_storage_dir,
7187 new_file_storage_dir)
7188 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7189 " (but the instance has been renamed in Ganeti)" %
7190 (inst.primary_node, old_file_storage_dir,
7191 new_file_storage_dir))
7193 _StartInstanceDisks(self, inst, None)
7195 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7196 old_name, self.op.debug_level)
7197 msg = result.fail_msg
7199 msg = ("Could not run OS rename script for instance %s on node %s"
7200 " (but the instance has been renamed in Ganeti): %s" %
7201 (inst.name, inst.primary_node, msg))
7202 self.proc.LogWarning(msg)
7204 _ShutdownInstanceDisks(self, inst)
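
# Illustrative sketch only: LUInstanceRename.CheckPrereq above resolves the
# requested name, insists that the resolved hostname still matches what the
# user asked for, and refuses names that already exist in the cluster. The
# hypothetical helper below reproduces those prerequisite checks for already
# resolved values, so no DNS lookup or configuration access is needed; the
# error codes chosen here are assumptions for the sketch.
def _ExampleCheckNewInstanceName(old_name, new_name, resolved_name,
                                 existing_names):
  """Validate a rename target the way LUInstanceRename does."""
  if not utils.MatchNameComponent(new_name, [resolved_name]):
    raise errors.OpPrereqError("Resolved hostname '%s' does not look the"
                               " same as given hostname '%s'" %
                               (resolved_name, new_name), errors.ECODE_INVAL)
  if resolved_name in existing_names and resolved_name != old_name:
    raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                               resolved_name, errors.ECODE_EXISTS)
  return resolved_name
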
7209 class LUInstanceRemove(LogicalUnit):
7210 """Remove an instance.
7213 HPATH = "instance-remove"
7214 HTYPE = constants.HTYPE_INSTANCE
7217 def ExpandNames(self):
7218 self._ExpandAndLockInstance()
7219 self.needed_locks[locking.LEVEL_NODE] = []
7220 self.needed_locks[locking.LEVEL_NODE_RES] = []
7221 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7223 def DeclareLocks(self, level):
7224 if level == locking.LEVEL_NODE:
7225 self._LockInstancesNodes()
7226 elif level == locking.LEVEL_NODE_RES:
7228 self.needed_locks[locking.LEVEL_NODE_RES] = \
7229 self.needed_locks[locking.LEVEL_NODE][:]
7231 def BuildHooksEnv(self):
7234 This runs on master, primary and secondary nodes of the instance.
7237 env = _BuildInstanceHookEnvByObject(self, self.instance)
7238 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7241 def BuildHooksNodes(self):
7242 """Build hooks nodes.
7245 nl = [self.cfg.GetMasterNode()]
7246 nl_post = list(self.instance.all_nodes) + nl
7247 return (nl, nl_post)
7249 def CheckPrereq(self):
7250 """Check prerequisites.
7252 This checks that the instance is in the cluster.
7255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256 assert self.instance is not None, \
7257 "Cannot retrieve locked instance %s" % self.op.instance_name
7259 def Exec(self, feedback_fn):
7260 """Remove the instance.
7263 instance = self.instance
7264 logging.info("Shutting down instance %s on node %s",
7265 instance.name, instance.primary_node)
7267 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7268 self.op.shutdown_timeout)
7269 msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
7278 assert (self.owned_locks(locking.LEVEL_NODE) ==
7279 self.owned_locks(locking.LEVEL_NODE_RES))
7280 assert not (set(instance.all_nodes) -
7281 self.owned_locks(locking.LEVEL_NODE)), \
7282 "Not owning correct locks"
7284 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7287 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7288 """Utility function to remove an instance.
7291 logging.info("Removing block devices for instance %s", instance.name)
7293 if not _RemoveDisks(lu, instance):
7294 if not ignore_failures:
7295 raise errors.OpExecError("Can't remove instance's disks")
7296 feedback_fn("Warning: can't remove instance's disks")
7298 logging.info("Removing instance %s out of cluster config", instance.name)
7300 lu.cfg.RemoveInstance(instance.name)
7302 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7303 "Instance lock removal conflict"
7305 # Remove lock for the instance
7306 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7309 class LUInstanceQuery(NoHooksLU):
7310 """Logical unit for querying instances.
7313 # pylint: disable=W0142
7316 def CheckArguments(self):
7317 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7318 self.op.output_fields, self.op.use_locking)
7320 def ExpandNames(self):
7321 self.iq.ExpandNames(self)
7323 def DeclareLocks(self, level):
7324 self.iq.DeclareLocks(self, level)
7326 def Exec(self, feedback_fn):
7327 return self.iq.OldStyleQuery(self)
7330 class LUInstanceFailover(LogicalUnit):
7331 """Failover an instance.
7334 HPATH = "instance-failover"
7335 HTYPE = constants.HTYPE_INSTANCE
7338 def CheckArguments(self):
7339 """Check the arguments.
7342 self.iallocator = getattr(self.op, "iallocator", None)
7343 self.target_node = getattr(self.op, "target_node", None)
7345 def ExpandNames(self):
7346 self._ExpandAndLockInstance()
7348 if self.op.target_node is not None:
7349 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7351 self.needed_locks[locking.LEVEL_NODE] = []
7352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7354 ignore_consistency = self.op.ignore_consistency
7355 shutdown_timeout = self.op.shutdown_timeout
7356 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7359 ignore_consistency=ignore_consistency,
7360 shutdown_timeout=shutdown_timeout,
7361 ignore_ipolicy=self.op.ignore_ipolicy)
7362 self.tasklets = [self._migrater]
7364 def DeclareLocks(self, level):
7365 if level == locking.LEVEL_NODE:
7366 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7367 if instance.disk_template in constants.DTS_EXT_MIRROR:
7368 if self.op.target_node is None:
7369 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7371 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7372 self.op.target_node]
7373 del self.recalculate_locks[locking.LEVEL_NODE]
7375 self._LockInstancesNodes()
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 instance = self._migrater.instance
7384 source_node = instance.primary_node
7385 target_node = self.op.target_node
7387 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7388 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7389 "OLD_PRIMARY": source_node,
7390 "NEW_PRIMARY": target_node,
7393 if instance.disk_template in constants.DTS_INT_MIRROR:
7394 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7395 env["NEW_SECONDARY"] = source_node
7397 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7399 env.update(_BuildInstanceHookEnvByObject(self, instance))
7403 def BuildHooksNodes(self):
7404 """Build hooks nodes.
7407 instance = self._migrater.instance
7408 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7409 return (nl, nl + [instance.primary_node])
7412 class LUInstanceMigrate(LogicalUnit):
7413 """Migrate an instance.
7415 This is migration without shutting down, compared to the failover,
7416 which is done with shutdown.
7419 HPATH = "instance-migrate"
7420 HTYPE = constants.HTYPE_INSTANCE
7423 def ExpandNames(self):
7424 self._ExpandAndLockInstance()
7426 if self.op.target_node is not None:
7427 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7429 self.needed_locks[locking.LEVEL_NODE] = []
7430 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7432 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7433 cleanup=self.op.cleanup,
7435 fallback=self.op.allow_failover,
7436 ignore_ipolicy=self.op.ignore_ipolicy)
7437 self.tasklets = [self._migrater]
7439 def DeclareLocks(self, level):
7440 if level == locking.LEVEL_NODE:
7441 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7442 if instance.disk_template in constants.DTS_EXT_MIRROR:
7443 if self.op.target_node is None:
7444 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7446 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7447 self.op.target_node]
7448 del self.recalculate_locks[locking.LEVEL_NODE]
7450 self._LockInstancesNodes()
7452 def BuildHooksEnv(self):
7455 This runs on master, primary and secondary nodes of the instance.
7458 instance = self._migrater.instance
7459 source_node = instance.primary_node
7460 target_node = self.op.target_node
7461 env = _BuildInstanceHookEnvByObject(self, instance)
7463 "MIGRATE_LIVE": self._migrater.live,
7464 "MIGRATE_CLEANUP": self.op.cleanup,
7465 "OLD_PRIMARY": source_node,
7466 "NEW_PRIMARY": target_node,
7469 if instance.disk_template in constants.DTS_INT_MIRROR:
7470 env["OLD_SECONDARY"] = target_node
7471 env["NEW_SECONDARY"] = source_node
7473 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7477 def BuildHooksNodes(self):
7478 """Build hooks nodes.
7481 instance = self._migrater.instance
7482 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7483 return (nl, nl + [instance.primary_node])
7486 class LUInstanceMove(LogicalUnit):
7487 """Move an instance by data-copying.
7490 HPATH = "instance-move"
7491 HTYPE = constants.HTYPE_INSTANCE
7494 def ExpandNames(self):
7495 self._ExpandAndLockInstance()
7496 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7497 self.op.target_node = target_node
7498 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7499 self.needed_locks[locking.LEVEL_NODE_RES] = []
7500 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7502 def DeclareLocks(self, level):
7503 if level == locking.LEVEL_NODE:
7504 self._LockInstancesNodes(primary_only=True)
7505 elif level == locking.LEVEL_NODE_RES:
7507 self.needed_locks[locking.LEVEL_NODE_RES] = \
7508 self.needed_locks[locking.LEVEL_NODE][:]
7510 def BuildHooksEnv(self):
7513 This runs on master, primary and secondary nodes of the instance.
7517 "TARGET_NODE": self.op.target_node,
7518 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7520 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7523 def BuildHooksNodes(self):
7524 """Build hooks nodes.
7528 self.cfg.GetMasterNode(),
7529 self.instance.primary_node,
7530 self.op.target_node,
7534 def CheckPrereq(self):
7535 """Check prerequisites.
7537 This checks that the instance is in the cluster.
7540 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7541 assert self.instance is not None, \
7542 "Cannot retrieve locked instance %s" % self.op.instance_name
7544 node = self.cfg.GetNodeInfo(self.op.target_node)
7545 assert node is not None, \
7546 "Cannot retrieve locked node %s" % self.op.target_node
7548 self.target_node = target_node = node.name
7550 if target_node == instance.primary_node:
7551 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7552 (instance.name, target_node),
7555 bep = self.cfg.GetClusterInfo().FillBE(instance)
7557 for idx, dsk in enumerate(instance.disks):
7558 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7559 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7560 " cannot copy" % idx, errors.ECODE_STATE)
7562 _CheckNodeOnline(self, target_node)
7563 _CheckNodeNotDrained(self, target_node)
7564 _CheckNodeVmCapable(self, target_node)
7565 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7566 self.cfg.GetNodeGroup(node.group))
7567 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7568 ignore=self.op.ignore_ipolicy)
7570 if instance.admin_state == constants.ADMINST_UP:
7571 # check memory requirements on the secondary node
7572 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7573 instance.name, bep[constants.BE_MAXMEM],
7574 instance.hypervisor)
7576 self.LogInfo("Not checking memory on the secondary node as"
7577 " instance will not be started")
7579 # check bridge existence
7580 _CheckInstanceBridgesExist(self, instance, node=target_node)
7582 def Exec(self, feedback_fn):
7583 """Move an instance.
7585 The move is done by shutting it down on its present node, copying
7586 the data over (slow) and starting it on the new node.
7589 instance = self.instance
7591 source_node = instance.primary_node
7592 target_node = self.target_node
7594 self.LogInfo("Shutting down instance %s on source node %s",
7595 instance.name, source_node)
7597 assert (self.owned_locks(locking.LEVEL_NODE) ==
7598 self.owned_locks(locking.LEVEL_NODE_RES))
7600 result = self.rpc.call_instance_shutdown(source_node, instance,
7601 self.op.shutdown_timeout)
7602 msg = result.fail_msg
7604 if self.op.ignore_consistency:
7605 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7606 " Proceeding anyway. Please make sure node"
7607 " %s is down. Error details: %s",
7608 instance.name, source_node, source_node, msg)
7610 raise errors.OpExecError("Could not shutdown instance %s on"
7612 (instance.name, source_node, msg))
7614 # create the target disks
7616 _CreateDisks(self, instance, target_node=target_node)
7617 except errors.OpExecError:
7618 self.LogWarning("Device creation failed, reverting...")
7620 _RemoveDisks(self, instance, target_node=target_node)
7622 self.cfg.ReleaseDRBDMinors(instance.name)
7625 cluster_name = self.cfg.GetClusterInfo().cluster_name
7628 # activate, get path, copy the data over
7629 for idx, disk in enumerate(instance.disks):
7630 self.LogInfo("Copying data for disk %d", idx)
7631 result = self.rpc.call_blockdev_assemble(target_node, disk,
7632 instance.name, True, idx)
7634 self.LogWarning("Can't assemble newly created disk %d: %s",
7635 idx, result.fail_msg)
7636 errs.append(result.fail_msg)
7638 dev_path = result.payload
7639 result = self.rpc.call_blockdev_export(source_node, disk,
7640 target_node, dev_path,
7643 self.LogWarning("Can't copy data over for disk %d: %s",
7644 idx, result.fail_msg)
7645 errs.append(result.fail_msg)
7649 self.LogWarning("Some disks failed to copy, aborting")
7651 _RemoveDisks(self, instance, target_node=target_node)
7653 self.cfg.ReleaseDRBDMinors(instance.name)
7654 raise errors.OpExecError("Errors during disk copy: %s" %
7657 instance.primary_node = target_node
7658 self.cfg.Update(instance, feedback_fn)
7660 self.LogInfo("Removing the disks on the original node")
7661 _RemoveDisks(self, instance, target_node=source_node)
7663 # Only start the instance if it's marked as up
7664 if instance.admin_state == constants.ADMINST_UP:
7665 self.LogInfo("Starting instance %s on node %s",
7666 instance.name, target_node)
7668 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7669 ignore_secondaries=True)
7671 _ShutdownInstanceDisks(self, instance)
7672 raise errors.OpExecError("Can't activate the instance's disks")
7674 result = self.rpc.call_instance_start(target_node,
7675 (instance, None, None), False)
7676 msg = result.fail_msg
7678 _ShutdownInstanceDisks(self, instance)
7679 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7680 (instance.name, target_node, msg))
7683 class LUNodeMigrate(LogicalUnit):
7684 """Migrate all instances from a node.
7687 HPATH = "node-migrate"
7688 HTYPE = constants.HTYPE_NODE
7691 def CheckArguments(self):
7694 def ExpandNames(self):
7695 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7697 self.share_locks = _ShareAll()
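# Only shared locks are needed here: this LU just reads the
# configuration and submits one migration job per primary instance of
# the node (see Exec below).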
7698 self.needed_locks = {
7699 locking.LEVEL_NODE: [self.op.node_name],
7702 def BuildHooksEnv(self):
7705 This runs on the master, the primary and all the secondaries.
7709 "NODE_NAME": self.op.node_name,
7712 def BuildHooksNodes(self):
7713 """Build hooks nodes.
7716 nl = [self.cfg.GetMasterNode()]
7719 def CheckPrereq(self):
7722 def Exec(self, feedback_fn):
7723 # Prepare jobs for migration instances
7725 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7728 iallocator=self.op.iallocator,
7729 target_node=self.op.target_node,
7730 ignore_ipolicy=self.op.ignore_ipolicy)]
7731 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7734 # TODO: Run iallocator in this opcode and pass correct placement options to
7735 # OpInstanceMigrate. Since other jobs can modify the cluster between
7736 # running the iallocator and the actual migration, a good consistency model
7737 # will have to be found.
7739 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7740 frozenset([self.op.node_name]))
7742 return ResultWithJobs(jobs)
7745 class TLMigrateInstance(Tasklet):
7746 """Tasklet class for instance migration.
7749 @ivar live: whether the migration will be done live or non-live;
7750 this variable is initialized only after CheckPrereq has run
7751 @type cleanup: boolean
7752 @ivar cleanup: Whether we clean up from a failed migration
7753 @type iallocator: string
7754 @ivar iallocator: The iallocator used to determine target_node
7755 @type target_node: string
7756 @ivar target_node: If given, the target_node to reallocate the instance to
7757 @type failover: boolean
7758 @ivar failover: Whether operation results in failover or migration
7759 @type fallback: boolean
7760 @ivar fallback: Whether fallback to failover is allowed if migration not
7762 @type ignore_consistency: boolean
7763 @ivar ignore_consistency: Whether we should ignore consistency between source
7765 @type shutdown_timeout: int
7766 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7767 @type ignore_ipolicy: bool
7768 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7773 _MIGRATION_POLL_INTERVAL = 1 # seconds
7774 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
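# _ExecMigration polls the hypervisor for migration status every
# _MIGRATION_POLL_INTERVAL seconds and emits progress feedback at most
# every _MIGRATION_FEEDBACK_INTERVAL seconds.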
7776 def __init__(self, lu, instance_name, cleanup=False,
7777 failover=False, fallback=False,
7778 ignore_consistency=False,
7779 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7780 ignore_ipolicy=False):
7781 """Initializes this class.
7784 Tasklet.__init__(self, lu)
7787 self.instance_name = instance_name
7788 self.cleanup = cleanup
7789 self.live = False # will be overridden later
7790 self.failover = failover
7791 self.fallback = fallback
7792 self.ignore_consistency = ignore_consistency
7793 self.shutdown_timeout = shutdown_timeout
7794 self.ignore_ipolicy = ignore_ipolicy
7796 def CheckPrereq(self):
7797 """Check prerequisites.
7799 This checks that the instance is in the cluster.
7802 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7803 instance = self.cfg.GetInstanceInfo(instance_name)
7804 assert instance is not None
7805 self.instance = instance
7806 cluster = self.cfg.GetClusterInfo()
7808 if (not self.cleanup and
7809 not instance.admin_state == constants.ADMINST_UP and
7810 not self.failover and self.fallback):
7811 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7812 " switching to failover")
7813 self.failover = True
7815 if instance.disk_template not in constants.DTS_MIRRORED:
7820 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7821 " %s" % (instance.disk_template, text),
7824 if instance.disk_template in constants.DTS_EXT_MIRROR:
7825 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7827 if self.lu.op.iallocator:
7828 self._RunAllocator()
7830 # We set self.target_node as it is required by
7832 self.target_node = self.lu.op.target_node
7834 # Check that the target node is correct in terms of instance policy
7835 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7836 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7837 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7838 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7839 ignore=self.ignore_ipolicy)
7841 # self.target_node is already populated, either directly or by the
7843 target_node = self.target_node
7844 if self.target_node == instance.primary_node:
7845 raise errors.OpPrereqError("Cannot migrate instance %s"
7846 " to its primary (%s)" %
7847 (instance.name, instance.primary_node))
7849 if len(self.lu.tasklets) == 1:
7850 # It is safe to release locks only when we're the only tasklet
7852 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7853 keep=[instance.primary_node, self.target_node])
7856 secondary_nodes = instance.secondary_nodes
7857 if not secondary_nodes:
7858 raise errors.ConfigurationError("No secondary node but using"
7859 " %s disk template" %
7860 instance.disk_template)
7861 target_node = secondary_nodes[0]
7862 if self.lu.op.iallocator or (self.lu.op.target_node and
7863 self.lu.op.target_node != target_node):
7865 text = "failed over"
7868 raise errors.OpPrereqError("Instances with disk template %s cannot"
7869 " be %s to arbitrary nodes"
7870 " (neither an iallocator nor a target"
7871 " node can be passed)" %
7872 (instance.disk_template, text),
7874 nodeinfo = self.cfg.GetNodeInfo(target_node)
7875 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7876 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7877 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7878 ignore=self.ignore_ipolicy)
7880 i_be = cluster.FillBE(instance)
7882 # check memory requirements on the secondary node
7883 if (not self.cleanup and
7884 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7885 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7886 instance.name, i_be[constants.BE_MAXMEM],
7887 instance.hypervisor)
7889 self.lu.LogInfo("Not checking memory on the secondary node as"
7890 " instance will not be started")
7892 # check if failover must be forced instead of migration
7893 if (not self.cleanup and not self.failover and
7894 i_be[constants.BE_ALWAYS_FAILOVER]):
7896 self.lu.LogInfo("Instance configured to always failover; fallback"
7898 self.failover = True
7900 raise errors.OpPrereqError("This instance has been configured to"
7901 " always failover, please allow failover",
7904 # check bridge existence
7905 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7907 if not self.cleanup:
7908 _CheckNodeNotDrained(self.lu, target_node)
7909 if not self.failover:
7910 result = self.rpc.call_instance_migratable(instance.primary_node,
7912 if result.fail_msg and self.fallback:
7913 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7915 self.failover = True
7917 result.Raise("Can't migrate, please use failover",
7918 prereq=True, ecode=errors.ECODE_STATE)
7920 assert not (self.failover and self.cleanup)
7922 if not self.failover:
7923 if self.lu.op.live is not None and self.lu.op.mode is not None:
7924 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7925 " parameters are accepted",
7927 if self.lu.op.live is not None:
7929 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7931 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7932 # reset the 'live' parameter to None so that repeated
7933 # invocations of CheckPrereq do not raise an exception
7934 self.lu.op.live = None
7935 elif self.lu.op.mode is None:
7936 # read the default value from the hypervisor
7937 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7938 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7940 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7942 # Failover is never live
7945 def _RunAllocator(self):
7946 """Run the allocator based on input opcode.
7949 # FIXME: add a self.ignore_ipolicy option
7950 ial = IAllocator(self.cfg, self.rpc,
7951 mode=constants.IALLOCATOR_MODE_RELOC,
7952 name=self.instance_name,
7953 # TODO See why hail breaks with a single node below
7954 relocate_from=[self.instance.primary_node,
7955 self.instance.primary_node],
7958 ial.Run(self.lu.op.iallocator)
7961 raise errors.OpPrereqError("Can't compute nodes using"
7962 " iallocator '%s': %s" %
7963 (self.lu.op.iallocator, ial.info),
7965 if len(ial.result) != ial.required_nodes:
7966 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7967 " of nodes (%s), required %s" %
7968 (self.lu.op.iallocator, len(ial.result),
7969 ial.required_nodes), errors.ECODE_FAULT)
7970 self.target_node = ial.result[0]
7971 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7972 self.instance_name, self.lu.op.iallocator,
7973 utils.CommaJoin(ial.result))
7975 def _WaitUntilSync(self):
7976 """Poll with custom rpc for disk sync.
7978 This uses our own step-based rpc call.
7981 self.feedback_fn("* wait until resync is done")
7985 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7987 self.instance.disks)
7989 for node, nres in result.items():
7990 nres.Raise("Cannot resync disks on node %s" % node)
7991 node_done, node_percent = nres.payload
7992 all_done = all_done and node_done
7993 if node_percent is not None:
7994 min_percent = min(min_percent, node_percent)
7996 if min_percent < 100:
7997 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8000 def _EnsureSecondary(self, node):
8001 """Demote a node to secondary.
8004 self.feedback_fn("* switching node %s to secondary mode" % node)
8006 for dev in self.instance.disks:
8007 self.cfg.SetDiskID(dev, node)
8009 result = self.rpc.call_blockdev_close(node, self.instance.name,
8010 self.instance.disks)
8011 result.Raise("Cannot change disk to secondary on node %s" % node)
8013 def _GoStandalone(self):
8014 """Disconnect from the network.
8017 self.feedback_fn("* changing into standalone mode")
8018 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8019 self.instance.disks)
8020 for node, nres in result.items():
8021 nres.Raise("Cannot disconnect disks node %s" % node)
8023 def _GoReconnect(self, multimaster):
8024 """Reconnect to the network.
8030 msg = "single-master"
8031 self.feedback_fn("* changing disks into %s mode" % msg)
8032 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8033 self.instance.disks,
8034 self.instance.name, multimaster)
8035 for node, nres in result.items():
8036 nres.Raise("Cannot change disks config on node %s" % node)
8038 def _ExecCleanup(self):
8039 """Try to cleanup after a failed migration.
8041 The cleanup is done by:
8042 - check that the instance is running only on one node
8043 (and update the config if needed)
8044 - change disks on its secondary node to secondary
8045 - wait until disks are fully synchronized
8046 - disconnect from the network
8047 - change disks into single-master mode
8048 - wait again until disks are fully synchronized
8051 instance = self.instance
8052 target_node = self.target_node
8053 source_node = self.source_node
8055 # check running on only one node
8056 self.feedback_fn("* checking where the instance actually runs"
8057 " (if this hangs, the hypervisor might be in"
8059 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8060 for node, result in ins_l.items():
8061 result.Raise("Can't contact node %s" % node)
8063 runningon_source = instance.name in ins_l[source_node].payload
8064 runningon_target = instance.name in ins_l[target_node].payload
8066 if runningon_source and runningon_target:
8067 raise errors.OpExecError("Instance seems to be running on two nodes,"
8068 " or the hypervisor is confused; you will have"
8069 " to ensure manually that it runs only on one"
8070 " and restart this operation")
8072 if not (runningon_source or runningon_target):
8073 raise errors.OpExecError("Instance does not seem to be running at all;"
8074 " in this case it's safer to repair by"
8075 " running 'gnt-instance stop' to ensure disk"
8076 " shutdown, and then restarting it")
8078 if runningon_target:
8079 # the migration has actually succeeded, we need to update the config
8080 self.feedback_fn("* instance running on secondary node (%s),"
8081 " updating config" % target_node)
8082 instance.primary_node = target_node
8083 self.cfg.Update(instance, self.feedback_fn)
8084 demoted_node = source_node
8086 self.feedback_fn("* instance confirmed to be running on its"
8087 " primary node (%s)" % source_node)
8088 demoted_node = target_node
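# demoted_node is whichever node the instance is *not* running on; for
# internally mirrored (DRBD) templates it is switched back to the
# secondary role below before re-syncing the disks.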
8090 if instance.disk_template in constants.DTS_INT_MIRROR:
8091 self._EnsureSecondary(demoted_node)
8093 self._WaitUntilSync()
8094 except errors.OpExecError:
8095 # we ignore errors here, since if the device is standalone, it
8096 # won't be able to sync
8098 self._GoStandalone()
8099 self._GoReconnect(False)
8100 self._WaitUntilSync()
8102 self.feedback_fn("* done")
8104 def _RevertDiskStatus(self):
8105 """Try to revert the disk status after a failed migration.
8108 target_node = self.target_node
8109 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8113 self._EnsureSecondary(target_node)
8114 self._GoStandalone()
8115 self._GoReconnect(False)
8116 self._WaitUntilSync()
8117 except errors.OpExecError, err:
8118 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8119 " please try to recover the instance manually;"
8120 " error '%s'" % str(err))
8122 def _AbortMigration(self):
8123 """Call the hypervisor code to abort a started migration.
8126 instance = self.instance
8127 target_node = self.target_node
8128 source_node = self.source_node
8129 migration_info = self.migration_info
8131 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8135 abort_msg = abort_result.fail_msg
8137 logging.error("Aborting migration failed on target node %s: %s",
8138 target_node, abort_msg)
8139 # Don't raise an exception here, as we still have to try to revert the
8140 # disk status, even if this step failed.
8142 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8143 instance, False, self.live)
8144 abort_msg = abort_result.fail_msg
8146 logging.error("Aborting migration failed on source node %s: %s",
8147 source_node, abort_msg)
8149 def _ExecMigration(self):
8150 """Migrate an instance.
8152 The migrate is done by:
8153 - change the disks into dual-master mode
8154 - wait until disks are fully synchronized again
8155 - migrate the instance
8156 - change disks on the new secondary node (the old primary) to secondary
8157 - wait until disks are fully synchronized
8158 - change disks into single-master mode
8161 instance = self.instance
8162 target_node = self.target_node
8163 source_node = self.source_node
8165 # Check for hypervisor version mismatch and warn the user.
8166 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8167 None, [self.instance.hypervisor])
8168 for ninfo in nodeinfo.values():
8169 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8171 (_, _, (src_info, )) = nodeinfo[source_node].payload
8172 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8174 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8175 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8176 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8177 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8178 if src_version != dst_version:
8179 self.feedback_fn("* warning: hypervisor version mismatch between"
8180 " source (%s) and target (%s) node" %
8181 (src_version, dst_version))
8183 self.feedback_fn("* checking disk consistency between source and target")
8184 for dev in instance.disks:
8185 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8186 raise errors.OpExecError("Disk %s is degraded or not fully"
8187 " synchronized on target node,"
8188 " aborting migration" % dev.iv_name)
8190 # First get the migration information from the remote node
8191 result = self.rpc.call_migration_info(source_node, instance)
8192 msg = result.fail_msg
8194 log_err = ("Failed fetching source migration information from %s: %s" %
8196 logging.error(log_err)
8197 raise errors.OpExecError(log_err)
8199 self.migration_info = migration_info = result.payload
8201 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8202 # Then switch the disks to master/master mode
8203 self._EnsureSecondary(target_node)
8204 self._GoStandalone()
8205 self._GoReconnect(True)
8206 self._WaitUntilSync()
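# At this point internally mirrored (DRBD) disks are connected in
# dual-primary mode and fully synced, which is required for live
# migration; externally mirrored templates skip this step.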
8208 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8209 result = self.rpc.call_accept_instance(target_node,
8212 self.nodes_ip[target_node])
8214 msg = result.fail_msg
8216 logging.error("Instance pre-migration failed, trying to revert"
8217 " disk status: %s", msg)
8218 self.feedback_fn("Pre-migration failed, aborting")
8219 self._AbortMigration()
8220 self._RevertDiskStatus()
8221 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8222 (instance.name, msg))
8224 self.feedback_fn("* migrating instance to %s" % target_node)
8225 result = self.rpc.call_instance_migrate(source_node, instance,
8226 self.nodes_ip[target_node],
8228 msg = result.fail_msg
8230 logging.error("Instance migration failed, trying to revert"
8231 " disk status: %s", msg)
8232 self.feedback_fn("Migration failed, aborting")
8233 self._AbortMigration()
8234 self._RevertDiskStatus()
8235 raise errors.OpExecError("Could not migrate instance %s: %s" %
8236 (instance.name, msg))
8238 self.feedback_fn("* starting memory transfer")
8239 last_feedback = time.time()
8241 result = self.rpc.call_instance_get_migration_status(source_node,
8243 msg = result.fail_msg
8244 ms = result.payload # MigrationStatus instance
8245 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8246 logging.error("Instance migration failed, trying to revert"
8247 " disk status: %s", msg)
8248 self.feedback_fn("Migration failed, aborting")
8249 self._AbortMigration()
8250 self._RevertDiskStatus()
8251 raise errors.OpExecError("Could not migrate instance %s: %s" %
8252 (instance.name, msg))
8254 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8255 self.feedback_fn("* memory transfer complete")
8258 if (utils.TimeoutExpired(last_feedback,
8259 self._MIGRATION_FEEDBACK_INTERVAL) and
8260 ms.transferred_ram is not None):
8261 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8262 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8263 last_feedback = time.time()
8265 time.sleep(self._MIGRATION_POLL_INTERVAL)
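# The memory transfer is complete; ask the source node to finalize its
# side of the migration before the instance's primary node is switched
# in the configuration below.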
8267 result = self.rpc.call_instance_finalize_migration_src(source_node,
8271 msg = result.fail_msg
8273 logging.error("Instance migration succeeded, but finalization failed"
8274 " on the source node: %s", msg)
8275 raise errors.OpExecError("Could not finalize instance migration: %s" %
8278 instance.primary_node = target_node
8280 # distribute new instance config to the other nodes
8281 self.cfg.Update(instance, self.feedback_fn)
8283 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8287 msg = result.fail_msg
8289 logging.error("Instance migration succeeded, but finalization failed"
8290 " on the target node: %s", msg)
8291 raise errors.OpExecError("Could not finalize instance migration: %s" %
8294 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8295 self._EnsureSecondary(source_node)
8296 self._WaitUntilSync()
8297 self._GoStandalone()
8298 self._GoReconnect(False)
8299 self._WaitUntilSync()
8301 # If the instance's disk template is `rbd' and there was a successful
8302 # migration, unmap the device from the source node.
8303 if self.instance.disk_template == constants.DT_RBD:
8304 disks = _ExpandCheckDisks(instance, instance.disks)
8305 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8307 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8308 msg = result.fail_msg
8310 logging.error("Migration was successful, but couldn't unmap the"
8311 " block device %s on source node %s: %s",
8312 disk.iv_name, source_node, msg)
8313 logging.error("You need to unmap the device %s manually on %s",
8314 disk.iv_name, source_node)
8316 self.feedback_fn("* done")
8318 def _ExecFailover(self):
8319 """Failover an instance.
8321 The failover is done by shutting it down on its present node and
8322 starting it on the secondary.
8325 instance = self.instance
8326 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8328 source_node = instance.primary_node
8329 target_node = self.target_node
8331 if instance.admin_state == constants.ADMINST_UP:
8332 self.feedback_fn("* checking disk consistency between source and target")
8333 for dev in instance.disks:
8334 # for drbd, these are drbd over lvm
8335 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8336 if primary_node.offline:
8337 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8339 (primary_node.name, dev.iv_name, target_node))
8340 elif not self.ignore_consistency:
8341 raise errors.OpExecError("Disk %s is degraded on target node,"
8342 " aborting failover" % dev.iv_name)
8344 self.feedback_fn("* not checking disk consistency as instance is not"
8347 self.feedback_fn("* shutting down instance on source node")
8348 logging.info("Shutting down instance %s on node %s",
8349 instance.name, source_node)
8351 result = self.rpc.call_instance_shutdown(source_node, instance,
8352 self.shutdown_timeout)
8353 msg = result.fail_msg
8355 if self.ignore_consistency or primary_node.offline:
8356 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8357 " proceeding anyway; please make sure node"
8358 " %s is down; error details: %s",
8359 instance.name, source_node, source_node, msg)
8361 raise errors.OpExecError("Could not shutdown instance %s on"
8363 (instance.name, source_node, msg))
8365 self.feedback_fn("* deactivating the instance's disks on source node")
8366 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8367 raise errors.OpExecError("Can't shut down the instance's disks")
8369 instance.primary_node = target_node
8370 # distribute new instance config to the other nodes
8371 self.cfg.Update(instance, self.feedback_fn)
8373 # Only start the instance if it's marked as up
8374 if instance.admin_state == constants.ADMINST_UP:
8375 self.feedback_fn("* activating the instance's disks on target node %s" %
8377 logging.info("Starting instance %s on node %s",
8378 instance.name, target_node)
8380 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8381 ignore_secondaries=True)
8383 _ShutdownInstanceDisks(self.lu, instance)
8384 raise errors.OpExecError("Can't activate the instance's disks")
8386 self.feedback_fn("* starting the instance on the target node %s" %
8388 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8390 msg = result.fail_msg
8392 _ShutdownInstanceDisks(self.lu, instance)
8393 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8394 (instance.name, target_node, msg))
8396 def Exec(self, feedback_fn):
8397 """Perform the migration.
8400 self.feedback_fn = feedback_fn
8401 self.source_node = self.instance.primary_node
8403 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8404 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8405 self.target_node = self.instance.secondary_nodes[0]
8406 # Otherwise self.target_node has been populated either
8407 # directly, or through an iallocator.
8409 self.all_nodes = [self.source_node, self.target_node]
8410 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8411 in self.cfg.GetMultiNodeInfo(self.all_nodes))
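# nodes_ip maps each involved node to its secondary (replication) IP;
# it is used for the DRBD disconnect/attach RPCs and for telling the
# source node where to migrate the instance to.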
8414 feedback_fn("Failover instance %s" % self.instance.name)
8415 self._ExecFailover()
8417 feedback_fn("Migrating instance %s" % self.instance.name)
8420 return self._ExecCleanup()
8422 return self._ExecMigration()
8425 def _CreateBlockDev(lu, node, instance, device, force_create,
8427 """Create a tree of block devices on a given node.
8429 If this device type has to be created on secondaries, create it and
8432 If not, just recurse to children keeping the same 'force' value.
8434 @param lu: the lu on whose behalf we execute
8435 @param node: the node on which to create the device
8436 @type instance: L{objects.Instance}
8437 @param instance: the instance which owns the device
8438 @type device: L{objects.Disk}
8439 @param device: the device to create
8440 @type force_create: boolean
8441 @param force_create: whether to force creation of this device; this
8442 will be changed to True whenever we find a device which has
8443 CreateOnSecondary() attribute
8444 @param info: the extra 'metadata' we should attach to the device
8445 (this will be represented as a LVM tag)
8446 @type force_open: boolean
8447 @param force_open: this parameter will be passed to the
8448 L{backend.BlockdevCreate} function where it specifies
8449 whether we run on primary or not, and it affects both
8450 the child assembly and the device's own Open() execution
8453 if device.CreateOnSecondary():
8457 for child in device.children:
8458 _CreateBlockDev(lu, node, instance, child, force_create,
8461 if not force_create:
8464 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8467 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8468 """Create a single block device on a given node.
8470 This will not recurse over children of the device, so they must be
8473 @param lu: the lu on whose behalf we execute
8474 @param node: the node on which to create the device
8475 @type instance: L{objects.Instance}
8476 @param instance: the instance which owns the device
8477 @type device: L{objects.Disk}
8478 @param device: the device to create
8479 @param info: the extra 'metadata' we should attach to the device
8480 (this will be represented as a LVM tag)
8481 @type force_open: boolean
8482 @param force_open: this parameter will be passed to the
8483 L{backend.BlockdevCreate} function where it specifies
8484 whether we run on primary or not, and it affects both
8485 the child assembly and the device's own Open() execution
8488 lu.cfg.SetDiskID(device, node)
8489 result = lu.rpc.call_blockdev_create(node, device, device.size,
8490 instance.name, force_open, info)
8491 result.Raise("Can't create block device %s on"
8492 " node %s for instance %s" % (device, node, instance.name))
8493 if device.physical_id is None:
8494 device.physical_id = result.payload
8497 def _GenerateUniqueNames(lu, exts):
8498 """Generate a suitable LV name.
8500 This will generate a logical volume name for the given instance.
8505 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8506 results.append("%s%s" % (new_id, val))
8510 def _ComputeLDParams(disk_template, disk_params):
8511 """Computes Logical Disk parameters from Disk Template parameters.
8513 @type disk_template: string
8514 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8515 @type disk_params: dict
8516 @param disk_params: disk template parameters; dict(template_name -> parameters)
8518 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8519 contains the LD parameters of the node. The tree is flattened in-order.
8522 if disk_template not in constants.DISK_TEMPLATES:
8523 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8526 dt_params = disk_params[disk_template]
8527 if disk_template == constants.DT_DRBD8:
8529 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8530 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8531 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8532 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8533 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8534 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8535 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8536 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8537 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8538 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8539 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8540 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8544 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8547 result.append(drbd_params)
8551 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8554 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8556 result.append(data_params)
8560 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8563 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8565 result.append(meta_params)
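# For DRBD8 the flattened result is [drbd_params, data_lv_params,
# meta_lv_params], matching the in-order layout of the disk tree that
# _GenerateDiskTemplate unpacks.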
8567 elif (disk_template == constants.DT_FILE or
8568 disk_template == constants.DT_SHARED_FILE):
8569 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8571 elif disk_template == constants.DT_PLAIN:
8573 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8576 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8578 result.append(params)
8580 elif disk_template == constants.DT_BLOCK:
8581 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8583 elif disk_template == constants.DT_RBD:
8585 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8588 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8590 result.append(params)
8595 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8596 iv_name, p_minor, s_minor, drbd_params, data_params,
8598 """Generate a drbd8 device complete with its children.
8601 assert len(vgnames) == len(names) == 2
8602 port = lu.cfg.AllocatePort()
8603 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
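# Build the two backing logical volumes (data plus DRBD_META_SIZE worth
# of metadata) and stack a DRBD8 device on top of them; its logical_id
# records both nodes, the allocated port and the DRBD identifiers.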
8605 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8606 logical_id=(vgnames[0], names[0]),
8608 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8609 logical_id=(vgnames[1], names[1]),
8611 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8612 logical_id=(primary, secondary, port,
8615 children=[dev_data, dev_meta],
8616 iv_name=iv_name, params=drbd_params)
8620 def _GenerateDiskTemplate(lu, template_name,
8621 instance_name, primary_node,
8622 secondary_nodes, disk_info,
8623 file_storage_dir, file_driver,
8624 base_index, feedback_fn, disk_params):
8625 """Generate the entire disk layout for a given template type.
8628 #TODO: compute space requirements
8630 vgname = lu.cfg.GetVGName()
8631 disk_count = len(disk_info)
8633 ld_params = _ComputeLDParams(template_name, disk_params)
8634 if template_name == constants.DT_DISKLESS:
8636 elif template_name == constants.DT_PLAIN:
8638 raise errors.ProgrammerError("Wrong template configuration")
8640 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8641 for i in range(disk_count)])
8642 for idx, disk in enumerate(disk_info):
8643 disk_index = idx + base_index
8644 vg = disk.get(constants.IDISK_VG, vgname)
8645 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8646 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8647 size=disk[constants.IDISK_SIZE],
8648 logical_id=(vg, names[idx]),
8649 iv_name="disk/%d" % disk_index,
8650 mode=disk[constants.IDISK_MODE],
8651 params=ld_params[0])
8652 disks.append(disk_dev)
8653 elif template_name == constants.DT_DRBD8:
8654 drbd_params, data_params, meta_params = ld_params
8655 if len(secondary_nodes) != 1:
8656 raise errors.ProgrammerError("Wrong template configuration")
8657 remote_node = secondary_nodes[0]
8658 minors = lu.cfg.AllocateDRBDMinor(
8659 [primary_node, remote_node] * len(disk_info), instance_name)
8662 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8663 for i in range(disk_count)]):
8664 names.append(lv_prefix + "_data")
8665 names.append(lv_prefix + "_meta")
8666 for idx, disk in enumerate(disk_info):
8667 disk_index = idx + base_index
8668 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8669 data_vg = disk.get(constants.IDISK_VG, vgname)
8670 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8671 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8672 disk[constants.IDISK_SIZE],
8674 names[idx * 2:idx * 2 + 2],
8675 "disk/%d" % disk_index,
8676 minors[idx * 2], minors[idx * 2 + 1],
8677 drbd_params, data_params, meta_params)
8678 disk_dev.mode = disk[constants.IDISK_MODE]
8679 disks.append(disk_dev)
8680 elif template_name == constants.DT_FILE:
8682 raise errors.ProgrammerError("Wrong template configuration")
8684 opcodes.RequireFileStorage()
8686 for idx, disk in enumerate(disk_info):
8687 disk_index = idx + base_index
8688 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8689 size=disk[constants.IDISK_SIZE],
8690 iv_name="disk/%d" % disk_index,
8691 logical_id=(file_driver,
8692 "%s/disk%d" % (file_storage_dir,
8694 mode=disk[constants.IDISK_MODE],
8695 params=ld_params[0])
8696 disks.append(disk_dev)
8697 elif template_name == constants.DT_SHARED_FILE:
8699 raise errors.ProgrammerError("Wrong template configuration")
8701 opcodes.RequireSharedFileStorage()
8703 for idx, disk in enumerate(disk_info):
8704 disk_index = idx + base_index
8705 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8706 size=disk[constants.IDISK_SIZE],
8707 iv_name="disk/%d" % disk_index,
8708 logical_id=(file_driver,
8709 "%s/disk%d" % (file_storage_dir,
8711 mode=disk[constants.IDISK_MODE],
8712 params=ld_params[0])
8713 disks.append(disk_dev)
8714 elif template_name == constants.DT_BLOCK:
8716 raise errors.ProgrammerError("Wrong template configuration")
8718 for idx, disk in enumerate(disk_info):
8719 disk_index = idx + base_index
8720 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8721 size=disk[constants.IDISK_SIZE],
8722 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8723 disk[constants.IDISK_ADOPT]),
8724 iv_name="disk/%d" % disk_index,
8725 mode=disk[constants.IDISK_MODE],
8726 params=ld_params[0])
8727 disks.append(disk_dev)
8728 elif template_name == constants.DT_RBD:
8730 raise errors.ProgrammerError("Wrong template configuration")
8732 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8733 for i in range(disk_count)])
8735 for idx, disk in enumerate(disk_info):
8736 disk_index = idx + base_index
8737 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8738 size=disk[constants.IDISK_SIZE],
8739 logical_id=("rbd", names[idx]),
8740 iv_name="disk/%d" % disk_index,
8741 mode=disk[constants.IDISK_MODE],
8742 params=ld_params[0])
8743 disks.append(disk_dev)
8746 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8750 def _GetInstanceInfoText(instance):
8751 Compute the text that should be added to the disk's metadata.
8754 return "originstname+%s" % instance.name
8757 def _CalcEta(time_taken, written, total_size):
8758 """Calculates the ETA based on size written and total size.
8760 @param time_taken: The time taken so far
8761 @param written: amount written so far
8762 @param total_size: The total size of data to be written
8763 @return: The remaining time in seconds
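For example, if 256 MiB out of a total of 1024 MiB have been written in
2.0 seconds, the average is 2.0 / 256 seconds per MiB, giving an ETA of
(1024 - 256) * (2.0 / 256) = 6.0 seconds.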
8766 avg_time = time_taken / float(written)
8767 return (total_size - written) * avg_time
8770 def _WipeDisks(lu, instance):
8771 """Wipes instance disks.
8773 @type lu: L{LogicalUnit}
8774 @param lu: the logical unit on whose behalf we execute
8775 @type instance: L{objects.Instance}
8776 @param instance: the instance whose disks we should wipe
8777 @return: the success of the wipe
8780 node = instance.primary_node
8782 for device in instance.disks:
8783 lu.cfg.SetDiskID(device, node)
8785 logging.info("Pause sync of instance %s disks", instance.name)
8786 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8788 for idx, success in enumerate(result.payload):
8790 logging.warn("pause-sync of instance %s for disks %d failed",
8794 for idx, device in enumerate(instance.disks):
8795 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8796 # but at most MAX_WIPE_CHUNK
8797 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8798 constants.MIN_WIPE_CHUNK_PERCENT)
8799 # we _must_ make this an int, otherwise rounding errors will
8801 wipe_chunk_size = int(wipe_chunk_size)
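# Illustration (assuming, e.g., MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB; see constants.py for the real values): a
# 4096 MiB disk is wiped in 409 MiB chunks, while a 1 TiB disk is
# capped at 1024 MiB chunks.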
8803 lu.LogInfo("* Wiping disk %d", idx)
8804 logging.info("Wiping disk %d for instance %s, node %s using"
8805 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8810 start_time = time.time()
8812 while offset < size:
8813 wipe_size = min(wipe_chunk_size, size - offset)
8814 logging.debug("Wiping disk %d, offset %s, chunk %s",
8815 idx, offset, wipe_size)
8816 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8817 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8818 (idx, offset, wipe_size))
8821 if now - last_output >= 60:
8822 eta = _CalcEta(now - start_time, offset, size)
8823 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8824 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8827 logging.info("Resume sync of instance %s disks", instance.name)
8829 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8831 for idx, success in enumerate(result.payload):
8833 lu.LogWarning("Resume sync of disk %d failed, please have a"
8834 " look at the status and troubleshoot the issue", idx)
8835 logging.warn("resume-sync of instance %s for disks %d failed",
8839 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8840 """Create all disks for an instance.
8842 This abstracts away some work from AddInstance.
8844 @type lu: L{LogicalUnit}
8845 @param lu: the logical unit on whose behalf we execute
8846 @type instance: L{objects.Instance}
8847 @param instance: the instance whose disks we should create
8849 @param to_skip: list of indices to skip
8850 @type target_node: string
8851 @param target_node: if passed, overrides the target node for creation
8853 @return: the success of the creation
8856 info = _GetInstanceInfoText(instance)
8857 if target_node is None:
8858 pnode = instance.primary_node
8859 all_nodes = instance.all_nodes
8864 if instance.disk_template in constants.DTS_FILEBASED:
8865 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8866 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8868 result.Raise("Failed to create directory '%s' on"
8869 " node %s" % (file_storage_dir, pnode))
8871 # Note: this needs to be kept in sync with adding of disks in
8872 # LUInstanceSetParams
8873 for idx, device in enumerate(instance.disks):
8874 if to_skip and idx in to_skip:
8876 logging.info("Creating volume %s for instance %s",
8877 device.iv_name, instance.name)
8879 for node in all_nodes:
8880 f_create = node == pnode
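# Force creation (and opening) only on the primary node; on the other
# nodes _CreateBlockDev creates just the pieces whose device type
# requires presence on secondaries (CreateOnSecondary()).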
8881 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8884 def _RemoveDisks(lu, instance, target_node=None):
8885 """Remove all disks for an instance.
8887 This abstracts away some work from `AddInstance()` and
8888 `RemoveInstance()`. Note that in case some of the devices couldn't
8889 be removed, the removal will continue with the other ones (compare
8890 with `_CreateDisks()`).
8892 @type lu: L{LogicalUnit}
8893 @param lu: the logical unit on whose behalf we execute
8894 @type instance: L{objects.Instance}
8895 @param instance: the instance whose disks we should remove
8896 @type target_node: string
8897 @param target_node: used to override the node on which to remove the disks
8899 @return: the success of the removal
8902 logging.info("Removing block devices for instance %s", instance.name)
8905 for device in instance.disks:
8907 edata = [(target_node, device)]
8909 edata = device.ComputeNodeTree(instance.primary_node)
8910 for node, disk in edata:
8911 lu.cfg.SetDiskID(disk, node)
8912 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8914 lu.LogWarning("Could not remove block device %s on node %s,"
8915 " continuing anyway: %s", device.iv_name, node, msg)
8918 # if this is a DRBD disk, return its port to the pool
8919 if device.dev_type in constants.LDS_DRBD:
8920 tcp_port = device.logical_id[2]
8921 lu.cfg.AddTcpUdpPort(tcp_port)
8923 if instance.disk_template == constants.DT_FILE:
8924 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8928 tgt = instance.primary_node
8929 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8931 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8932 file_storage_dir, instance.primary_node, result.fail_msg)
8938 def _ComputeDiskSizePerVG(disk_template, disks):
8939 """Compute disk size requirements in the volume group
8942 def _compute(disks, payload):
8943 """Universal algorithm.
8948 vgs[disk[constants.IDISK_VG]] = \
8949 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8953 # Required free disk space as a function of disk and swap space
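# (returned as a dict mapping each volume group name to the megabytes
# required in it)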
8955 constants.DT_DISKLESS: {},
8956 constants.DT_PLAIN: _compute(disks, 0),
8957 # 128 MB are added for drbd metadata for each disk
8958 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8959 constants.DT_FILE: {},
8960 constants.DT_SHARED_FILE: {},
8963 if disk_template not in req_size_dict:
8964 raise errors.ProgrammerError("Disk template '%s' size requirement"
8965 " is unknown" % disk_template)
8967 return req_size_dict[disk_template]
8970 def _ComputeDiskSize(disk_template, disks):
8971 """Compute disk size requirements in the volume group
8974 # Required free disk space as a function of disk and swap space
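# (e.g. two DRBD8 disks of 10240 MiB each require
# 2 * (10240 + 128) = 20736 MiB, while file-, block- and rbd-based
# templates need no volume group space)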
8976 constants.DT_DISKLESS: None,
8977 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8978 # 128 MB are added for drbd metadata for each disk
8980 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8981 constants.DT_FILE: None,
8982 constants.DT_SHARED_FILE: 0,
8983 constants.DT_BLOCK: 0,
8984 constants.DT_RBD: 0,
8987 if disk_template not in req_size_dict:
8988 raise errors.ProgrammerError("Disk template '%s' size requirement"
8989 " is unknown" % disk_template)
8991 return req_size_dict[disk_template]
8994 def _FilterVmNodes(lu, nodenames):
8995 """Filters out non-vm_capable nodes from a list.
8997 @type lu: L{LogicalUnit}
8998 @param lu: the logical unit for which we check
8999 @type nodenames: list
9000 @param nodenames: the list of nodes on which we should check
9002 @return: the list of vm-capable nodes
9005 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9006 return [name for name in nodenames if name not in non_vm_nodes]
9009 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9010 """Hypervisor parameter validation.
9012 This function abstracts the hypervisor parameter validation to be
9013 used in both instance create and instance modify.
9015 @type lu: L{LogicalUnit}
9016 @param lu: the logical unit for which we check
9017 @type nodenames: list
9018 @param nodenames: the list of nodes on which we should check
9019 @type hvname: string
9020 @param hvname: the name of the hypervisor we should use
9021 @type hvparams: dict
9022 @param hvparams: the parameters which we need to check
9023 @raise errors.OpPrereqError: if the parameters are not valid
9026 nodenames = _FilterVmNodes(lu, nodenames)
9028 cluster = lu.cfg.GetClusterInfo()
9029 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9031 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9032 for node in nodenames:
9036 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9039 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9040 """OS parameters validation.
9042 @type lu: L{LogicalUnit}
9043 @param lu: the logical unit for which we check
9044 @type required: boolean
9045 @param required: whether the validation should fail if the OS is not
9047 @type nodenames: list
9048 @param nodenames: the list of nodes on which we should check
9049 @type osname: string
9050 @param osname: the name of the OS we should use
9051 @type osparams: dict
9052 @param osparams: the parameters which we need to check
9053 @raise errors.OpPrereqError: if the parameters are not valid
9056 nodenames = _FilterVmNodes(lu, nodenames)
9057 result = lu.rpc.call_os_validate(nodenames, required, osname,
9058 [constants.OS_VALIDATE_PARAMETERS],
9060 for node, nres in result.items():
9061 # we don't check for offline cases since this should be run only
9062 # against the master node and/or an instance's nodes
9063 nres.Raise("OS Parameters validation failed on node %s" % node)
9064 if not nres.payload:
9065 lu.LogInfo("OS %s not found on node %s, validation skipped",
9069 class LUInstanceCreate(LogicalUnit):
9070 """Create an instance.
9073 HPATH = "instance-add"
9074 HTYPE = constants.HTYPE_INSTANCE
9077 def CheckArguments(self):
9081 # do not require name_check to ease forward/backward compatibility
9083 if self.op.no_install and self.op.start:
9084 self.LogInfo("No-installation mode selected, disabling startup")
9085 self.op.start = False
9086 # validate/normalize the instance name
9087 self.op.instance_name = \
9088 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9090 if self.op.ip_check and not self.op.name_check:
9091 # TODO: make the ip check more flexible and not depend on the name check
9092 raise errors.OpPrereqError("Cannot do IP address check without a name"
9093 " check", errors.ECODE_INVAL)
9095 # check nics' parameter names
9096 for nic in self.op.nics:
9097 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9099 # check disks: parameter names and consistent adopt/no-adopt strategy
9100 has_adopt = has_no_adopt = False
9101 for disk in self.op.disks:
9102 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9103 if constants.IDISK_ADOPT in disk:
9107 if has_adopt and has_no_adopt:
9108 raise errors.OpPrereqError("Either all disks are adopted or none is",
9111 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9112 raise errors.OpPrereqError("Disk adoption is not supported for the"
9113 " '%s' disk template" %
9114 self.op.disk_template,
9116 if self.op.iallocator is not None:
9117 raise errors.OpPrereqError("Disk adoption not allowed with an"
9118 " iallocator script", errors.ECODE_INVAL)
9119 if self.op.mode == constants.INSTANCE_IMPORT:
9120 raise errors.OpPrereqError("Disk adoption not allowed for"
9121 " instance import", errors.ECODE_INVAL)
9123 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9124 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9125 " but no 'adopt' parameter given" %
9126 self.op.disk_template,
9129 self.adopt_disks = has_adopt
9131 # instance name verification
9132 if self.op.name_check:
9133 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9134 self.op.instance_name = self.hostname1.name
9135 # used in CheckPrereq for ip ping check
9136 self.check_ip = self.hostname1.ip
9138 self.check_ip = None
9140 # file storage checks
9141 if (self.op.file_driver and
9142 not self.op.file_driver in constants.FILE_DRIVER):
9143 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9144 self.op.file_driver, errors.ECODE_INVAL)
9146 if self.op.disk_template == constants.DT_FILE:
9147 opcodes.RequireFileStorage()
9148 elif self.op.disk_template == constants.DT_SHARED_FILE:
9149 opcodes.RequireSharedFileStorage()
9151 ### Node/iallocator related checks
9152 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9154 if self.op.pnode is not None:
9155 if self.op.disk_template in constants.DTS_INT_MIRROR:
9156 if self.op.snode is None:
9157 raise errors.OpPrereqError("The networked disk templates need"
9158 " a mirror node", errors.ECODE_INVAL)
9160 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9162 self.op.snode = None
9164 self._cds = _GetClusterDomainSecret()
9166 if self.op.mode == constants.INSTANCE_IMPORT:
9167 # On import force_variant must be True, because if we forced it at
9168 # initial install, our only chance when importing it back is that it
9170 self.op.force_variant = True
9172 if self.op.no_install:
9173 self.LogInfo("No-installation mode has no effect during import")
9175 elif self.op.mode == constants.INSTANCE_CREATE:
9176 if self.op.os_type is None:
9177 raise errors.OpPrereqError("No guest OS specified",
9179 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9180 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9181 " installation" % self.op.os_type,
9183 if self.op.disk_template is None:
9184 raise errors.OpPrereqError("No disk template specified",
9187 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9188 # Check handshake to ensure both clusters have the same domain secret
9189 src_handshake = self.op.source_handshake
9190 if not src_handshake:
9191 raise errors.OpPrereqError("Missing source handshake",
9194 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9197 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9200 # Load and check source CA
9201 self.source_x509_ca_pem = self.op.source_x509_ca
9202 if not self.source_x509_ca_pem:
9203 raise errors.OpPrereqError("Missing source X509 CA",
9207 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9209 except OpenSSL.crypto.Error, err:
9210 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9211 (err, ), errors.ECODE_INVAL)
9213 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9214 if errcode is not None:
9215 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9218 self.source_x509_ca = cert
9220 src_instance_name = self.op.source_instance_name
9221 if not src_instance_name:
9222 raise errors.OpPrereqError("Missing source instance name",
9225 self.source_instance_name = \
9226 netutils.GetHostname(name=src_instance_name).name
9229 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9230 self.op.mode, errors.ECODE_INVAL)
9232 def ExpandNames(self):
9233 """ExpandNames for CreateInstance.
9235 Figure out the right locks for instance creation.
9238 self.needed_locks = {}
9240 instance_name = self.op.instance_name
9241 # this is just a preventive check, but someone might still add this
9242 # instance in the meantime, and creation will fail at lock-add time
9243 if instance_name in self.cfg.GetInstanceList():
9244 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9245 instance_name, errors.ECODE_EXISTS)
9247 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9249 if self.op.iallocator:
9250 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9251 # specifying a group on instance creation and then selecting nodes from
9253 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9254 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9256 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9257 nodelist = [self.op.pnode]
9258 if self.op.snode is not None:
9259 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9260 nodelist.append(self.op.snode)
9261 self.needed_locks[locking.LEVEL_NODE] = nodelist
9262 # Lock resources of instance's primary and secondary nodes (copy to
9263 # prevent accidental modification)
9264 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9266 # in case of import lock the source node too
9267 if self.op.mode == constants.INSTANCE_IMPORT:
9268 src_node = self.op.src_node
9269 src_path = self.op.src_path
9271 if src_path is None:
9272 self.op.src_path = src_path = self.op.instance_name
9274 if src_node is None:
9275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9276 self.op.src_node = None
9277 if os.path.isabs(src_path):
9278 raise errors.OpPrereqError("Importing an instance from a path"
9279 " requires a source node option",
9282 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9283 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9284 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9285 if not os.path.isabs(src_path):
9286 self.op.src_path = src_path = \
9287 utils.PathJoin(constants.EXPORT_DIR, src_path)
9289 def _RunAllocator(self):
9290 """Run the allocator based on input opcode.
9293 nics = [n.ToDict() for n in self.nics]
9294 ial = IAllocator(self.cfg, self.rpc,
9295 mode=constants.IALLOCATOR_MODE_ALLOC,
9296 name=self.op.instance_name,
9297 disk_template=self.op.disk_template,
9300 vcpus=self.be_full[constants.BE_VCPUS],
9301 memory=self.be_full[constants.BE_MAXMEM],
9304 hypervisor=self.op.hypervisor,
9307 ial.Run(self.op.iallocator)
9310 raise errors.OpPrereqError("Can't compute nodes using"
9311 " iallocator '%s': %s" %
9312 (self.op.iallocator, ial.info),
9314 if len(ial.result) != ial.required_nodes:
9315 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9316 " of nodes (%s), required %s" %
9317 (self.op.iallocator, len(ial.result),
9318 ial.required_nodes), errors.ECODE_FAULT)
9319 self.op.pnode = ial.result[0]
9320 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9321 self.op.instance_name, self.op.iallocator,
9322 utils.CommaJoin(ial.result))
9323 if ial.required_nodes == 2:
9324 self.op.snode = ial.result[1]
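# From this point on the opcode carries concrete node names (pnode, and
# snode for two-node allocations), so the later checks need not distinguish
# between iallocator-driven and manually specified placement.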
9326 def BuildHooksEnv(self):
9329 This runs on master, primary and secondary nodes of the instance.
9333 "ADD_MODE": self.op.mode,
9335 if self.op.mode == constants.INSTANCE_IMPORT:
9336 env["SRC_NODE"] = self.op.src_node
9337 env["SRC_PATH"] = self.op.src_path
9338 env["SRC_IMAGES"] = self.src_images
9340 env.update(_BuildInstanceHookEnv(
9341 name=self.op.instance_name,
9342 primary_node=self.op.pnode,
9343 secondary_nodes=self.secondaries,
9344 status=self.op.start,
9345 os_type=self.op.os_type,
9346 minmem=self.be_full[constants.BE_MINMEM],
9347 maxmem=self.be_full[constants.BE_MAXMEM],
9348 vcpus=self.be_full[constants.BE_VCPUS],
9349 nics=_NICListToTuple(self, self.nics),
9350 disk_template=self.op.disk_template,
9351 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9352 for d in self.disks],
9355 hypervisor_name=self.op.hypervisor,
9361 def BuildHooksNodes(self):
9362 """Build hooks nodes.
9365 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9368 def _ReadExportInfo(self):
9369 """Reads the export information from disk.
9371 It will override the opcode source node and path with the actual
9372 information, if these two were not specified before.
9374 @return: the export information
9377 assert self.op.mode == constants.INSTANCE_IMPORT
9379 src_node = self.op.src_node
9380 src_path = self.op.src_path
9382 if src_node is None:
9383 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9384 exp_list = self.rpc.call_export_list(locked_nodes)
9386 for node in exp_list:
9387 if exp_list[node].fail_msg:
9389 if src_path in exp_list[node].payload:
9391 self.op.src_node = src_node = node
9392 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9396 raise errors.OpPrereqError("No export found for relative path %s" %
9397 src_path, errors.ECODE_INVAL)
9399 _CheckNodeOnline(self, src_node)
9400 result = self.rpc.call_export_info(src_node, src_path)
9401 result.Raise("No export or invalid export found in dir %s" % src_path)
9403 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9404 if not export_info.has_section(constants.INISECT_EXP):
9405 raise errors.ProgrammerError("Corrupted export config",
9406 errors.ECODE_ENVIRON)
9408 ei_version = export_info.get(constants.INISECT_EXP, "version")
9409 if (int(ei_version) != constants.EXPORT_VERSION):
9410 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9411 (ei_version, constants.EXPORT_VERSION),
9412 errors.ECODE_ENVIRON)
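# The parsed export information (a SerializableConfigParser) is consumed by
# CheckPrereq, which passes it on to _ReadExportParams below.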
9415 def _ReadExportParams(self, einfo):
9416 """Use export parameters as defaults.
9418 In case the opcode doesn't specify (as in override) some instance
9419 parameters, then try to use them from the export information, if
9423 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9425 if self.op.disk_template is None:
9426 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9427 self.op.disk_template = einfo.get(constants.INISECT_INS,
9429 if self.op.disk_template not in constants.DISK_TEMPLATES:
9430 raise errors.OpPrereqError("Disk template specified in configuration"
9431 " file is not one of the allowed values:"
9432 " %s" % " ".join(constants.DISK_TEMPLATES))
9434 raise errors.OpPrereqError("No disk template specified and the export"
9435 " is missing the disk_template information",
9438 if not self.op.disks:
9440 # TODO: import the disk iv_name too
9441 for idx in range(constants.MAX_DISKS):
9442 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9443 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9444 disks.append({constants.IDISK_SIZE: disk_sz})
9445 self.op.disks = disks
9446 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9447 raise errors.OpPrereqError("No disk info specified and the export"
9448 " is missing the disk information",
9451 if not self.op.nics:
9453 for idx in range(constants.MAX_NICS):
9454 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9456 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9457 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9464 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9465 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9467 if (self.op.hypervisor is None and
9468 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9469 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9471 if einfo.has_section(constants.INISECT_HYP):
9472 # use the export parameters but do not override the ones
9473 # specified by the user
9474 for name, value in einfo.items(constants.INISECT_HYP):
9475 if name not in self.op.hvparams:
9476 self.op.hvparams[name] = value
9478 if einfo.has_section(constants.INISECT_BEP):
9479 # use the parameters, without overriding
9480 for name, value in einfo.items(constants.INISECT_BEP):
9481 if name not in self.op.beparams:
9482 self.op.beparams[name] = value
9483 # Compatibility for the old "memory" be param
9484 if name == constants.BE_MEMORY:
9485 if constants.BE_MAXMEM not in self.op.beparams:
9486 self.op.beparams[constants.BE_MAXMEM] = value
9487 if constants.BE_MINMEM not in self.op.beparams:
9488 self.op.beparams[constants.BE_MINMEM] = value
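# Illustrative example (assumed values): an export carrying only the old
# "memory" parameter, e.g. memory=512, ends up as maxmem=512 and minmem=512
# unless the opcode already specified those explicitly.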
9490 # try to read the parameters old style, from the main section
9491 for name in constants.BES_PARAMETERS:
9492 if (name not in self.op.beparams and
9493 einfo.has_option(constants.INISECT_INS, name)):
9494 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9496 if einfo.has_section(constants.INISECT_OSP):
9497 # use the parameters, without overriding
9498 for name, value in einfo.items(constants.INISECT_OSP):
9499 if name not in self.op.osparams:
9500 self.op.osparams[name] = value
9502 def _RevertToDefaults(self, cluster):
9503 """Revert the instance parameters to the default values.
9507 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9508 for name in self.op.hvparams.keys():
9509 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9510 del self.op.hvparams[name]
9512 be_defs = cluster.SimpleFillBE({})
9513 for name in self.op.beparams.keys():
9514 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9515 del self.op.beparams[name]
9517 nic_defs = cluster.SimpleFillNIC({})
9518 for nic in self.op.nics:
9519 for name in constants.NICS_PARAMETERS:
9520 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9523 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9524 for name in self.op.osparams.keys():
9525 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9526 del self.op.osparams[name]
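# Net effect: hv/be/os parameters (and per-NIC settings) whose requested
# value equals the current cluster default are dropped again, so only
# genuine overrides are kept.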
9528 def _CalculateFileStorageDir(self):
9529 """Calculate final instance file storage dir.
9532 # file storage dir calculation/check
9533 self.instance_file_storage_dir = None
9534 if self.op.disk_template in constants.DTS_FILEBASED:
9535 # build the full file storage dir path
9538 if self.op.disk_template == constants.DT_SHARED_FILE:
9539 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9541 get_fsd_fn = self.cfg.GetFileStorageDir
9543 cfg_storagedir = get_fsd_fn()
9544 if not cfg_storagedir:
9545 raise errors.OpPrereqError("Cluster file storage dir not defined")
9546 joinargs.append(cfg_storagedir)
9548 if self.op.file_storage_dir is not None:
9549 joinargs.append(self.op.file_storage_dir)
9551 joinargs.append(self.op.instance_name)
9553 # pylint: disable=W0142
9554 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
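# Resulting layout (sketch): <cluster storage dir>[/<op.file_storage_dir>]/
# <instance name>, where the optional middle component comes from the opcode.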
9556 def CheckPrereq(self): # pylint: disable=R0914
9557 """Check prerequisites.
9560 self._CalculateFileStorageDir()
9562 if self.op.mode == constants.INSTANCE_IMPORT:
9563 export_info = self._ReadExportInfo()
9564 self._ReadExportParams(export_info)
9566 if (not self.cfg.GetVGName() and
9567 self.op.disk_template not in constants.DTS_NOT_LVM):
9568 raise errors.OpPrereqError("Cluster does not support lvm-based"
9569 " instances", errors.ECODE_STATE)
9571 if (self.op.hypervisor is None or
9572 self.op.hypervisor == constants.VALUE_AUTO):
9573 self.op.hypervisor = self.cfg.GetHypervisorType()
9575 cluster = self.cfg.GetClusterInfo()
9576 enabled_hvs = cluster.enabled_hypervisors
9577 if self.op.hypervisor not in enabled_hvs:
9578 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9579 " cluster (%s)" % (self.op.hypervisor,
9580 ",".join(enabled_hvs)),
9583 # Check tag validity
9584 for tag in self.op.tags:
9585 objects.TaggableObject.ValidateTag(tag)
9587 # check hypervisor parameter syntax (locally)
9588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9589 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9591 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9592 hv_type.CheckParameterSyntax(filled_hvp)
9593 self.hv_full = filled_hvp
9594 # check that we don't specify global parameters on an instance
9595 _CheckGlobalHvParams(self.op.hvparams)
9597 # fill and remember the beparams dict
9598 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9599 for param, value in self.op.beparams.iteritems():
9600 if value == constants.VALUE_AUTO:
9601 self.op.beparams[param] = default_beparams[param]
9602 objects.UpgradeBeParams(self.op.beparams)
9603 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9604 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9606 # build os parameters
9607 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9609 # now that hvp/bep are in final format, let's reset to defaults,
9611 if self.op.identify_defaults:
9612 self._RevertToDefaults(cluster)
9616 for idx, nic in enumerate(self.op.nics):
9617 nic_mode_req = nic.get(constants.INIC_MODE, None)
9618 nic_mode = nic_mode_req
9619 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9620 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9622 # in routed mode, for the first nic, the default ip is 'auto'
9623 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9624 default_ip_mode = constants.VALUE_AUTO
9626 default_ip_mode = constants.VALUE_NONE
9628 # ip validity checks
9629 ip = nic.get(constants.INIC_IP, default_ip_mode)
9630 if ip is None or ip.lower() == constants.VALUE_NONE:
9632 elif ip.lower() == constants.VALUE_AUTO:
9633 if not self.op.name_check:
9634 raise errors.OpPrereqError("IP address set to auto but name checks"
9635 " have been skipped",
9637 nic_ip = self.hostname1.ip
9639 if not netutils.IPAddress.IsValid(ip):
9640 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9644 # TODO: check the ip address for uniqueness
9645 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9646 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9649 # MAC address verification
9650 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9651 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9652 mac = utils.NormalizeAndValidateMac(mac)
9655 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9656 except errors.ReservationError:
9657 raise errors.OpPrereqError("MAC address %s already in use"
9658 " in cluster" % mac,
9659 errors.ECODE_NOTUNIQUE)
9661 # Build nic parameters
9662 link = nic.get(constants.INIC_LINK, None)
9663 if link == constants.VALUE_AUTO:
9664 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9667 nicparams[constants.NIC_MODE] = nic_mode
9669 nicparams[constants.NIC_LINK] = link
9671 check_params = cluster.SimpleFillNIC(nicparams)
9672 objects.NIC.CheckParameterSyntax(check_params)
9673 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9675 # disk checks/pre-build
9676 default_vg = self.cfg.GetVGName()
9678 for disk in self.op.disks:
9679 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9680 if mode not in constants.DISK_ACCESS_SET:
9681 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9682 mode, errors.ECODE_INVAL)
9683 size = disk.get(constants.IDISK_SIZE, None)
9685 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9688 except (TypeError, ValueError):
9689 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9692 data_vg = disk.get(constants.IDISK_VG, default_vg)
9694 constants.IDISK_SIZE: size,
9695 constants.IDISK_MODE: mode,
9696 constants.IDISK_VG: data_vg,
9698 if constants.IDISK_METAVG in disk:
9699 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9700 if constants.IDISK_ADOPT in disk:
9701 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9702 self.disks.append(new_disk)
9704 if self.op.mode == constants.INSTANCE_IMPORT:
9706 for idx in range(len(self.disks)):
9707 option = "disk%d_dump" % idx
9708 if export_info.has_option(constants.INISECT_INS, option):
9709 # FIXME: are the old OSes, disk sizes, etc. useful?
9710 export_name = export_info.get(constants.INISECT_INS, option)
9711 image = utils.PathJoin(self.op.src_path, export_name)
9712 disk_images.append(image)
9714 disk_images.append(False)
9716 self.src_images = disk_images
9718 old_name = export_info.get(constants.INISECT_INS, "name")
9719 if self.op.instance_name == old_name:
9720 for idx, nic in enumerate(self.nics):
9721 if nic.mac == constants.VALUE_AUTO:
9722 nic_mac_ini = "nic%d_mac" % idx
9723 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9725 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9727 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9728 if self.op.ip_check:
9729 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9730 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9731 (self.check_ip, self.op.instance_name),
9732 errors.ECODE_NOTUNIQUE)
9734 #### mac address generation
9735 # By generating the MAC address here, both the allocator and the hooks get
9736 # the real final MAC address rather than the 'auto' or 'generate' value.
9737 # There is a race condition between the generation and the instance object
9738 # creation, which means that we know the mac is valid now, but we're not
9739 # sure it will be when we actually add the instance. If things go bad
9740 # adding the instance will abort because of a duplicate mac, and the
9741 # creation job will fail.
9742 for nic in self.nics:
9743 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9744 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9748 if self.op.iallocator is not None:
9749 self._RunAllocator()
9751 # Release all unneeded node locks
9752 _ReleaseLocks(self, locking.LEVEL_NODE,
9753 keep=filter(None, [self.op.pnode, self.op.snode,
9755 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9756 keep=filter(None, [self.op.pnode, self.op.snode,
9759 #### node related checks
9761 # check primary node
9762 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9763 assert self.pnode is not None, \
9764 "Cannot retrieve locked node %s" % self.op.pnode
9766 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9767 pnode.name, errors.ECODE_STATE)
9769 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9770 pnode.name, errors.ECODE_STATE)
9771 if not pnode.vm_capable:
9772 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9773 " '%s'" % pnode.name, errors.ECODE_STATE)
9775 self.secondaries = []
9777 # mirror node verification
9778 if self.op.disk_template in constants.DTS_INT_MIRROR:
9779 if self.op.snode == pnode.name:
9780 raise errors.OpPrereqError("The secondary node cannot be the"
9781 " primary node", errors.ECODE_INVAL)
9782 _CheckNodeOnline(self, self.op.snode)
9783 _CheckNodeNotDrained(self, self.op.snode)
9784 _CheckNodeVmCapable(self, self.op.snode)
9785 self.secondaries.append(self.op.snode)
9787 snode = self.cfg.GetNodeInfo(self.op.snode)
9788 if pnode.group != snode.group:
9789 self.LogWarning("The primary and secondary nodes are in two"
9790 " different node groups; the disk parameters"
9791 " from the first disk's node group will be"
9794 nodenames = [pnode.name] + self.secondaries
9796 # Verify instance specs
9798 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9799 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9800 constants.ISPEC_DISK_COUNT: len(self.disks),
9801 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9802 constants.ISPEC_NIC_COUNT: len(self.nics),
9805 group_info = self.cfg.GetNodeGroup(pnode.group)
9806 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9807 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9808 if not self.op.ignore_ipolicy and res:
9809 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9810 " policy: %s") % (pnode.group,
9811 utils.CommaJoin(res)),
9814 # disk parameters (not customizable at instance or node level)
9815 # just use the primary node parameters, ignoring the secondary.
9816 self.diskparams = group_info.diskparams
9818 if not self.adopt_disks:
9819 if self.op.disk_template == constants.DT_RBD:
9820 # _CheckRADOSFreeSpace() is just a placeholder.
9821 # Any function that checks prerequisites can be placed here.
9822 # Check if there is enough space on the RADOS cluster.
9823 _CheckRADOSFreeSpace()
9825 # Check lv size requirements, if not adopting
9826 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9827 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9829 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9830 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9831 disk[constants.IDISK_ADOPT])
9832 for disk in self.disks])
9833 if len(all_lvs) != len(self.disks):
9834 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9836 for lv_name in all_lvs:
9838 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9839 # to ReserveLV use the same syntax
9840 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9841 except errors.ReservationError:
9842 raise errors.OpPrereqError("LV named %s used by another instance" %
9843 lv_name, errors.ECODE_NOTUNIQUE)
9845 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9846 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9848 node_lvs = self.rpc.call_lv_list([pnode.name],
9849 vg_names.payload.keys())[pnode.name]
9850 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9851 node_lvs = node_lvs.payload
9853 delta = all_lvs.difference(node_lvs.keys())
9855 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9856 utils.CommaJoin(delta),
9858 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9860 raise errors.OpPrereqError("Online logical volumes found, cannot"
9861 " adopt: %s" % utils.CommaJoin(online_lvs),
9863 # update the disk sizes based on what was found
9864 for dsk in self.disks:
9865 dsk[constants.IDISK_SIZE] = \
9866 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9867 dsk[constants.IDISK_ADOPT])][0]))
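# Note (informational): the size reported for each adopted LV goes through
# int(float(...)) because the RPC payload may carry a fractional value; the
# recorded disk size is therefore the truncated whole number
# (assuming, as for disk sizes elsewhere, that the unit is mebibytes).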
9869 elif self.op.disk_template == constants.DT_BLOCK:
9870 # Normalize and de-duplicate device paths
9871 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9872 for disk in self.disks])
9873 if len(all_disks) != len(self.disks):
9874 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9876 baddisks = [d for d in all_disks
9877 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9879 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9880 " cannot be adopted" %
9881 (", ".join(baddisks),
9882 constants.ADOPTABLE_BLOCKDEV_ROOT),
9885 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9886 list(all_disks))[pnode.name]
9887 node_disks.Raise("Cannot get block device information from node %s" %
9889 node_disks = node_disks.payload
9890 delta = all_disks.difference(node_disks.keys())
9892 raise errors.OpPrereqError("Missing block device(s): %s" %
9893 utils.CommaJoin(delta),
9895 for dsk in self.disks:
9896 dsk[constants.IDISK_SIZE] = \
9897 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9899 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9901 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9902 # check OS parameters (remotely)
9903 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9905 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9907 # memory check on primary node
9908 # TODO(dynmem): use MINMEM for checking
9910 _CheckNodeFreeMemory(self, self.pnode.name,
9911 "creating instance %s" % self.op.instance_name,
9912 self.be_full[constants.BE_MAXMEM],
9915 self.dry_run_result = list(nodenames)
9917 def Exec(self, feedback_fn):
9918 """Create and add the instance to the cluster.
9921 instance = self.op.instance_name
9922 pnode_name = self.pnode.name
9924 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9925 self.owned_locks(locking.LEVEL_NODE)), \
9926 "Node locks differ from node resource locks"
9928 ht_kind = self.op.hypervisor
9929 if ht_kind in constants.HTS_REQ_PORT:
9930 network_port = self.cfg.AllocatePort()
9934 disks = _GenerateDiskTemplate(self,
9935 self.op.disk_template,
9936 instance, pnode_name,
9939 self.instance_file_storage_dir,
9940 self.op.file_driver,
9945 iobj = objects.Instance(name=instance, os=self.op.os_type,
9946 primary_node=pnode_name,
9947 nics=self.nics, disks=disks,
9948 disk_template=self.op.disk_template,
9949 admin_state=constants.ADMINST_DOWN,
9950 network_port=network_port,
9951 beparams=self.op.beparams,
9952 hvparams=self.op.hvparams,
9953 hypervisor=self.op.hypervisor,
9954 osparams=self.op.osparams,
9958 for tag in self.op.tags:
9961 if self.adopt_disks:
9962 if self.op.disk_template == constants.DT_PLAIN:
9963 # rename LVs to the newly-generated names; we need to construct
9964 # 'fake' LV disks with the old data, plus the new unique_id
9965 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9967 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9968 rename_to.append(t_dsk.logical_id)
9969 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9970 self.cfg.SetDiskID(t_dsk, pnode_name)
9971 result = self.rpc.call_blockdev_rename(pnode_name,
9972 zip(tmp_disks, rename_to))
9973 result.Raise("Failed to rename adopted LVs")
9975 feedback_fn("* creating instance disks...")
9977 _CreateDisks(self, iobj)
9978 except errors.OpExecError:
9979 self.LogWarning("Device creation failed, reverting...")
9981 _RemoveDisks(self, iobj)
9983 self.cfg.ReleaseDRBDMinors(instance)
9986 feedback_fn("adding instance %s to cluster config" % instance)
9988 self.cfg.AddInstance(iobj, self.proc.GetECId())
9990 # Declare that we don't want to remove the instance lock anymore, as we've
9991 # added the instance to the config
9992 del self.remove_locks[locking.LEVEL_INSTANCE]
9994 if self.op.mode == constants.INSTANCE_IMPORT:
9995 # Release unused nodes
9996 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9999 _ReleaseLocks(self, locking.LEVEL_NODE)
10002 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10003 feedback_fn("* wiping instance disks...")
10005 _WipeDisks(self, iobj)
10006 except errors.OpExecError, err:
10007 logging.exception("Wiping disks failed")
10008 self.LogWarning("Wiping instance disks failed (%s)", err)
10012 # Something is already wrong with the disks, don't do anything else
10014 elif self.op.wait_for_sync:
10015 disk_abort = not _WaitForSync(self, iobj)
10016 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10017 # make sure the disks are not degraded (still sync-ing is ok)
10018 feedback_fn("* checking mirrors status")
10019 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10024 _RemoveDisks(self, iobj)
10025 self.cfg.RemoveInstance(iobj.name)
10026 # Make sure the instance lock gets removed
10027 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10028 raise errors.OpExecError("There are some degraded disks for"
10031 # Release all node resource locks
10032 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10034 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10035 if self.op.mode == constants.INSTANCE_CREATE:
10036 if not self.op.no_install:
10037 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10038 not self.op.wait_for_sync)
10040 feedback_fn("* pausing disk sync to install instance OS")
10041 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10043 for idx, success in enumerate(result.payload):
10045 logging.warn("pause-sync of instance %s for disk %d failed",
10048 feedback_fn("* running the instance OS create scripts...")
10049 # FIXME: pass debug option from opcode to backend
10051 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10052 self.op.debug_level)
10054 feedback_fn("* resuming disk sync")
10055 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10057 for idx, success in enumerate(result.payload):
10059 logging.warn("resume-sync of instance %s for disk %d failed",
10062 os_add_result.Raise("Could not add os for instance %s"
10063 " on node %s" % (instance, pnode_name))
10065 elif self.op.mode == constants.INSTANCE_IMPORT:
10066 feedback_fn("* running the instance OS import scripts...")
10070 for idx, image in enumerate(self.src_images):
10074 # FIXME: pass debug option from opcode to backend
10075 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10076 constants.IEIO_FILE, (image, ),
10077 constants.IEIO_SCRIPT,
10078 (iobj.disks[idx], idx),
10080 transfers.append(dt)
10083 masterd.instance.TransferInstanceData(self, feedback_fn,
10084 self.op.src_node, pnode_name,
10085 self.pnode.secondary_ip,
10087 if not compat.all(import_result):
10088 self.LogWarning("Some disks for instance %s on node %s were not"
10089 " imported successfully" % (instance, pnode_name))
10091 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10092 feedback_fn("* preparing remote import...")
10093 # The source cluster will stop the instance before attempting to make a
10094 # connection. In some cases stopping an instance can take a long time,
10095 # hence the shutdown timeout is added to the connection timeout.
10096 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10097 self.op.source_shutdown_timeout)
10098 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10100 assert iobj.primary_node == self.pnode.name
10102 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10103 self.source_x509_ca,
10104 self._cds, timeouts)
10105 if not compat.all(disk_results):
10106 # TODO: Should the instance still be started, even if some disks
10107 # failed to import (valid for local imports, too)?
10108 self.LogWarning("Some disks for instance %s on node %s were not"
10109 " imported successfully" % (instance, pnode_name))
10111 # Run rename script on newly imported instance
10112 assert iobj.name == instance
10113 feedback_fn("Running rename script for %s" % instance)
10114 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10115 self.source_instance_name,
10116 self.op.debug_level)
10117 if result.fail_msg:
10118 self.LogWarning("Failed to run rename script for %s on node"
10119 " %s: %s" % (instance, pnode_name, result.fail_msg))
10122 # also checked in the prereq part
10123 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10126 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10129 iobj.admin_state = constants.ADMINST_UP
10130 self.cfg.Update(iobj, feedback_fn)
10131 logging.info("Starting instance %s on node %s", instance, pnode_name)
10132 feedback_fn("* starting instance...")
10133 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10135 result.Raise("Could not start instance")
10137 return list(iobj.all_nodes)
10140 def _CheckRADOSFreeSpace():
10141 """Compute disk size requirements inside the RADOS cluster.
10144 # For the RADOS cluster we assume there is always enough space.
10148 class LUInstanceConsole(NoHooksLU):
10149 """Connect to an instance's console.
10151 This is somewhat special in that it returns the command line that
10152 you need to run on the master node in order to connect to the
10158 def ExpandNames(self):
10159 self.share_locks = _ShareAll()
10160 self._ExpandAndLockInstance()
10162 def CheckPrereq(self):
10163 """Check prerequisites.
10165 This checks that the instance is in the cluster.
10168 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10169 assert self.instance is not None, \
10170 "Cannot retrieve locked instance %s" % self.op.instance_name
10171 _CheckNodeOnline(self, self.instance.primary_node)
10173 def Exec(self, feedback_fn):
10174 """Connect to the console of an instance
10177 instance = self.instance
10178 node = instance.primary_node
10180 node_insts = self.rpc.call_instance_list([node],
10181 [instance.hypervisor])[node]
10182 node_insts.Raise("Can't get node information from %s" % node)
10184 if instance.name not in node_insts.payload:
10185 if instance.admin_state == constants.ADMINST_UP:
10186 state = constants.INSTST_ERRORDOWN
10187 elif instance.admin_state == constants.ADMINST_DOWN:
10188 state = constants.INSTST_ADMINDOWN
10190 state = constants.INSTST_ADMINOFFLINE
10191 raise errors.OpExecError("Instance %s is not running (state %s)" %
10192 (instance.name, state))
10194 logging.debug("Connecting to console of %s on %s", instance.name, node)
10196 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10199 def _GetInstanceConsole(cluster, instance):
10200 """Returns console information for an instance.
10202 @type cluster: L{objects.Cluster}
10203 @type instance: L{objects.Instance}
10207 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10208 # beparams and hvparams are passed separately, to avoid editing the
10209 # instance and then saving the defaults in the instance itself.
10210 hvparams = cluster.FillHV(instance)
10211 beparams = cluster.FillBE(instance)
10212 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10214 assert console.instance == instance.name
10215 assert console.Validate()
10217 return console.ToDict()
10220 class LUInstanceReplaceDisks(LogicalUnit):
10221 """Replace the disks of an instance.
10224 HPATH = "mirrors-replace"
10225 HTYPE = constants.HTYPE_INSTANCE
10228 def CheckArguments(self):
10229 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10230 self.op.iallocator)
10232 def ExpandNames(self):
10233 self._ExpandAndLockInstance()
10235 assert locking.LEVEL_NODE not in self.needed_locks
10236 assert locking.LEVEL_NODE_RES not in self.needed_locks
10237 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10239 assert self.op.iallocator is None or self.op.remote_node is None, \
10240 "Conflicting options"
10242 if self.op.remote_node is not None:
10243 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10245 # Warning: do not remove the locking of the new secondary here
10246 # unless DRBD8.AddChildren is changed to work in parallel;
10247 # currently it doesn't since parallel invocations of
10248 # FindUnusedMinor will conflict
10249 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10250 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10252 self.needed_locks[locking.LEVEL_NODE] = []
10253 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10255 if self.op.iallocator is not None:
10256 # iallocator will select a new node in the same group
10257 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10259 self.needed_locks[locking.LEVEL_NODE_RES] = []
10261 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10262 self.op.iallocator, self.op.remote_node,
10263 self.op.disks, False, self.op.early_release,
10264 self.op.ignore_ipolicy)
10266 self.tasklets = [self.replacer]
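# The actual disk replacement work is carried out by the TLReplaceDisks
# tasklet created above; this LU itself only handles argument checking,
# locking and hooks.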
10268 def DeclareLocks(self, level):
10269 if level == locking.LEVEL_NODEGROUP:
10270 assert self.op.remote_node is None
10271 assert self.op.iallocator is not None
10272 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10274 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10275 # Lock all groups used by the instance optimistically; this requires going
10276 # via the node before it's locked, requiring verification later on
10277 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10278 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10280 elif level == locking.LEVEL_NODE:
10281 if self.op.iallocator is not None:
10282 assert self.op.remote_node is None
10283 assert not self.needed_locks[locking.LEVEL_NODE]
10285 # Lock member nodes of all locked groups
10286 self.needed_locks[locking.LEVEL_NODE] = [node_name
10287 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10288 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10290 self._LockInstancesNodes()
10291 elif level == locking.LEVEL_NODE_RES:
10293 self.needed_locks[locking.LEVEL_NODE_RES] = \
10294 self.needed_locks[locking.LEVEL_NODE]
10296 def BuildHooksEnv(self):
10297 """Build hooks env.
10299 This runs on the master, the primary and all the secondaries.
10302 instance = self.replacer.instance
10304 "MODE": self.op.mode,
10305 "NEW_SECONDARY": self.op.remote_node,
10306 "OLD_SECONDARY": instance.secondary_nodes[0],
10308 env.update(_BuildInstanceHookEnvByObject(self, instance))
10311 def BuildHooksNodes(self):
10312 """Build hooks nodes.
10315 instance = self.replacer.instance
10317 self.cfg.GetMasterNode(),
10318 instance.primary_node,
10320 if self.op.remote_node is not None:
10321 nl.append(self.op.remote_node)
10324 def CheckPrereq(self):
10325 """Check prerequisites.
10328 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10329 self.op.iallocator is None)
10331 # Verify if node group locks are still correct
10332 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10334 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10336 return LogicalUnit.CheckPrereq(self)
10339 class TLReplaceDisks(Tasklet):
10340 """Replaces disks for an instance.
10342 Note: Locking is not within the scope of this class.
10345 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10346 disks, delay_iallocator, early_release, ignore_ipolicy):
10347 """Initializes this class.
10350 Tasklet.__init__(self, lu)
10353 self.instance_name = instance_name
10355 self.iallocator_name = iallocator_name
10356 self.remote_node = remote_node
10358 self.delay_iallocator = delay_iallocator
10359 self.early_release = early_release
10360 self.ignore_ipolicy = ignore_ipolicy
10363 self.instance = None
10364 self.new_node = None
10365 self.target_node = None
10366 self.other_node = None
10367 self.remote_node_info = None
10368 self.node_secondary_ip = None
10371 def CheckArguments(mode, remote_node, iallocator):
10372 """Helper function for users of this class.
10375 # check for valid parameter combination
10376 if mode == constants.REPLACE_DISK_CHG:
10377 if remote_node is None and iallocator is None:
10378 raise errors.OpPrereqError("When changing the secondary either an"
10379 " iallocator script must be used or the"
10380 " new node given", errors.ECODE_INVAL)
10382 if remote_node is not None and iallocator is not None:
10383 raise errors.OpPrereqError("Give either the iallocator or the new"
10384 " secondary, not both", errors.ECODE_INVAL)
10386 elif remote_node is not None or iallocator is not None:
10387 # Not replacing the secondary
10388 raise errors.OpPrereqError("The iallocator and new node options can"
10389 " only be used when changing the"
10390 " secondary node", errors.ECODE_INVAL)
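# Accepted combinations, as enforced above:
#   REPLACE_DISK_CHG:  exactly one of remote_node / iallocator is required
#   all other modes:   neither remote_node nor iallocator may be given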
10393 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10394 """Compute a new secondary node using an IAllocator.
10397 ial = IAllocator(lu.cfg, lu.rpc,
10398 mode=constants.IALLOCATOR_MODE_RELOC,
10399 name=instance_name,
10400 relocate_from=list(relocate_from))
10402 ial.Run(iallocator_name)
10404 if not ial.success:
10405 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10406 " %s" % (iallocator_name, ial.info),
10407 errors.ECODE_NORES)
10409 if len(ial.result) != ial.required_nodes:
10410 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10411 " of nodes (%s), required %s" %
10413 len(ial.result), ial.required_nodes),
10414 errors.ECODE_FAULT)
10416 remote_node_name = ial.result[0]
10418 lu.LogInfo("Selected new secondary for instance '%s': %s",
10419 instance_name, remote_node_name)
10421 return remote_node_name
10423 def _FindFaultyDisks(self, node_name):
10424 """Wrapper for L{_FindFaultyInstanceDisks}.
10427 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10430 def _CheckDisksActivated(self, instance):
10431 """Checks if the instance disks are activated.
10433 @param instance: The instance to check disks
10434 @return: True if they are activated, False otherwise
10437 nodes = instance.all_nodes
10439 for idx, dev in enumerate(instance.disks):
10441 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10442 self.cfg.SetDiskID(dev, node)
10444 result = self.rpc.call_blockdev_find(node, dev)
10448 elif result.fail_msg or not result.payload:
10453 def CheckPrereq(self):
10454 """Check prerequisites.
10456 This checks that the instance is in the cluster.
10459 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10460 assert instance is not None, \
10461 "Cannot retrieve locked instance %s" % self.instance_name
10463 if instance.disk_template != constants.DT_DRBD8:
10464 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10465 " instances", errors.ECODE_INVAL)
10467 if len(instance.secondary_nodes) != 1:
10468 raise errors.OpPrereqError("The instance has a strange layout,"
10469 " expected one secondary but found %d" %
10470 len(instance.secondary_nodes),
10471 errors.ECODE_FAULT)
10473 if not self.delay_iallocator:
10474 self._CheckPrereq2()
10476 def _CheckPrereq2(self):
10477 """Check prerequisites, second part.
10479 This function should always be part of CheckPrereq. It was separated and is
10480 now called from Exec because during node evacuation iallocator was only
10481 called with an unmodified cluster model, not taking planned changes into
10485 instance = self.instance
10486 secondary_node = instance.secondary_nodes[0]
10488 if self.iallocator_name is None:
10489 remote_node = self.remote_node
10491 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10492 instance.name, instance.secondary_nodes)
10494 if remote_node is None:
10495 self.remote_node_info = None
10497 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10498 "Remote node '%s' is not locked" % remote_node
10500 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10501 assert self.remote_node_info is not None, \
10502 "Cannot retrieve locked node %s" % remote_node
10504 if remote_node == self.instance.primary_node:
10505 raise errors.OpPrereqError("The specified node is the primary node of"
10506 " the instance", errors.ECODE_INVAL)
10508 if remote_node == secondary_node:
10509 raise errors.OpPrereqError("The specified node is already the"
10510 " secondary node of the instance",
10511 errors.ECODE_INVAL)
10513 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10514 constants.REPLACE_DISK_CHG):
10515 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10516 errors.ECODE_INVAL)
10518 if self.mode == constants.REPLACE_DISK_AUTO:
10519 if not self._CheckDisksActivated(instance):
10520 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10521 " first" % self.instance_name,
10522 errors.ECODE_STATE)
10523 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10524 faulty_secondary = self._FindFaultyDisks(secondary_node)
10526 if faulty_primary and faulty_secondary:
10527 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10528 " one node and can not be repaired"
10529 " automatically" % self.instance_name,
10530 errors.ECODE_STATE)
10533 self.disks = faulty_primary
10534 self.target_node = instance.primary_node
10535 self.other_node = secondary_node
10536 check_nodes = [self.target_node, self.other_node]
10537 elif faulty_secondary:
10538 self.disks = faulty_secondary
10539 self.target_node = secondary_node
10540 self.other_node = instance.primary_node
10541 check_nodes = [self.target_node, self.other_node]
10547 # Non-automatic modes
10548 if self.mode == constants.REPLACE_DISK_PRI:
10549 self.target_node = instance.primary_node
10550 self.other_node = secondary_node
10551 check_nodes = [self.target_node, self.other_node]
10553 elif self.mode == constants.REPLACE_DISK_SEC:
10554 self.target_node = secondary_node
10555 self.other_node = instance.primary_node
10556 check_nodes = [self.target_node, self.other_node]
10558 elif self.mode == constants.REPLACE_DISK_CHG:
10559 self.new_node = remote_node
10560 self.other_node = instance.primary_node
10561 self.target_node = secondary_node
10562 check_nodes = [self.new_node, self.other_node]
10564 _CheckNodeNotDrained(self.lu, remote_node)
10565 _CheckNodeVmCapable(self.lu, remote_node)
10567 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10568 assert old_node_info is not None
10569 if old_node_info.offline and not self.early_release:
10570 # doesn't make sense to delay the release
10571 self.early_release = True
10572 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10573 " early-release mode", secondary_node)
10576 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10579 # If not specified all disks should be replaced
10581 self.disks = range(len(self.instance.disks))
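# (an unset/empty disk list therefore means "replace every disk of the
# instance")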
10583 # TODO: This is ugly, but right now we can't distinguish between an
10584 # internally submitted opcode and an external one. We should fix that.
10585 if self.remote_node_info:
10586 # We change the node, lets verify it still meets instance policy
10587 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10588 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10590 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10591 ignore=self.ignore_ipolicy)
10593 # TODO: compute disk parameters
10594 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10595 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10596 if primary_node_info.group != secondary_node_info.group:
10597 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10598 " different node groups; the disk parameters of the"
10599 " primary node's group will be applied.")
10601 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10603 for node in check_nodes:
10604 _CheckNodeOnline(self.lu, node)
10606 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10609 if node_name is not None)
10611 # Release unneeded node and node resource locks
10612 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10613 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10615 # Release any owned node group
10616 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10617 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10619 # Check whether disks are valid
10620 for disk_idx in self.disks:
10621 instance.FindDisk(disk_idx)
10623 # Get secondary node IP addresses
10624 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10625 in self.cfg.GetMultiNodeInfo(touched_nodes))
10627 def Exec(self, feedback_fn):
10628 """Execute disk replacement.
10630 This dispatches the disk replacement to the appropriate handler.
10633 if self.delay_iallocator:
10634 self._CheckPrereq2()
10637 # Verify owned locks before starting operation
10638 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10639 assert set(owned_nodes) == set(self.node_secondary_ip), \
10640 ("Incorrect node locks, owning %s, expected %s" %
10641 (owned_nodes, self.node_secondary_ip.keys()))
10642 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10643 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10645 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10646 assert list(owned_instances) == [self.instance_name], \
10647 "Instance '%s' not locked" % self.instance_name
10649 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10650 "Should not own any node group lock at this point"
10653 feedback_fn("No disks need replacement")
10656 feedback_fn("Replacing disk(s) %s for %s" %
10657 (utils.CommaJoin(self.disks), self.instance.name))
10659 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10661 # Activate the instance disks if we're replacing them on a down instance
10663 _StartInstanceDisks(self.lu, self.instance, True)
10666 # Should we replace the secondary node?
10667 if self.new_node is not None:
10668 fn = self._ExecDrbd8Secondary
10670 fn = self._ExecDrbd8DiskOnly
10672 result = fn(feedback_fn)
10674 # Deactivate the instance disks if we're replacing them on a
10677 _SafeShutdownInstanceDisks(self.lu, self.instance)
10679 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10682 # Verify owned locks
10683 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10684 nodes = frozenset(self.node_secondary_ip)
10685 assert ((self.early_release and not owned_nodes) or
10686 (not self.early_release and not (set(owned_nodes) - nodes))), \
10687 ("Not owning the correct locks, early_release=%s, owned=%r,"
10688 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10692 def _CheckVolumeGroup(self, nodes):
10693 self.lu.LogInfo("Checking volume groups")
10695 vgname = self.cfg.GetVGName()
10697 # Make sure volume group exists on all involved nodes
10698 results = self.rpc.call_vg_list(nodes)
10700 raise errors.OpExecError("Can't list volume groups on the nodes")
10703 res = results[node]
10704 res.Raise("Error checking node %s" % node)
10705 if vgname not in res.payload:
10706 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10709 def _CheckDisksExistence(self, nodes):
10710 # Check disk existence
10711 for idx, dev in enumerate(self.instance.disks):
10712 if idx not in self.disks:
10716 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10717 self.cfg.SetDiskID(dev, node)
10719 result = self.rpc.call_blockdev_find(node, dev)
10721 msg = result.fail_msg
10722 if msg or not result.payload:
10724 msg = "disk not found"
10725 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10728 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10729 for idx, dev in enumerate(self.instance.disks):
10730 if idx not in self.disks:
10733 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10736 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10738 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10739 " replace disks for instance %s" %
10740 (node_name, self.instance.name))
10742 def _CreateNewStorage(self, node_name):
10743 """Create new storage on the primary or secondary node.
10745 This is only used for same-node replaces, not for changing the
10746 secondary node, hence we don't want to modify the existing disk.
10751 for idx, dev in enumerate(self.instance.disks):
10752 if idx not in self.disks:
10755 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10757 self.cfg.SetDiskID(dev, node_name)
10759 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10760 names = _GenerateUniqueNames(self.lu, lv_names)
10762 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10764 vg_data = dev.children[0].logical_id[0]
10765 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10766 logical_id=(vg_data, names[0]), params=data_p)
10767 vg_meta = dev.children[1].logical_id[0]
10768 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10769 logical_id=(vg_meta, names[1]), params=meta_p)
10771 new_lvs = [lv_data, lv_meta]
10772 old_lvs = [child.Copy() for child in dev.children]
10773 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10775 # we pass force_create=True to force the LVM creation
10776 for new_lv in new_lvs:
10777 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10778 _GetInstanceInfoText(self.instance), False)
10782 def _CheckDevices(self, node_name, iv_names):
10783 for name, (dev, _, _) in iv_names.iteritems():
10784 self.cfg.SetDiskID(dev, node_name)
10786 result = self.rpc.call_blockdev_find(node_name, dev)
10788 msg = result.fail_msg
10789 if msg or not result.payload:
10791 msg = "disk not found"
10792 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10795 if result.payload.is_degraded:
10796 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10798 def _RemoveOldStorage(self, node_name, iv_names):
10799 for name, (_, old_lvs, _) in iv_names.iteritems():
10800 self.lu.LogInfo("Remove logical volumes for %s" % name)
10803 self.cfg.SetDiskID(lv, node_name)
10805 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10807 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10808 hint="remove unused LVs manually")
10810 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10811 """Replace a disk on the primary or secondary for DRBD 8.
10813 The algorithm for replace is quite complicated:
10815 1. for each disk to be replaced:
10817 1. create new LVs on the target node with unique names
10818 1. detach old LVs from the drbd device
10819 1. rename old LVs to name_replaced.<time_t>
10820 1. rename new LVs to old LVs
10821 1. attach the new LVs (with the old names now) to the drbd device
10823 1. wait for sync across all devices
10825 1. for each modified disk:
10827 1. remove old LVs (which have the name name_replaced.<time_t>)
10829 Failures are not very well handled.
10834 # Step: check device activation
10835 self.lu.LogStep(1, steps_total, "Check device existence")
10836 self._CheckDisksExistence([self.other_node, self.target_node])
10837 self._CheckVolumeGroup([self.target_node, self.other_node])
10839 # Step: check other node consistency
10840 self.lu.LogStep(2, steps_total, "Check peer consistency")
10841 self._CheckDisksConsistency(self.other_node,
10842 self.other_node == self.instance.primary_node,
10845 # Step: create new storage
10846 self.lu.LogStep(3, steps_total, "Allocate new storage")
10847 iv_names = self._CreateNewStorage(self.target_node)
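# iv_names maps each disk's iv_name to a (drbd device, old LVs, new LVs)
# tuple, as built by _CreateNewStorage above.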
10849 # Step: for each lv, detach+rename*2+attach
10850 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10851 for dev, old_lvs, new_lvs in iv_names.itervalues():
10852 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10854 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10856 result.Raise("Can't detach drbd from local storage on node"
10857 " %s for device %s" % (self.target_node, dev.iv_name))
10859 #cfg.Update(instance)
10861 # ok, we created the new LVs, so now we know we have the needed
10862 # storage; as such, we proceed on the target node to rename
10863 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10864 # using the assumption that logical_id == physical_id (which in
10865 # turn is the unique_id on that node)
10867 # FIXME(iustin): use a better name for the replaced LVs
10868 temp_suffix = int(time.time())
10869 ren_fn = lambda d, suff: (d.physical_id[0],
10870 d.physical_id[1] + "_replaced-%s" % suff)
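# e.g. ("xenvg", "<uuid>.disk0_data") would be renamed to
# ("xenvg", "<uuid>.disk0_data_replaced-<time_t>"); only the LV name gets
# the suffix, the volume group part stays (names here are illustrative).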
10872 # Build the rename list based on what LVs exist on the node
10873 rename_old_to_new = []
10874 for to_ren in old_lvs:
10875 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10876 if not result.fail_msg and result.payload:
10878 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10880 self.lu.LogInfo("Renaming the old LVs on the target node")
10881 result = self.rpc.call_blockdev_rename(self.target_node,
10883 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10885 # Now we rename the new LVs to the old LVs
10886 self.lu.LogInfo("Renaming the new LVs on the target node")
10887 rename_new_to_old = [(new, old.physical_id)
10888 for old, new in zip(old_lvs, new_lvs)]
10889 result = self.rpc.call_blockdev_rename(self.target_node,
10891 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10893 # Intermediate steps of in-memory modifications
10894 for old, new in zip(old_lvs, new_lvs):
10895 new.logical_id = old.logical_id
10896 self.cfg.SetDiskID(new, self.target_node)
10898 # We need to modify old_lvs so that removal later removes the
10899 # right LVs, not the newly added ones; note that old_lvs is a
10901 for disk in old_lvs:
10902 disk.logical_id = ren_fn(disk, temp_suffix)
10903 self.cfg.SetDiskID(disk, self.target_node)
10905 # Now that the new lvs have the old name, we can add them to the device
10906 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10907 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10909 msg = result.fail_msg
10911 for new_lv in new_lvs:
10912 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10915 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10916 hint=("cleanup manually the unused logical"
10918 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10920 cstep = itertools.count(5)
10922 if self.early_release:
10923 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10924 self._RemoveOldStorage(self.target_node, iv_names)
10925 # TODO: Check if releasing locks early still makes sense
10926 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10928 # Release all resource locks except those used by the instance
10929 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10930 keep=self.node_secondary_ip.keys())
10932 # Release all node locks while waiting for sync
10933 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10935 # TODO: Can the instance lock be downgraded here? Take the optional disk
10936 # shutdown in the caller into consideration.
10939 # This can fail as the old devices are degraded and _WaitForSync
10941 # returns a combined result over all disks, so we don't check its return value
10941 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10942 _WaitForSync(self.lu, self.instance)
10944 # Check all devices manually
10945 self._CheckDevices(self.instance.primary_node, iv_names)
10947 # Step: remove old storage
10948 if not self.early_release:
10949 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10950 self._RemoveOldStorage(self.target_node, iv_names)
10952 def _ExecDrbd8Secondary(self, feedback_fn):
10953 """Replace the secondary node for DRBD 8.
10955 The algorithm for replace is quite complicated:
10956 - for all disks of the instance:
10957 - create new LVs on the new node with same names
10958 - shutdown the drbd device on the old secondary
10959 - disconnect the drbd network on the primary
10960 - create the drbd device on the new secondary
10961 - network attach the drbd on the primary, using an artifice:
10962 the drbd code for Attach() will connect to the network if it
10963 finds a device which is connected to the good local disks but
10964 not network enabled
10965 - wait for sync across all devices
10966 - remove all disks from the old secondary
10968 Failures are not very well handled.
10973 pnode = self.instance.primary_node
10975 # Step: check device activation
10976 self.lu.LogStep(1, steps_total, "Check device existence")
10977 self._CheckDisksExistence([self.instance.primary_node])
10978 self._CheckVolumeGroup([self.instance.primary_node])
10980 # Step: check other node consistency
10981 self.lu.LogStep(2, steps_total, "Check peer consistency")
10982 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10984 # Step: create new storage
10985 self.lu.LogStep(3, steps_total, "Allocate new storage")
10986 for idx, dev in enumerate(self.instance.disks):
10987 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10988 (self.new_node, idx))
10989 # we pass force_create=True to force LVM creation
10990 for new_lv in dev.children:
10991 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10992 _GetInstanceInfoText(self.instance), False)
10994 # Step 4: drbd minors and drbd setup changes
10995 # after this, we must manually remove the drbd minors on both the
10996 # error and the success paths
10997 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10998 minors = self.cfg.AllocateDRBDMinor([self.new_node
10999 for dev in self.instance.disks],
11000 self.instance.name)
11001 logging.debug("Allocated minors %r", minors)
11004 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11005 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11006 (self.new_node, idx))
11007 # create new devices on new_node; note that we create two IDs:
11008 # one without port, so the drbd will be activated without
11009 # networking information on the new node at this stage, and one
11010 # with network, for the later activation in step 4
11011 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11012 if self.instance.primary_node == o_node1:
11015 assert self.instance.primary_node == o_node2, "Three-node instance?"
11018 new_alone_id = (self.instance.primary_node, self.new_node, None,
11019 p_minor, new_minor, o_secret)
11020 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11021 p_minor, new_minor, o_secret)
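# For illustration, a DRBD logical_id is the 6-tuple unpacked above:
# (node_A, node_B, port, minor_A, minor_B, secret). new_alone_id deliberately
# carries None instead of the port so the device is brought up without
# networking first; new_net_id keeps the original port for the attach later.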
11023 iv_names[idx] = (dev, dev.children, new_net_id)
11024 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11026 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11027 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11028 logical_id=new_alone_id,
11029 children=dev.children,
11031 params=drbd_params)
11033 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11034 _GetInstanceInfoText(self.instance), False)
11035 except errors.GenericError:
11036 self.cfg.ReleaseDRBDMinors(self.instance.name)
11039 # We have new devices, shutdown the drbd on the old secondary
11040 for idx, dev in enumerate(self.instance.disks):
11041 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11042 self.cfg.SetDiskID(dev, self.target_node)
11043 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11045 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11046 "node: %s" % (idx, msg),
11047 hint=("Please cleanup this device manually as"
11048 " soon as possible"))
11050 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11051 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11052 self.instance.disks)[pnode]
11054 msg = result.fail_msg
11056 # detaches didn't succeed (unlikely)
11057 self.cfg.ReleaseDRBDMinors(self.instance.name)
11058 raise errors.OpExecError("Can't detach the disks from the network on"
11059 " old node: %s" % (msg,))
11061 # if we managed to detach at least one, we update all the disks of
11062 # the instance to point to the new secondary
11063 self.lu.LogInfo("Updating instance configuration")
11064 for dev, _, new_logical_id in iv_names.itervalues():
11065 dev.logical_id = new_logical_id
11066 self.cfg.SetDiskID(dev, self.instance.primary_node)
11068 self.cfg.Update(self.instance, feedback_fn)
11070 # Release all node locks (the configuration has been updated)
11071 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11073 # and now perform the drbd attach
11074 self.lu.LogInfo("Attaching primary drbds to new secondary"
11075 " (standalone => connected)")
11076 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11078 self.node_secondary_ip,
11079 self.instance.disks,
11080 self.instance.name,
11082 for to_node, to_result in result.items():
11083 msg = to_result.fail_msg
11085 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11087 hint=("please do a gnt-instance info to see the"
11088 " status of disks"))
11090 cstep = itertools.count(5)
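# Steps 1-4 above were logged with literal step numbers; this counter simply
# continues the numbering at 5 for the remaining, order-dependent steps
# (cstep.next() is the Python 2 iterator protocol for itertools.count).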
11092 if self.early_release:
11093 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11094 self._RemoveOldStorage(self.target_node, iv_names)
11095 # TODO: Check if releasing locks early still makes sense
11096 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11098 # Release all resource locks except those used by the instance
11099 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11100 keep=self.node_secondary_ip.keys())
11102 # TODO: Can the instance lock be downgraded here? Take the optional disk
11103 # shutdown in the caller into consideration.
11106 # This can fail as the old devices are degraded and _WaitForSync
11107 # returns a combined result over all disks, so we don't check its return value
11108 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11109 _WaitForSync(self.lu, self.instance)
11111 # Check all devices manually
11112 self._CheckDevices(self.instance.primary_node, iv_names)
11114 # Step: remove old storage
11115 if not self.early_release:
11116 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11117 self._RemoveOldStorage(self.target_node, iv_names)
11120 class LURepairNodeStorage(NoHooksLU):
11121 """Repairs the volume group on a node.
11126 def CheckArguments(self):
11127 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11129 storage_type = self.op.storage_type
11131 if (constants.SO_FIX_CONSISTENCY not in
11132 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11133 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11134 " repaired" % storage_type,
11135 errors.ECODE_INVAL)
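# The lookup above means only storage types that list SO_FIX_CONSISTENCY in
# constants.VALID_STORAGE_OPERATIONS can be repaired (in practice this is
# expected to be the LVM volume-group storage type, though the exact set is
# defined by the constants module, not here).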
11137 def ExpandNames(self):
11138 self.needed_locks = {
11139 locking.LEVEL_NODE: [self.op.node_name],
11142 def _CheckFaultyDisks(self, instance, node_name):
11143 """Ensure faulty disks abort the opcode or at least warn."""
11145 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11147 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11148 " node '%s'" % (instance.name, node_name),
11149 errors.ECODE_STATE)
11150 except errors.OpPrereqError, err:
11151 if self.op.ignore_consistency:
11152 self.proc.LogWarning(str(err.args[0]))
11156 def CheckPrereq(self):
11157 """Check prerequisites.
11160 # Check whether any instance on this node has faulty disks
11161 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11162 if inst.admin_state != constants.ADMINST_UP:
11164 check_nodes = set(inst.all_nodes)
11165 check_nodes.discard(self.op.node_name)
11166 for inst_node_name in check_nodes:
11167 self._CheckFaultyDisks(inst, inst_node_name)
11169 def Exec(self, feedback_fn):
11170 feedback_fn("Repairing storage unit '%s' on %s ..." %
11171 (self.op.name, self.op.node_name))
11173 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11174 result = self.rpc.call_storage_execute(self.op.node_name,
11175 self.op.storage_type, st_args,
11177 constants.SO_FIX_CONSISTENCY)
11178 result.Raise("Failed to repair storage unit '%s' on %s" %
11179 (self.op.name, self.op.node_name))
11182 class LUNodeEvacuate(NoHooksLU):
11183 """Evacuates instances off a list of nodes.
11188 _MODE2IALLOCATOR = {
11189 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11190 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11191 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11193 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11194 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11195 constants.IALLOCATOR_NEVAC_MODES)
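# _MODE2IALLOCATOR translates the opcode-level evacuation mode into the
# corresponding iallocator request mode; the two asserts above guarantee the
# mapping stays exhaustive on both sides whenever either constant set grows.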
11197 def CheckArguments(self):
11198 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11200 def ExpandNames(self):
11201 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11203 if self.op.remote_node is not None:
11204 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11205 assert self.op.remote_node
11207 if self.op.remote_node == self.op.node_name:
11208 raise errors.OpPrereqError("Can not use evacuated node as a new"
11209 " secondary node", errors.ECODE_INVAL)
11211 if self.op.mode != constants.NODE_EVAC_SEC:
11212 raise errors.OpPrereqError("Without the use of an iallocator only"
11213 " secondary instances can be evacuated",
11214 errors.ECODE_INVAL)
11217 self.share_locks = _ShareAll()
11218 self.needed_locks = {
11219 locking.LEVEL_INSTANCE: [],
11220 locking.LEVEL_NODEGROUP: [],
11221 locking.LEVEL_NODE: [],
11224 # Determine nodes (via group) optimistically, needs verification once locks
11225 # have been acquired
11226 self.lock_nodes = self._DetermineNodes()
11228 def _DetermineNodes(self):
11229 """Gets the list of nodes to operate on.
11232 if self.op.remote_node is None:
11233 # Iallocator will choose any node(s) in the same group
11234 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11236 group_nodes = frozenset([self.op.remote_node])
11238 # Determine nodes to be locked
11239 return set([self.op.node_name]) | group_nodes
11241 def _DetermineInstances(self):
11242 """Builds list of instances to operate on.
11245 assert self.op.mode in constants.NODE_EVAC_MODES
11247 if self.op.mode == constants.NODE_EVAC_PRI:
11248 # Primary instances only
11249 inst_fn = _GetNodePrimaryInstances
11250 assert self.op.remote_node is None, \
11251 "Evacuating primary instances requires iallocator"
11252 elif self.op.mode == constants.NODE_EVAC_SEC:
11253 # Secondary instances only
11254 inst_fn = _GetNodeSecondaryInstances
11257 assert self.op.mode == constants.NODE_EVAC_ALL
11258 inst_fn = _GetNodeInstances
11259 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11261 raise errors.OpPrereqError("Due to an issue with the iallocator"
11262 " interface it is not possible to evacuate"
11263 " all instances at once; specify explicitly"
11264 " whether to evacuate primary or secondary"
11266 errors.ECODE_INVAL)
11268 return inst_fn(self.cfg, self.op.node_name)
11270 def DeclareLocks(self, level):
11271 if level == locking.LEVEL_INSTANCE:
11272 # Lock instances optimistically, needs verification once node and group
11273 # locks have been acquired
11274 self.needed_locks[locking.LEVEL_INSTANCE] = \
11275 set(i.name for i in self._DetermineInstances())
11277 elif level == locking.LEVEL_NODEGROUP:
11278 # Lock node groups for all potential target nodes optimistically, needs
11279 # verification once nodes have been acquired
11280 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11281 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11283 elif level == locking.LEVEL_NODE:
11284 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11286 def CheckPrereq(self):
11288 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11289 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11290 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11292 need_nodes = self._DetermineNodes()
11294 if not owned_nodes.issuperset(need_nodes):
11295 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11296 " locks were acquired, current nodes are"
11297 " are '%s', used to be '%s'; retry the"
11299 (self.op.node_name,
11300 utils.CommaJoin(need_nodes),
11301 utils.CommaJoin(owned_nodes)),
11302 errors.ECODE_STATE)
11304 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11305 if owned_groups != wanted_groups:
11306 raise errors.OpExecError("Node groups changed since locks were acquired,"
11307 " current groups are '%s', used to be '%s';"
11308 " retry the operation" %
11309 (utils.CommaJoin(wanted_groups),
11310 utils.CommaJoin(owned_groups)))
11312 # Determine affected instances
11313 self.instances = self._DetermineInstances()
11314 self.instance_names = [i.name for i in self.instances]
11316 if set(self.instance_names) != owned_instances:
11317 raise errors.OpExecError("Instances on node '%s' changed since locks"
11318 " were acquired, current instances are '%s',"
11319 " used to be '%s'; retry the operation" %
11320 (self.op.node_name,
11321 utils.CommaJoin(self.instance_names),
11322 utils.CommaJoin(owned_instances)))
11324 if self.instance_names:
11325 self.LogInfo("Evacuating instances from node '%s': %s",
11327 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11329 self.LogInfo("No instances to evacuate from node '%s'",
11332 if self.op.remote_node is not None:
11333 for i in self.instances:
11334 if i.primary_node == self.op.remote_node:
11335 raise errors.OpPrereqError("Node %s is the primary node of"
11336 " instance %s, cannot use it as"
11338 (self.op.remote_node, i.name),
11339 errors.ECODE_INVAL)
11341 def Exec(self, feedback_fn):
11342 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11344 if not self.instance_names:
11345 # No instances to evacuate
11348 elif self.op.iallocator is not None:
11349 # TODO: Implement relocation to other group
11350 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11351 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11352 instances=list(self.instance_names))
11354 ial.Run(self.op.iallocator)
11356 if not ial.success:
11357 raise errors.OpPrereqError("Can't compute node evacuation using"
11358 " iallocator '%s': %s" %
11359 (self.op.iallocator, ial.info),
11360 errors.ECODE_NORES)
11362 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11364 elif self.op.remote_node is not None:
11365 assert self.op.mode == constants.NODE_EVAC_SEC
11367 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11368 remote_node=self.op.remote_node,
11370 mode=constants.REPLACE_DISK_CHG,
11371 early_release=self.op.early_release)]
11372 for instance_name in self.instance_names
11376 raise errors.ProgrammerError("No iallocator or remote node")
11378 return ResultWithJobs(jobs)
11381 def _SetOpEarlyRelease(early_release, op):
11382 """Sets C{early_release} flag on opcodes if available.
11386 op.early_release = early_release
11387 except AttributeError:
11388 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
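# The try/except AttributeError pattern above silently skips opcodes that do
# not define an early_release slot; the assert documents the expectation that
# OpInstanceReplaceDisks always has one and therefore never reaches this path.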
11393 def _NodeEvacDest(use_nodes, group, nodes):
11394 """Returns group or nodes depending on caller's choice.
11398 return utils.CommaJoin(nodes)
11403 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11404 """Unpacks the result of change-group and node-evacuate iallocator requests.
11406 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11407 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11409 @type lu: L{LogicalUnit}
11410 @param lu: Logical unit instance
11411 @type alloc_result: tuple/list
11412 @param alloc_result: Result from iallocator
11413 @type early_release: bool
11414 @param early_release: Whether to release locks early if possible
11415 @type use_nodes: bool
11416 @param use_nodes: Whether to display node names instead of groups
11419 (moved, failed, jobs) = alloc_result
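# Rough shape of the tuple unpacked above, with hypothetical values:
#   moved  = [("inst1", "<group uuid>", ["node2", "node3"]), ...]
#   failed = [("inst9", "insufficient memory"), ...]
#   jobs   = [[<serialized opcode>, ...], ...]   # one inner list per job
# Only the element structure is taken from the code below; the values are
# purely illustrative.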
11422 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11423 for (name, reason) in failed)
11424 lu.LogWarning("Unable to evacuate instances %s", failreason)
11425 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11428 lu.LogInfo("Instances to be moved: %s",
11429 utils.CommaJoin("%s (to %s)" %
11430 (name, _NodeEvacDest(use_nodes, group, nodes))
11431 for (name, group, nodes) in moved))
11433 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11434 map(opcodes.OpCode.LoadOpCode, ops))
11438 class LUInstanceGrowDisk(LogicalUnit):
11439 """Grow a disk of an instance.
11442 HPATH = "disk-grow"
11443 HTYPE = constants.HTYPE_INSTANCE
11446 def ExpandNames(self):
11447 self._ExpandAndLockInstance()
11448 self.needed_locks[locking.LEVEL_NODE] = []
11449 self.needed_locks[locking.LEVEL_NODE_RES] = []
11450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11451 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11453 def DeclareLocks(self, level):
11454 if level == locking.LEVEL_NODE:
11455 self._LockInstancesNodes()
11456 elif level == locking.LEVEL_NODE_RES:
11458 self.needed_locks[locking.LEVEL_NODE_RES] = \
11459 self.needed_locks[locking.LEVEL_NODE][:]
11461 def BuildHooksEnv(self):
11462 """Build hooks env.
11464 This runs on the master, the primary and all the secondaries.
11468 "DISK": self.op.disk,
11469 "AMOUNT": self.op.amount,
11471 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11474 def BuildHooksNodes(self):
11475 """Build hooks nodes.
11478 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11481 def CheckPrereq(self):
11482 """Check prerequisites.
11484 This checks that the instance is in the cluster.
11487 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11488 assert instance is not None, \
11489 "Cannot retrieve locked instance %s" % self.op.instance_name
11490 nodenames = list(instance.all_nodes)
11491 for node in nodenames:
11492 _CheckNodeOnline(self, node)
11494 self.instance = instance
11496 if instance.disk_template not in constants.DTS_GROWABLE:
11497 raise errors.OpPrereqError("Instance's disk layout does not support"
11498 " growing", errors.ECODE_INVAL)
11500 self.disk = instance.FindDisk(self.op.disk)
11502 if instance.disk_template not in (constants.DT_FILE,
11503 constants.DT_SHARED_FILE,
11505 # TODO: check the free disk space for file disks, once that feature is implemented
11507 _CheckNodesFreeDiskPerVG(self, nodenames,
11508 self.disk.ComputeGrowth(self.op.amount))
11510 def Exec(self, feedback_fn):
11511 """Execute disk grow.
11514 instance = self.instance
11517 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11518 assert (self.owned_locks(locking.LEVEL_NODE) ==
11519 self.owned_locks(locking.LEVEL_NODE_RES))
11521 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11523 raise errors.OpExecError("Cannot activate block device to grow")
11525 feedback_fn("Growing disk %s of instance '%s' by %s" %
11526 (self.op.disk, instance.name,
11527 utils.FormatUnit(self.op.amount, "h")))
11529 # First run all grow ops in dry-run mode
11530 for node in instance.all_nodes:
11531 self.cfg.SetDiskID(disk, node)
11532 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11533 result.Raise("Grow request failed to node %s" % node)
11535 # We know that (as far as we can test) operations across different
11536 # nodes will succeed; time to run it for real
11537 for node in instance.all_nodes:
11538 self.cfg.SetDiskID(disk, node)
11539 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11540 result.Raise("Grow request failed to node %s" % node)
11542 # TODO: Rewrite code to work properly
11543 # DRBD goes into sync mode for a short amount of time after executing the
11544 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11545 # calling "resize" in sync mode fails. Sleeping for a short amount of
11546 # time is a work-around.
11549 disk.RecordGrow(self.op.amount)
11550 self.cfg.Update(instance, feedback_fn)
11552 # Changes have been recorded, release node lock
11553 _ReleaseLocks(self, locking.LEVEL_NODE)
11555 # Downgrade lock while waiting for sync
11556 self.glm.downgrade(locking.LEVEL_INSTANCE)
11558 if self.op.wait_for_sync:
11559 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11561 self.proc.LogWarning("Disk sync-ing has not returned a good"
11562 " status; please check the instance")
11563 if instance.admin_state != constants.ADMINST_UP:
11564 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11565 elif instance.admin_state != constants.ADMINST_UP:
11566 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11567 " not supposed to be running because no wait for"
11568 " sync mode was requested")
11570 assert self.owned_locks(locking.LEVEL_NODE_RES)
11571 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11574 class LUInstanceQueryData(NoHooksLU):
11575 """Query runtime instance data.
11580 def ExpandNames(self):
11581 self.needed_locks = {}
11583 # Use locking if requested or when non-static information is wanted
11584 if not (self.op.static or self.op.use_locking):
11585 self.LogWarning("Non-static data requested, locks need to be acquired")
11586 self.op.use_locking = True
11588 if self.op.instances or not self.op.use_locking:
11589 # Expand instance names right here
11590 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11592 # Will use acquired locks
11593 self.wanted_names = None
11595 if self.op.use_locking:
11596 self.share_locks = _ShareAll()
11598 if self.wanted_names is None:
11599 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11601 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11603 self.needed_locks[locking.LEVEL_NODE] = []
11604 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11606 def DeclareLocks(self, level):
11607 if self.op.use_locking and level == locking.LEVEL_NODE:
11608 self._LockInstancesNodes()
11610 def CheckPrereq(self):
11611 """Check prerequisites.
11613 This only checks the optional instance list against the existing names.
11616 if self.wanted_names is None:
11617 assert self.op.use_locking, "Locking was not used"
11618 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11620 self.wanted_instances = \
11621 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
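# GetMultiInstanceInfo is expected to return (name, instance) pairs here;
# compat.snd keeps just the instance objects, since the names are already
# available in self.wanted_names.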
11623 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11624 """Returns the status of a block device
11627 if self.op.static or not node:
11630 self.cfg.SetDiskID(dev, node)
11632 result = self.rpc.call_blockdev_find(node, dev)
11636 result.Raise("Can't compute disk status for %s" % instance_name)
11638 status = result.payload
11642 return (status.dev_path, status.major, status.minor,
11643 status.sync_percent, status.estimated_time,
11644 status.is_degraded, status.ldisk_status)
11646 def _ComputeDiskStatus(self, instance, snode, dev):
11647 """Compute block device status.
11650 if dev.dev_type in constants.LDS_DRBD:
11651 # we change the snode then (otherwise we use the one passed in)
11652 if dev.logical_id[0] == instance.primary_node:
11653 snode = dev.logical_id[1]
11655 snode = dev.logical_id[0]
11657 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11658 instance.name, dev)
11659 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11662 dev_children = map(compat.partial(self._ComputeDiskStatus,
11669 "iv_name": dev.iv_name,
11670 "dev_type": dev.dev_type,
11671 "logical_id": dev.logical_id,
11672 "physical_id": dev.physical_id,
11673 "pstatus": dev_pstatus,
11674 "sstatus": dev_sstatus,
11675 "children": dev_children,
11680 def Exec(self, feedback_fn):
11681 """Gather and return data"""
11684 cluster = self.cfg.GetClusterInfo()
11686 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11687 for i in self.wanted_instances)
11688 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11689 if self.op.static or pnode.offline:
11690 remote_state = None
11692 self.LogWarning("Primary node %s is marked offline, returning static"
11693 " information only for instance %s" %
11694 (pnode.name, instance.name))
11696 remote_info = self.rpc.call_instance_info(instance.primary_node,
11698 instance.hypervisor)
11699 remote_info.Raise("Error checking node %s" % instance.primary_node)
11700 remote_info = remote_info.payload
11701 if remote_info and "state" in remote_info:
11702 remote_state = "up"
11704 if instance.admin_state == constants.ADMINST_UP:
11705 remote_state = "down"
11707 remote_state = instance.admin_state
11709 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11712 result[instance.name] = {
11713 "name": instance.name,
11714 "config_state": instance.admin_state,
11715 "run_state": remote_state,
11716 "pnode": instance.primary_node,
11717 "snodes": instance.secondary_nodes,
11719 # this happens to be the same format used for hooks
11720 "nics": _NICListToTuple(self, instance.nics),
11721 "disk_template": instance.disk_template,
11723 "hypervisor": instance.hypervisor,
11724 "network_port": instance.network_port,
11725 "hv_instance": instance.hvparams,
11726 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11727 "be_instance": instance.beparams,
11728 "be_actual": cluster.FillBE(instance),
11729 "os_instance": instance.osparams,
11730 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11731 "serial_no": instance.serial_no,
11732 "mtime": instance.mtime,
11733 "ctime": instance.ctime,
11734 "uuid": instance.uuid,
11740 class LUInstanceSetParams(LogicalUnit):
11741 """Modifies an instances's parameters.
11744 HPATH = "instance-modify"
11745 HTYPE = constants.HTYPE_INSTANCE
11748 def CheckArguments(self):
11749 if not (self.op.nics or self.op.disks or self.op.disk_template or
11750 self.op.hvparams or self.op.beparams or self.op.os_name or
11751 self.op.online_inst or self.op.offline_inst or
11752 self.op.runtime_mem):
11753 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11755 if self.op.hvparams:
11756 _CheckGlobalHvParams(self.op.hvparams)
11760 for disk_op, disk_dict in self.op.disks:
11761 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11762 if disk_op == constants.DDM_REMOVE:
11763 disk_addremove += 1
11765 elif disk_op == constants.DDM_ADD:
11766 disk_addremove += 1
11768 if not isinstance(disk_op, int):
11769 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11770 if not isinstance(disk_dict, dict):
11771 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11772 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11774 if disk_op == constants.DDM_ADD:
11775 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11776 if mode not in constants.DISK_ACCESS_SET:
11777 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11778 errors.ECODE_INVAL)
11779 size = disk_dict.get(constants.IDISK_SIZE, None)
11781 raise errors.OpPrereqError("Required disk parameter size missing",
11782 errors.ECODE_INVAL)
11785 except (TypeError, ValueError), err:
11786 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11787 str(err), errors.ECODE_INVAL)
11788 disk_dict[constants.IDISK_SIZE] = size
11790 # modification of disk
11791 if constants.IDISK_SIZE in disk_dict:
11792 raise errors.OpPrereqError("Disk size change not possible, use"
11793 " grow-disk", errors.ECODE_INVAL)
11795 if disk_addremove > 1:
11796 raise errors.OpPrereqError("Only one disk add or remove operation"
11797 " supported at a time", errors.ECODE_INVAL)
11799 if self.op.disks and self.op.disk_template is not None:
11800 raise errors.OpPrereqError("Disk template conversion and other disk"
11801 " changes not supported at the same time",
11802 errors.ECODE_INVAL)
11804 if (self.op.disk_template and
11805 self.op.disk_template in constants.DTS_INT_MIRROR and
11806 self.op.remote_node is None):
11807 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11808 " one requires specifying a secondary node",
11809 errors.ECODE_INVAL)
11813 for nic_op, nic_dict in self.op.nics:
11814 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11815 if nic_op == constants.DDM_REMOVE:
11818 elif nic_op == constants.DDM_ADD:
11821 if not isinstance(nic_op, int):
11822 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11823 if not isinstance(nic_dict, dict):
11824 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11827 # nic_dict should be a dict
11828 nic_ip = nic_dict.get(constants.INIC_IP, None)
11829 if nic_ip is not None:
11830 if nic_ip.lower() == constants.VALUE_NONE:
11831 nic_dict[constants.INIC_IP] = None
11833 if not netutils.IPAddress.IsValid(nic_ip):
11834 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11835 errors.ECODE_INVAL)
11837 nic_bridge = nic_dict.get("bridge", None)
11838 nic_link = nic_dict.get(constants.INIC_LINK, None)
11839 if nic_bridge and nic_link:
11840 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11841 " at the same time", errors.ECODE_INVAL)
11842 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11843 nic_dict["bridge"] = None
11844 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11845 nic_dict[constants.INIC_LINK] = None
11847 if nic_op == constants.DDM_ADD:
11848 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11849 if nic_mac is None:
11850 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11852 if constants.INIC_MAC in nic_dict:
11853 nic_mac = nic_dict[constants.INIC_MAC]
11854 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11855 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11857 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11858 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11859 " modifying an existing nic",
11860 errors.ECODE_INVAL)
11862 if nic_addremove > 1:
11863 raise errors.OpPrereqError("Only one NIC add or remove operation"
11864 " supported at a time", errors.ECODE_INVAL)
11866 def ExpandNames(self):
11867 self._ExpandAndLockInstance()
11868 # Can't even acquire node locks in shared mode as upcoming changes in
11869 # Ganeti 2.6 will start to modify the node object on disk conversion
11870 self.needed_locks[locking.LEVEL_NODE] = []
11871 self.needed_locks[locking.LEVEL_NODE_RES] = []
11872 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11874 def DeclareLocks(self, level):
11875 if level == locking.LEVEL_NODE:
11876 self._LockInstancesNodes()
11877 if self.op.disk_template and self.op.remote_node:
11878 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11879 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11880 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11882 self.needed_locks[locking.LEVEL_NODE_RES] = \
11883 self.needed_locks[locking.LEVEL_NODE][:]
11885 def BuildHooksEnv(self):
11886 """Build hooks env.
11888 This runs on the master, primary and secondaries.
11892 if constants.BE_MINMEM in self.be_new:
11893 args["minmem"] = self.be_new[constants.BE_MINMEM]
11894 if constants.BE_MAXMEM in self.be_new:
11895 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11896 if constants.BE_VCPUS in self.be_new:
11897 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11898 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11899 # information at all.
11902 nic_override = dict(self.op.nics)
11903 for idx, nic in enumerate(self.instance.nics):
11904 if idx in nic_override:
11905 this_nic_override = nic_override[idx]
11907 this_nic_override = {}
11908 if constants.INIC_IP in this_nic_override:
11909 ip = this_nic_override[constants.INIC_IP]
11912 if constants.INIC_MAC in this_nic_override:
11913 mac = this_nic_override[constants.INIC_MAC]
11916 if idx in self.nic_pnew:
11917 nicparams = self.nic_pnew[idx]
11919 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11920 mode = nicparams[constants.NIC_MODE]
11921 link = nicparams[constants.NIC_LINK]
11922 args["nics"].append((ip, mac, mode, link))
11923 if constants.DDM_ADD in nic_override:
11924 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11925 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11926 nicparams = self.nic_pnew[constants.DDM_ADD]
11927 mode = nicparams[constants.NIC_MODE]
11928 link = nicparams[constants.NIC_LINK]
11929 args["nics"].append((ip, mac, mode, link))
11930 elif constants.DDM_REMOVE in nic_override:
11931 del args["nics"][-1]
11933 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11934 if self.op.disk_template:
11935 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11936 if self.op.runtime_mem:
11937 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11941 def BuildHooksNodes(self):
11942 """Build hooks nodes.
11945 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11948 def CheckPrereq(self):
11949 """Check prerequisites.
11951 This only checks the instance list against the existing names.
11954 # checking the new params on the primary/secondary nodes
11956 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11957 cluster = self.cluster = self.cfg.GetClusterInfo()
11958 assert self.instance is not None, \
11959 "Cannot retrieve locked instance %s" % self.op.instance_name
11960 pnode = instance.primary_node
11961 nodelist = list(instance.all_nodes)
11962 pnode_info = self.cfg.GetNodeInfo(pnode)
11963 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11966 if self.op.os_name and not self.op.force:
11967 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11968 self.op.force_variant)
11969 instance_os = self.op.os_name
11971 instance_os = instance.os
11973 if self.op.disk_template:
11974 if instance.disk_template == self.op.disk_template:
11975 raise errors.OpPrereqError("Instance already has disk template %s" %
11976 instance.disk_template, errors.ECODE_INVAL)
11978 if (instance.disk_template,
11979 self.op.disk_template) not in self._DISK_CONVERSIONS:
11980 raise errors.OpPrereqError("Unsupported disk template conversion from"
11981 " %s to %s" % (instance.disk_template,
11982 self.op.disk_template),
11983 errors.ECODE_INVAL)
11984 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11985 msg="cannot change disk template")
11986 if self.op.disk_template in constants.DTS_INT_MIRROR:
11987 if self.op.remote_node == pnode:
11988 raise errors.OpPrereqError("Given new secondary node %s is the same"
11989 " as the primary node of the instance" %
11990 self.op.remote_node, errors.ECODE_STATE)
11991 _CheckNodeOnline(self, self.op.remote_node)
11992 _CheckNodeNotDrained(self, self.op.remote_node)
11993 # FIXME: here we assume that the old disk template is DT_PLAIN
11994 assert instance.disk_template == constants.DT_PLAIN
11995 disks = [{constants.IDISK_SIZE: d.size,
11996 constants.IDISK_VG: d.logical_id[0]}
11997 for d in instance.disks]
11998 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11999 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12001 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12002 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12003 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12004 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12005 ignore=self.op.ignore_ipolicy)
12006 if pnode_info.group != snode_info.group:
12007 self.LogWarning("The primary and secondary nodes are in two"
12008 " different node groups; the disk parameters"
12009 " from the first disk's node group will be"
12012 # hvparams processing
12013 if self.op.hvparams:
12014 hv_type = instance.hypervisor
12015 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12016 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12017 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12020 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12021 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12022 self.hv_proposed = self.hv_new = hv_new # the new actual values
12023 self.hv_inst = i_hvdict # the new dict (without defaults)
12025 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12027 self.hv_new = self.hv_inst = {}
12029 # beparams processing
12030 if self.op.beparams:
12031 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12033 objects.UpgradeBeParams(i_bedict)
12034 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12035 be_new = cluster.SimpleFillBE(i_bedict)
12036 self.be_proposed = self.be_new = be_new # the new actual values
12037 self.be_inst = i_bedict # the new dict (without defaults)
12039 self.be_new = self.be_inst = {}
12040 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12041 be_old = cluster.FillBE(instance)
12043 # CPU param validation -- checking every time a parameter is
12044 # changed to cover all cases where either CPU mask or vcpus have changed
12046 if (constants.BE_VCPUS in self.be_proposed and
12047 constants.HV_CPU_MASK in self.hv_proposed):
12049 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12050 # Verify mask is consistent with number of vCPUs. Can skip this
12051 # test if only 1 entry in the CPU mask, which means same mask
12052 # is applied to all vCPUs.
12053 if (len(cpu_list) > 1 and
12054 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12055 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12057 (self.be_proposed[constants.BE_VCPUS],
12058 self.hv_proposed[constants.HV_CPU_MASK]),
12059 errors.ECODE_INVAL)
12061 # Only perform this test if a new CPU mask is given
12062 if constants.HV_CPU_MASK in self.hv_new:
12063 # Calculate the largest CPU number requested
12064 max_requested_cpu = max(map(max, cpu_list))
12065 # Check that all of the instance's nodes have enough physical CPUs to
12066 # satisfy the requested CPU mask
12067 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12068 max_requested_cpu + 1, instance.hypervisor)
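# A hedged example of the consistency check above: with BE_VCPUS set to 2, a
# mask that parses into three per-vCPU entries is rejected, while a mask with
# a single entry is accepted and applied to every vCPU; the physical-CPU check
# then ensures every node can satisfy the highest CPU number requested.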
12070 # osparams processing
12071 if self.op.osparams:
12072 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12073 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12074 self.os_inst = i_osdict # the new dict (without defaults)
12080 #TODO(dynmem): do the appropriate check involving MINMEM
12081 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12082 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12083 mem_check_list = [pnode]
12084 if be_new[constants.BE_AUTO_BALANCE]:
12085 # either we changed auto_balance to yes or it was already enabled
12086 mem_check_list.extend(instance.secondary_nodes)
12087 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12088 instance.hypervisor)
12089 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12090 [instance.hypervisor])
12091 pninfo = nodeinfo[pnode]
12092 msg = pninfo.fail_msg
12094 # Assume the primary node is unreachable and go ahead
12095 self.warn.append("Can't get info from primary node %s: %s" %
12098 (_, _, (pnhvinfo, )) = pninfo.payload
12099 if not isinstance(pnhvinfo.get("memory_free", None), int):
12100 self.warn.append("Node data from primary node %s doesn't contain"
12101 " free memory information" % pnode)
12102 elif instance_info.fail_msg:
12103 self.warn.append("Can't get instance runtime information: %s" %
12104 instance_info.fail_msg)
12106 if instance_info.payload:
12107 current_mem = int(instance_info.payload["memory"])
12109 # Assume instance not running
12110 # (there is a slight race condition here, but it's not very
12111 # probable, and we have no other way to check)
12112 # TODO: Describe race condition
12114 #TODO(dynmem): do the appropriate check involving MINMEM
12115 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12116 pnhvinfo["memory_free"])
12118 raise errors.OpPrereqError("This change will prevent the instance"
12119 " from starting, due to %d MB of memory"
12120 " missing on its primary node" %
12122 errors.ECODE_NORES)
12124 if be_new[constants.BE_AUTO_BALANCE]:
12125 for node, nres in nodeinfo.items():
12126 if node not in instance.secondary_nodes:
12128 nres.Raise("Can't get info from secondary node %s" % node,
12129 prereq=True, ecode=errors.ECODE_STATE)
12130 (_, _, (nhvinfo, )) = nres.payload
12131 if not isinstance(nhvinfo.get("memory_free", None), int):
12132 raise errors.OpPrereqError("Secondary node %s didn't return free"
12133 " memory information" % node,
12134 errors.ECODE_STATE)
12135 #TODO(dynmem): do the appropriate check involving MINMEM
12136 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12137 raise errors.OpPrereqError("This change will prevent the instance"
12138 " from failover to its secondary node"
12139 " %s, due to not enough memory" % node,
12140 errors.ECODE_STATE)
12142 if self.op.runtime_mem:
12143 remote_info = self.rpc.call_instance_info(instance.primary_node,
12145 instance.hypervisor)
12146 remote_info.Raise("Error checking node %s" % instance.primary_node)
12147 if not remote_info.payload: # not running already
12148 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12149 errors.ECODE_STATE)
12151 current_memory = remote_info.payload["memory"]
12152 if (not self.op.force and
12153 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12154 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12155 raise errors.OpPrereqError("Instance %s must have memory between %d"
12156 " and %d MB of memory unless --force is"
12157 " given" % (instance.name,
12158 self.be_proposed[constants.BE_MINMEM],
12159 self.be_proposed[constants.BE_MAXMEM]),
12160 errors.ECODE_INVAL)
12162 if self.op.runtime_mem > current_memory:
12163 _CheckNodeFreeMemory(self, instance.primary_node,
12164 "ballooning memory for instance %s" %
12166 self.op.runtime_mem - current_memory,
12167 instance.hypervisor)
12171 self.nic_pinst = {}
12172 for nic_op, nic_dict in self.op.nics:
12173 if nic_op == constants.DDM_REMOVE:
12174 if not instance.nics:
12175 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12176 errors.ECODE_INVAL)
12178 if nic_op != constants.DDM_ADD:
12180 if not instance.nics:
12181 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12182 " no NICs" % nic_op,
12183 errors.ECODE_INVAL)
12184 if nic_op < 0 or nic_op >= len(instance.nics):
12185 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12187 (nic_op, len(instance.nics) - 1),
12188 errors.ECODE_INVAL)
12189 old_nic_params = instance.nics[nic_op].nicparams
12190 old_nic_ip = instance.nics[nic_op].ip
12192 old_nic_params = {}
12195 update_params_dict = dict([(key, nic_dict[key])
12196 for key in constants.NICS_PARAMETERS
12197 if key in nic_dict])
12199 if "bridge" in nic_dict:
12200 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12202 new_nic_params = _GetUpdatedParams(old_nic_params,
12203 update_params_dict)
12204 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12205 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12206 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12207 self.nic_pinst[nic_op] = new_nic_params
12208 self.nic_pnew[nic_op] = new_filled_nic_params
12209 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12211 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12212 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12213 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12215 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12217 self.warn.append(msg)
12219 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12220 if new_nic_mode == constants.NIC_MODE_ROUTED:
12221 if constants.INIC_IP in nic_dict:
12222 nic_ip = nic_dict[constants.INIC_IP]
12224 nic_ip = old_nic_ip
12226 raise errors.OpPrereqError("Cannot set the nic ip to None"
12227 " on a routed nic", errors.ECODE_INVAL)
12228 if constants.INIC_MAC in nic_dict:
12229 nic_mac = nic_dict[constants.INIC_MAC]
12230 if nic_mac is None:
12231 raise errors.OpPrereqError("Cannot set the nic mac to None",
12232 errors.ECODE_INVAL)
12233 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12234 # otherwise generate the mac
12235 nic_dict[constants.INIC_MAC] = \
12236 self.cfg.GenerateMAC(self.proc.GetECId())
12238 # or validate/reserve the current one
12240 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12241 except errors.ReservationError:
12242 raise errors.OpPrereqError("MAC address %s already in use"
12243 " in cluster" % nic_mac,
12244 errors.ECODE_NOTUNIQUE)
12247 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12248 raise errors.OpPrereqError("Disk operations not supported for"
12249 " diskless instances",
12250 errors.ECODE_INVAL)
12251 for disk_op, _ in self.op.disks:
12252 if disk_op == constants.DDM_REMOVE:
12253 if len(instance.disks) == 1:
12254 raise errors.OpPrereqError("Cannot remove the last disk of"
12255 " an instance", errors.ECODE_INVAL)
12256 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12257 msg="cannot remove disks")
12259 if (disk_op == constants.DDM_ADD and
12260 len(instance.disks) >= constants.MAX_DISKS):
12261 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12262 " add more" % constants.MAX_DISKS,
12263 errors.ECODE_STATE)
12264 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12266 if disk_op < 0 or disk_op >= len(instance.disks):
12267 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12269 (disk_op, len(instance.disks)),
12270 errors.ECODE_INVAL)
12272 # disabling the instance
12273 if self.op.offline_inst:
12274 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12275 msg="cannot change instance state to offline")
12277 # enabling the instance
12278 if self.op.online_inst:
12279 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12280 msg="cannot make instance go online")
12282 def _ConvertPlainToDrbd(self, feedback_fn):
12283 """Converts an instance from plain to drbd.
12286 feedback_fn("Converting template to drbd")
12287 instance = self.instance
12288 pnode = instance.primary_node
12289 snode = self.op.remote_node
12291 assert instance.disk_template == constants.DT_PLAIN
12293 # create a fake disk info for _GenerateDiskTemplate
12294 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12295 constants.IDISK_VG: d.logical_id[0]}
12296 for d in instance.disks]
12297 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12298 instance.name, pnode, [snode],
12299 disk_info, None, None, 0, feedback_fn,
12301 info = _GetInstanceInfoText(instance)
12302 feedback_fn("Creating aditional volumes...")
12303 # first, create the missing data and meta devices
12304 for disk in new_disks:
12305 # unfortunately this is... not too nice
12306 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12308 for child in disk.children:
12309 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12310 # at this stage, all new LVs have been created; we can rename the old ones
12312 feedback_fn("Renaming original volumes...")
12313 rename_list = [(o, n.children[0].logical_id)
12314 for (o, n) in zip(instance.disks, new_disks)]
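# Renaming the original data LVs to the names recorded as children of the new
# DRBD disks is what turns the existing volumes into the DRBD data devices;
# the meta and secondary-node LVs were created from scratch above.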
12315 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12316 result.Raise("Failed to rename original LVs")
12318 feedback_fn("Initializing DRBD devices...")
12319 # all child devices are in place, we can now create the DRBD devices
12320 for disk in new_disks:
12321 for node in [pnode, snode]:
12322 f_create = node == pnode
12323 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12325 # at this point, the instance has been modified
12326 instance.disk_template = constants.DT_DRBD8
12327 instance.disks = new_disks
12328 self.cfg.Update(instance, feedback_fn)
12330 # Release node locks while waiting for sync
12331 _ReleaseLocks(self, locking.LEVEL_NODE)
12333 # disks are created, waiting for sync
12334 disk_abort = not _WaitForSync(self, instance,
12335 oneshot=not self.op.wait_for_sync)
12337 raise errors.OpExecError("There are some degraded disks for"
12338 " this instance, please cleanup manually")
12340 # Node resource locks will be released by caller
12342 def _ConvertDrbdToPlain(self, feedback_fn):
12343 """Converts an instance from drbd to plain.
12346 instance = self.instance
12348 assert len(instance.secondary_nodes) == 1
12349 assert instance.disk_template == constants.DT_DRBD8
12351 pnode = instance.primary_node
12352 snode = instance.secondary_nodes[0]
12353 feedback_fn("Converting template to plain")
12355 old_disks = instance.disks
12356 new_disks = [d.children[0] for d in old_disks]
12358 # copy over size and mode
12359 for parent, child in zip(old_disks, new_disks):
12360 child.size = parent.size
12361 child.mode = parent.mode
12363 # update instance structure
12364 instance.disks = new_disks
12365 instance.disk_template = constants.DT_PLAIN
12366 self.cfg.Update(instance, feedback_fn)
12368 # Release locks in case removing disks takes a while
12369 _ReleaseLocks(self, locking.LEVEL_NODE)
12371 feedback_fn("Removing volumes on the secondary node...")
12372 for disk in old_disks:
12373 self.cfg.SetDiskID(disk, snode)
12374 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12376 self.LogWarning("Could not remove block device %s on node %s,"
12377 " continuing anyway: %s", disk.iv_name, snode, msg)
12379 feedback_fn("Removing unneeded volumes on the primary node...")
12380 for idx, disk in enumerate(old_disks):
12381 meta = disk.children[1]
12382 self.cfg.SetDiskID(meta, pnode)
12383 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12385 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12386 " continuing anyway: %s", idx, pnode, msg)
12388 # this is a DRBD disk, return its port to the pool
12389 for disk in old_disks:
12390 tcp_port = disk.logical_id[2]
12391 self.cfg.AddTcpUdpPort(tcp_port)
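# logical_id[2] is the TCP port slot of the DRBD 6-tuple
# (node_A, node_B, port, minor_A, minor_B, secret), so each removed disk's
# port goes back into the cluster's TCP/UDP port pool here.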
12393 # Node resource locks will be released by caller
12395 def Exec(self, feedback_fn):
12396 """Modifies an instance.
12398 All parameters take effect only at the next restart of the instance.
12401 # Process here the warnings from CheckPrereq, as we don't have a
12402 # feedback_fn there.
12403 for warn in self.warn:
12404 feedback_fn("WARNING: %s" % warn)
12406 assert ((self.op.disk_template is None) ^
12407 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12408 "Not owning any node resource locks"
12411 instance = self.instance
12414 if self.op.runtime_mem:
12415 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12417 self.op.runtime_mem)
12418 rpcres.Raise("Cannot modify instance runtime memory")
12419 result.append(("runtime_memory", self.op.runtime_mem))
12422 for disk_op, disk_dict in self.op.disks:
12423 if disk_op == constants.DDM_REMOVE:
12424 # remove the last disk
12425 device = instance.disks.pop()
12426 device_idx = len(instance.disks)
12427 for node, disk in device.ComputeNodeTree(instance.primary_node):
12428 self.cfg.SetDiskID(disk, node)
12429 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12431 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12432 " continuing anyway", device_idx, node, msg)
12433 result.append(("disk/%d" % device_idx, "remove"))
12435 # if this is a DRBD disk, return its port to the pool
12436 if device.dev_type in constants.LDS_DRBD:
12437 tcp_port = device.logical_id[2]
12438 self.cfg.AddTcpUdpPort(tcp_port)
12439 elif disk_op == constants.DDM_ADD:
12441 if instance.disk_template in (constants.DT_FILE,
12442 constants.DT_SHARED_FILE):
12443 file_driver, file_path = instance.disks[0].logical_id
12444 file_path = os.path.dirname(file_path)
12446 file_driver = file_path = None
12447 disk_idx_base = len(instance.disks)
12448 new_disk = _GenerateDiskTemplate(self,
12449 instance.disk_template,
12450 instance.name, instance.primary_node,
12451 instance.secondary_nodes,
12457 self.diskparams)[0]
12458 instance.disks.append(new_disk)
12459 info = _GetInstanceInfoText(instance)
12461 logging.info("Creating volume %s for instance %s",
12462 new_disk.iv_name, instance.name)
12463 # Note: this needs to be kept in sync with _CreateDisks
12465 for node in instance.all_nodes:
12466 f_create = node == instance.primary_node
12468 _CreateBlockDev(self, node, instance, new_disk,
12469 f_create, info, f_create)
12470 except errors.OpExecError, err:
12471 self.LogWarning("Failed to create volume %s (%s) on"
12473 new_disk.iv_name, new_disk, node, err)
12474 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12475 (new_disk.size, new_disk.mode)))
12477 # change a given disk
12478 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12479 result.append(("disk.mode/%d" % disk_op,
12480 disk_dict[constants.IDISK_MODE]))
12482 if self.op.disk_template:
12484 check_nodes = set(instance.all_nodes)
12485 if self.op.remote_node:
12486 check_nodes.add(self.op.remote_node)
12487 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12488 owned = self.owned_locks(level)
12489 assert not (check_nodes - owned), \
12490 ("Not owning the correct locks, owning %r, expected at least %r" %
12491 (owned, check_nodes))
12493 r_shut = _ShutdownInstanceDisks(self, instance)
12495 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12496 " proceed with disk template conversion")
12497 mode = (instance.disk_template, self.op.disk_template)
12499 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12501 self.cfg.ReleaseDRBDMinors(instance.name)
12503 result.append(("disk_template", self.op.disk_template))
12505 assert instance.disk_template == self.op.disk_template, \
12506 ("Expected disk template '%s', found '%s'" %
12507 (self.op.disk_template, instance.disk_template))
12509 # Release node and resource locks if there are any (they might already have
12510 # been released during disk conversion)
12511 _ReleaseLocks(self, locking.LEVEL_NODE)
12512 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12515 for nic_op, nic_dict in self.op.nics:
12516 if nic_op == constants.DDM_REMOVE:
12517 # remove the last nic
12518 del instance.nics[-1]
12519 result.append(("nic.%d" % len(instance.nics), "remove"))
12520 elif nic_op == constants.DDM_ADD:
12521 # mac and bridge should be set by now
12522 mac = nic_dict[constants.INIC_MAC]
12523 ip = nic_dict.get(constants.INIC_IP, None)
12524 nicparams = self.nic_pinst[constants.DDM_ADD]
12525 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12526 instance.nics.append(new_nic)
12527 result.append(("nic.%d" % (len(instance.nics) - 1),
12528 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12529 (new_nic.mac, new_nic.ip,
12530 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12531 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12534 for key in (constants.INIC_MAC, constants.INIC_IP):
12535 if key in nic_dict:
12536 setattr(instance.nics[nic_op], key, nic_dict[key])
12537 if nic_op in self.nic_pinst:
12538 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12539 for key, val in nic_dict.iteritems():
12540 result.append(("nic.%s/%d" % (key, nic_op), val))
12543 if self.op.hvparams:
12544 instance.hvparams = self.hv_inst
12545 for key, val in self.op.hvparams.iteritems():
12546 result.append(("hv/%s" % key, val))
12549 if self.op.beparams:
12550 instance.beparams = self.be_inst
12551 for key, val in self.op.beparams.iteritems():
12552 result.append(("be/%s" % key, val))
12555 if self.op.os_name:
12556 instance.os = self.op.os_name
12559 if self.op.osparams:
12560 instance.osparams = self.os_inst
12561 for key, val in self.op.osparams.iteritems():
12562 result.append(("os/%s" % key, val))
12564 # online/offline instance
12565 if self.op.online_inst:
12566 self.cfg.MarkInstanceDown(instance.name)
12567 result.append(("admin_state", constants.ADMINST_DOWN))
12568 if self.op.offline_inst:
12569 self.cfg.MarkInstanceOffline(instance.name)
12570 result.append(("admin_state", constants.ADMINST_OFFLINE))
12572 self.cfg.Update(instance, feedback_fn)
12574 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12575 self.owned_locks(locking.LEVEL_NODE)), \
12576 "All node locks should have been released by now"
12580 _DISK_CONVERSIONS = {
12581 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12582 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12586 class LUInstanceChangeGroup(LogicalUnit):
12587 HPATH = "instance-change-group"
12588 HTYPE = constants.HTYPE_INSTANCE
12591 def ExpandNames(self):
12592 self.share_locks = _ShareAll()
12593 self.needed_locks = {
12594 locking.LEVEL_NODEGROUP: [],
12595 locking.LEVEL_NODE: [],
12598 self._ExpandAndLockInstance()
12600 if self.op.target_groups:
12601 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12602 self.op.target_groups)
12604 self.req_target_uuids = None
12606 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12608 def DeclareLocks(self, level):
12609 if level == locking.LEVEL_NODEGROUP:
12610 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12612 if self.req_target_uuids:
12613 lock_groups = set(self.req_target_uuids)
12615 # Lock all groups used by instance optimistically; this requires going
12616 # via the node before it's locked, requiring verification later on
12617 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12618 lock_groups.update(instance_groups)
12620 # No target groups, need to lock all of them
12621 lock_groups = locking.ALL_SET
12623 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12625 elif level == locking.LEVEL_NODE:
12626 if self.req_target_uuids:
12627 # Lock all nodes used by instances
12628 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12629 self._LockInstancesNodes()
12631 # Lock all nodes in all potential target groups
12632 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12633 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12634 member_nodes = [node_name
12635 for group in lock_groups
12636 for node_name in self.cfg.GetNodeGroup(group).members]
12637 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12639 # Lock all nodes as all groups are potential targets
12640 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12642 def CheckPrereq(self):
12643 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12644 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12645 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12647 assert (self.req_target_uuids is None or
12648 owned_groups.issuperset(self.req_target_uuids))
12649 assert owned_instances == set([self.op.instance_name])
12651 # Get instance information
12652 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12654 # Check if node groups for locked instance are still correct
12655 assert owned_nodes.issuperset(self.instance.all_nodes), \
12656 ("Instance %s's nodes changed while we kept the lock" %
12657 self.op.instance_name)
12659 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12662 if self.req_target_uuids:
12663 # User requested specific target groups
12664 self.target_uuids = self.req_target_uuids
12666 # All groups except those used by the instance are potential targets
12667 self.target_uuids = owned_groups - inst_groups
12669 conflicting_groups = self.target_uuids & inst_groups
12670 if conflicting_groups:
12671 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12672 " used by the instance '%s'" %
12673 (utils.CommaJoin(conflicting_groups),
12674 self.op.instance_name),
12675 errors.ECODE_INVAL)
12677 if not self.target_uuids:
12678 raise errors.OpPrereqError("There are no possible target groups",
12679 errors.ECODE_INVAL)
12681 def BuildHooksEnv(self):
12682 """Build hooks env.
12685 assert self.target_uuids
12688 "TARGET_GROUPS": " ".join(self.target_uuids),
12691 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12695 def BuildHooksNodes(self):
12696 """Build hooks nodes.
12699 mn = self.cfg.GetMasterNode()
12700 return ([mn], [mn])
12702 def Exec(self, feedback_fn):
12703 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12705 assert instances == [self.op.instance_name], "Instance not locked"
12707 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12708 instances=instances, target_groups=list(self.target_uuids))
12710 ial.Run(self.op.iallocator)
12712 if not ial.success:
12713 raise errors.OpPrereqError("Can't compute solution for changing group of"
12714 " instance '%s' using iallocator '%s': %s" %
12715 (self.op.instance_name, self.op.iallocator,
12717 errors.ECODE_NORES)
12719 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12721 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12722 " instance '%s'", len(jobs), self.op.instance_name)
12724 return ResultWithJobs(jobs)
12727 class LUBackupQuery(NoHooksLU):
12728 """Query the exports list
12733 def ExpandNames(self):
12734 self.needed_locks = {}
12735 self.share_locks[locking.LEVEL_NODE] = 1
12736 if not self.op.nodes:
12737 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12739 self.needed_locks[locking.LEVEL_NODE] = \
12740 _GetWantedNodes(self, self.op.nodes)
12742 def Exec(self, feedback_fn):
12743 """Compute the list of all the exported system images.
12746 @return: a dictionary with the structure node->(export-list)
12747 where export-list is a list of the instances exported on
12751 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12752 rpcresult = self.rpc.call_export_list(self.nodes)
12754 for node in rpcresult:
12755 if rpcresult[node].fail_msg:
12756 result[node] = False
12758 result[node] = rpcresult[node].payload
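# Hedged example (added for illustration, not in the original module): the
# dictionary built above maps each queried node name either to its export
# list or to False when the RPC to that node failed, e.g.
#
#   {"node1.example.com": ["instance1.example.com", "instance2.example.com"],
#    "node2.example.com": False}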
12763 class LUBackupPrepare(NoHooksLU):
12764 """Prepares an instance for an export and returns useful information.
12769 def ExpandNames(self):
12770 self._ExpandAndLockInstance()
12772 def CheckPrereq(self):
12773 """Check prerequisites.
12776 instance_name = self.op.instance_name
12778 self.instance = self.cfg.GetInstanceInfo(instance_name)
12779 assert self.instance is not None, \
12780 "Cannot retrieve locked instance %s" % self.op.instance_name
12781 _CheckNodeOnline(self, self.instance.primary_node)
12783 self._cds = _GetClusterDomainSecret()
12785 def Exec(self, feedback_fn):
12786 """Prepares an instance for an export.
12789 instance = self.instance
12791 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12792 salt = utils.GenerateSecret(8)
12794 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12795 result = self.rpc.call_x509_cert_create(instance.primary_node,
12796 constants.RIE_CERT_VALIDITY)
12797 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12799 (name, cert_pem) = result.payload
12801 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12805 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12806 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12808 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12814 class LUBackupExport(LogicalUnit):
12815 """Export an instance to an image in the cluster.
12818 HPATH = "instance-export"
12819 HTYPE = constants.HTYPE_INSTANCE
12822 def CheckArguments(self):
12823 """Check the arguments.
12826 self.x509_key_name = self.op.x509_key_name
12827 self.dest_x509_ca_pem = self.op.destination_x509_ca
12829 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12830 if not self.x509_key_name:
12831 raise errors.OpPrereqError("Missing X509 key name for encryption",
12832 errors.ECODE_INVAL)
12834 if not self.dest_x509_ca_pem:
12835 raise errors.OpPrereqError("Missing destination X509 CA",
12836 errors.ECODE_INVAL)
12838 def ExpandNames(self):
12839 self._ExpandAndLockInstance()
12841 # Lock all nodes for local exports
12842 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12843 # FIXME: lock only instance primary and destination node
12845 # Sad but true: for now we have to lock all nodes, as we don't know where
12846 # the previous export might be, and in this LU we search for it and
12847 # remove it from its current node. In the future we could fix this by:
12848 # - making a tasklet to search (share-lock all), then create the
12849 # new one, then one to remove, after
12850 # - removing the removal operation altogether
12851 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12853 def DeclareLocks(self, level):
12854 """Last minute lock declaration."""
12855 # All nodes are locked anyway, so nothing to do here.
12857 def BuildHooksEnv(self):
12858 """Build hooks env.
12860 This will run on the master, primary node and target node.
12864 "EXPORT_MODE": self.op.mode,
12865 "EXPORT_NODE": self.op.target_node,
12866 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12867 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12868 # TODO: Generic function for boolean env variables
12869 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12872 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12876 def BuildHooksNodes(self):
12877 """Build hooks nodes.
12880 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12882 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12883 nl.append(self.op.target_node)
12887 def CheckPrereq(self):
12888 """Check prerequisites.
12890 This checks that the instance and node names are valid.
12893 instance_name = self.op.instance_name
12895 self.instance = self.cfg.GetInstanceInfo(instance_name)
12896 assert self.instance is not None, \
12897 "Cannot retrieve locked instance %s" % self.op.instance_name
12898 _CheckNodeOnline(self, self.instance.primary_node)
12900 if (self.op.remove_instance and
12901 self.instance.admin_state == constants.ADMINST_UP and
12902 not self.op.shutdown):
12903 raise errors.OpPrereqError("Can not remove instance without shutting it"
12906 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12907 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12908 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12909 assert self.dst_node is not None
12911 _CheckNodeOnline(self, self.dst_node.name)
12912 _CheckNodeNotDrained(self, self.dst_node.name)
12915 self.dest_disk_info = None
12916 self.dest_x509_ca = None
12918 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12919 self.dst_node = None
12921 if len(self.op.target_node) != len(self.instance.disks):
12922 raise errors.OpPrereqError(("Received destination information for %s"
12923 " disks, but instance %s has %s disks") %
12924 (len(self.op.target_node), instance_name,
12925 len(self.instance.disks)),
12926 errors.ECODE_INVAL)
12928 cds = _GetClusterDomainSecret()
12930 # Check X509 key name
12932 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12933 except (TypeError, ValueError), err:
12934 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12936 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12937 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12938 errors.ECODE_INVAL)
12940 # Load and verify CA
12942 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12943 except OpenSSL.crypto.Error, err:
12944 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12945 (err, ), errors.ECODE_INVAL)
12947 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12948 if errcode is not None:
12949 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12950 (msg, ), errors.ECODE_INVAL)
12952 self.dest_x509_ca = cert
12954 # Verify target information
12956 for idx, disk_data in enumerate(self.op.target_node):
12958 (host, port, magic) = \
12959 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12960 except errors.GenericError, err:
12961 raise errors.OpPrereqError("Target info for disk %s: %s" %
12962 (idx, err), errors.ECODE_INVAL)
12964 disk_info.append((host, port, magic))
12966 assert len(disk_info) == len(self.op.target_node)
12967 self.dest_disk_info = disk_info
12970 raise errors.ProgrammerError("Unhandled export mode %r" %
12973 # instance disk type verification
12974 # TODO: Implement export support for file-based disks
12975 for disk in self.instance.disks:
12976 if disk.dev_type == constants.LD_FILE:
12977 raise errors.OpPrereqError("Export not supported for instances with"
12978 " file-based disks", errors.ECODE_INVAL)
12980 def _CleanupExports(self, feedback_fn):
12981 """Removes exports of current instance from all other nodes.
12983 If an instance in a cluster with nodes A..D was exported to node C, its
12984 exports will be removed from the nodes A, B and D.
12987 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12989 nodelist = self.cfg.GetNodeList()
12990 nodelist.remove(self.dst_node.name)
12992 # on one-node clusters nodelist will be empty after the removal
12993 # if we proceed the backup would be removed because OpBackupQuery
12994 # substitutes an empty list with the full cluster node list.
12995 iname = self.instance.name
12997 feedback_fn("Removing old exports for instance %s" % iname)
12998 exportlist = self.rpc.call_export_list(nodelist)
12999 for node in exportlist:
13000 if exportlist[node].fail_msg:
13002 if iname in exportlist[node].payload:
13003 msg = self.rpc.call_export_remove(node, iname).fail_msg
13005 self.LogWarning("Could not remove older export for instance %s"
13006 " on node %s: %s", iname, node, msg)
13008 def Exec(self, feedback_fn):
13009 """Export an instance to an image in the cluster.
13012 assert self.op.mode in constants.EXPORT_MODES
13014 instance = self.instance
13015 src_node = instance.primary_node
13017 if self.op.shutdown:
13018 # shutdown the instance, but not the disks
13019 feedback_fn("Shutting down instance %s" % instance.name)
13020 result = self.rpc.call_instance_shutdown(src_node, instance,
13021 self.op.shutdown_timeout)
13022 # TODO: Maybe ignore failures if ignore_remove_failures is set
13023 result.Raise("Could not shutdown instance %s on"
13024 " node %s" % (instance.name, src_node))
13026 # set the disks ID correctly since call_instance_start needs the
13027 # correct drbd minor to create the symlinks
13028 for disk in instance.disks:
13029 self.cfg.SetDiskID(disk, src_node)
13031 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13034 # Activate the instance disks if we're exporting a stopped instance
13035 feedback_fn("Activating disks for %s" % instance.name)
13036 _StartInstanceDisks(self, instance, None)
13039 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13042 helper.CreateSnapshots()
13044 if (self.op.shutdown and
13045 instance.admin_state == constants.ADMINST_UP and
13046 not self.op.remove_instance):
13047 assert not activate_disks
13048 feedback_fn("Starting instance %s" % instance.name)
13049 result = self.rpc.call_instance_start(src_node,
13050 (instance, None, None), False)
13051 msg = result.fail_msg
13053 feedback_fn("Failed to start instance: %s" % msg)
13054 _ShutdownInstanceDisks(self, instance)
13055 raise errors.OpExecError("Could not start instance: %s" % msg)
13057 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13058 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13059 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13060 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13061 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13063 (key_name, _, _) = self.x509_key_name
13066 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13069 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13070 key_name, dest_ca_pem,
13075 # Check for backwards compatibility
13076 assert len(dresults) == len(instance.disks)
13077 assert compat.all(isinstance(i, bool) for i in dresults), \
13078 "Not all results are boolean: %r" % dresults
13082 feedback_fn("Deactivating disks for %s" % instance.name)
13083 _ShutdownInstanceDisks(self, instance)
13085 if not (compat.all(dresults) and fin_resu):
13088 failures.append("export finalization")
13089 if not compat.all(dresults):
13090 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13092 failures.append("disk export: disk(s) %s" % fdsk)
13094 raise errors.OpExecError("Export failed, errors in %s" %
13095 utils.CommaJoin(failures))
13097 # At this point, the export was successful, we can cleanup/finish
13099 # Remove instance if requested
13100 if self.op.remove_instance:
13101 feedback_fn("Removing instance %s" % instance.name)
13102 _RemoveInstance(self, feedback_fn, instance,
13103 self.op.ignore_remove_failures)
13105 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13106 self._CleanupExports(feedback_fn)
13108 return fin_resu, dresults
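# Hedged note with example (not in the original module): the export helpers
# return a pair (finalization_ok, per_disk_results), e.g. (True, [True, True])
# for a two-disk instance.  Any False disk entry, or a failed finalization,
# makes the code above raise OpExecError, so the tuple only reaches the
# caller when the whole export succeeded.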
13111 class LUBackupRemove(NoHooksLU):
13112 """Remove exports related to the named instance.
13117 def ExpandNames(self):
13118 self.needed_locks = {}
13119 # We need all nodes to be locked in order for RemoveExport to work, but we
13120 # don't need to lock the instance itself, as nothing will happen to it (and
13121 # we can also remove exports for an already-removed instance)
13122 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13124 def Exec(self, feedback_fn):
13125 """Remove any export.
13128 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13129 # If the instance was not found we'll try with the name that was passed in.
13130 # This will only work if it was an FQDN, though.
13132 if not instance_name:
13134 instance_name = self.op.instance_name
13136 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13137 exportlist = self.rpc.call_export_list(locked_nodes)
13139 for node in exportlist:
13140 msg = exportlist[node].fail_msg
13142 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13144 if instance_name in exportlist[node].payload:
13146 result = self.rpc.call_export_remove(node, instance_name)
13147 msg = result.fail_msg
13149 logging.error("Could not remove export for instance %s"
13150 " on node %s: %s", instance_name, node, msg)
13152 if fqdn_warn and not found:
13153 feedback_fn("Export not found. If trying to remove an export belonging"
13154 " to a deleted instance please use its Fully Qualified"
13158 class LUGroupAdd(LogicalUnit):
13159 """Logical unit for creating node groups.
13162 HPATH = "group-add"
13163 HTYPE = constants.HTYPE_GROUP
13166 def ExpandNames(self):
13167 # We need the new group's UUID here so that we can create and acquire the
13168 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13169 # that it should not check whether the UUID exists in the configuration.
13170 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13171 self.needed_locks = {}
13172 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13174 def CheckPrereq(self):
13175 """Check prerequisites.
13177 This checks that the given group name is not an existing node group
13182 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13183 except errors.OpPrereqError:
13186 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13187 " node group (UUID: %s)" %
13188 (self.op.group_name, existing_uuid),
13189 errors.ECODE_EXISTS)
13191 if self.op.ndparams:
13192 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13194 if self.op.hv_state:
13195 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13197 self.new_hv_state = None
13199 if self.op.disk_state:
13200 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13202 self.new_disk_state = None
13204 if self.op.diskparams:
13205 for templ in constants.DISK_TEMPLATES:
13206 if templ not in self.op.diskparams:
13207 self.op.diskparams[templ] = {}
13208 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13210 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13212 if self.op.ipolicy:
13213 cluster = self.cfg.GetClusterInfo()
13214 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13216 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13217 except errors.ConfigurationError, err:
13218 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13219 errors.ECODE_INVAL)
13221 def BuildHooksEnv(self):
13222 """Build hooks env.
13226 "GROUP_NAME": self.op.group_name,
13229 def BuildHooksNodes(self):
13230 """Build hooks nodes.
13233 mn = self.cfg.GetMasterNode()
13234 return ([mn], [mn])
13236 def Exec(self, feedback_fn):
13237 """Add the node group to the cluster.
13240 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13241 uuid=self.group_uuid,
13242 alloc_policy=self.op.alloc_policy,
13243 ndparams=self.op.ndparams,
13244 diskparams=self.op.diskparams,
13245 ipolicy=self.op.ipolicy,
13246 hv_state_static=self.new_hv_state,
13247 disk_state_static=self.new_disk_state)
13249 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13250 del self.remove_locks[locking.LEVEL_NODEGROUP]
13253 class LUGroupAssignNodes(NoHooksLU):
13254 """Logical unit for assigning nodes to groups.
13259 def ExpandNames(self):
13260 # These raise errors.OpPrereqError on their own:
13261 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13262 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13264 # We want to lock all the affected nodes and groups. We have readily
13265 # available the list of nodes, and the *destination* group. To gather the
13266 # list of "source" groups, we need to fetch node information later on.
13267 self.needed_locks = {
13268 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13269 locking.LEVEL_NODE: self.op.nodes,
13272 def DeclareLocks(self, level):
13273 if level == locking.LEVEL_NODEGROUP:
13274 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13276 # Try to get all affected nodes' groups without having the group or node
13277 # lock yet. Needs verification later in the code flow.
13278 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13280 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13282 def CheckPrereq(self):
13283 """Check prerequisites.
13286 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13287 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13288 frozenset(self.op.nodes))
13290 expected_locks = (set([self.group_uuid]) |
13291 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13292 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13293 if actual_locks != expected_locks:
13294 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13295 " current groups are '%s', used to be '%s'" %
13296 (utils.CommaJoin(expected_locks),
13297 utils.CommaJoin(actual_locks)))
13299 self.node_data = self.cfg.GetAllNodesInfo()
13300 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13301 instance_data = self.cfg.GetAllInstancesInfo()
13303 if self.group is None:
13304 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13305 (self.op.group_name, self.group_uuid))
13307 (new_splits, previous_splits) = \
13308 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13309 for node in self.op.nodes],
13310 self.node_data, instance_data)
13313 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13315 if not self.op.force:
13316 raise errors.OpExecError("The following instances get split by this"
13317 " change and --force was not given: %s" %
13320 self.LogWarning("This operation will split the following instances: %s",
13323 if previous_splits:
13324 self.LogWarning("In addition, these already-split instances continue"
13325 " to be split across groups: %s",
13326 utils.CommaJoin(utils.NiceSort(previous_splits)))
13328 def Exec(self, feedback_fn):
13329 """Assign nodes to a new group.
13332 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13334 self.cfg.AssignGroupNodes(mods)
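# Hedged example (illustration only): "mods" is a list of
# (node_name, target_group_uuid) pairs, e.g.
#
#   [("node3.example.com", "<dest group uuid>"),
#    ("node4.example.com", "<dest group uuid>")]
#
# all pointing at the destination group's UUID; the configuration applies
# them in a single update.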
13337 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13338 """Check for split instances after a node assignment.
13340 This method considers a series of node assignments as an atomic operation,
13341 and returns information about split instances after applying the set of
13344 In particular, it returns information about newly split instances, and
13345 instances that were already split, and remain so after the change.
13347 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13350 @type changes: list of (node_name, new_group_uuid) pairs.
13351 @param changes: list of node assignments to consider.
13352 @param node_data: a dict with data for all nodes
13353 @param instance_data: a dict with all instances to consider
13354 @rtype: a two-tuple
13355 @return: a list of instances that were previously okay but end up split as a
13356 consequence of this change, and a list of instances that were previously
13357 split and that this change does not fix.
13360 changed_nodes = dict((node, group) for node, group in changes
13361 if node_data[node].group != group)
13363 all_split_instances = set()
13364 previously_split_instances = set()
13366 def InstanceNodes(instance):
13367 return [instance.primary_node] + list(instance.secondary_nodes)
13369 for inst in instance_data.values():
13370 if inst.disk_template not in constants.DTS_INT_MIRROR:
13373 instance_nodes = InstanceNodes(inst)
13375 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13376 previously_split_instances.add(inst.name)
13378 if len(set(changed_nodes.get(node, node_data[node].group)
13379 for node in instance_nodes)) > 1:
13380 all_split_instances.add(inst.name)
13382 return (list(all_split_instances - previously_split_instances),
13383 list(previously_split_instances & all_split_instances))
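# Hedged worked example (added for illustration, not part of the original
# code): consider a DRBD instance with primary node A and secondary node B,
# both currently in group G1.  Moving only A to G2, i.e.
#
#   changes = [("A", "G2")]
#
# leaves the instance spanning two groups, so it appears in the first
# returned list (newly split).  An instance whose nodes were already in
# different groups before the change, and still are afterwards, appears in
# the second list instead.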
13386 class _GroupQuery(_QueryBase):
13387 FIELDS = query.GROUP_FIELDS
13389 def ExpandNames(self, lu):
13390 lu.needed_locks = {}
13392 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13393 self._cluster = lu.cfg.GetClusterInfo()
13394 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13397 self.wanted = [name_to_uuid[name]
13398 for name in utils.NiceSort(name_to_uuid.keys())]
13400 # Accept names to be either names or UUIDs.
13403 all_uuid = frozenset(self._all_groups.keys())
13405 for name in self.names:
13406 if name in all_uuid:
13407 self.wanted.append(name)
13408 elif name in name_to_uuid:
13409 self.wanted.append(name_to_uuid[name])
13411 missing.append(name)
13414 raise errors.OpPrereqError("Some groups do not exist: %s" %
13415 utils.CommaJoin(missing),
13416 errors.ECODE_NOENT)
13418 def DeclareLocks(self, lu, level):
13421 def _GetQueryData(self, lu):
13422 """Computes the list of node groups and their attributes.
13425 do_nodes = query.GQ_NODE in self.requested_data
13426 do_instances = query.GQ_INST in self.requested_data
13428 group_to_nodes = None
13429 group_to_instances = None
13431 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13432 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13433 # latter GetAllInstancesInfo() is not enough, for we have to go through
13434 # instance->node. Hence, we will need to process nodes even if we only need
13435 # instance information.
13436 if do_nodes or do_instances:
13437 all_nodes = lu.cfg.GetAllNodesInfo()
13438 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13441 for node in all_nodes.values():
13442 if node.group in group_to_nodes:
13443 group_to_nodes[node.group].append(node.name)
13444 node_to_group[node.name] = node.group
13447 all_instances = lu.cfg.GetAllInstancesInfo()
13448 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13450 for instance in all_instances.values():
13451 node = instance.primary_node
13452 if node in node_to_group:
13453 group_to_instances[node_to_group[node]].append(instance.name)
13456 # Do not pass on node information if it was not requested.
13457 group_to_nodes = None
13459 return query.GroupQueryData(self._cluster,
13460 [self._all_groups[uuid]
13461 for uuid in self.wanted],
13462 group_to_nodes, group_to_instances)
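# Hedged example (illustration only): when node/instance data is requested,
# the helper maps built above are keyed by group UUID, roughly
#
#   group_to_nodes     = {"<group uuid>": ["node1.example.com",
#                                          "node2.example.com"]}
#   group_to_instances = {"<group uuid>": ["instance1.example.com"]}
#
# where each instance is attributed to the group of its primary node.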
13465 class LUGroupQuery(NoHooksLU):
13466 """Logical unit for querying node groups.
13471 def CheckArguments(self):
13472 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13473 self.op.output_fields, False)
13475 def ExpandNames(self):
13476 self.gq.ExpandNames(self)
13478 def DeclareLocks(self, level):
13479 self.gq.DeclareLocks(self, level)
13481 def Exec(self, feedback_fn):
13482 return self.gq.OldStyleQuery(self)
13485 class LUGroupSetParams(LogicalUnit):
13486 """Modifies the parameters of a node group.
13489 HPATH = "group-modify"
13490 HTYPE = constants.HTYPE_GROUP
13493 def CheckArguments(self):
13496 self.op.diskparams,
13497 self.op.alloc_policy,
13499 self.op.disk_state,
13503 if all_changes.count(None) == len(all_changes):
13504 raise errors.OpPrereqError("Please pass at least one modification",
13505 errors.ECODE_INVAL)
13507 def ExpandNames(self):
13508 # This raises errors.OpPrereqError on its own:
13509 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13511 self.needed_locks = {
13512 locking.LEVEL_INSTANCE: [],
13513 locking.LEVEL_NODEGROUP: [self.group_uuid],
13516 self.share_locks[locking.LEVEL_INSTANCE] = 1
13518 def DeclareLocks(self, level):
13519 if level == locking.LEVEL_INSTANCE:
13520 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13522 # Lock instances optimistically, needs verification once group lock has
13524 self.needed_locks[locking.LEVEL_INSTANCE] = \
13525 self.cfg.GetNodeGroupInstances(self.group_uuid)
13527 def CheckPrereq(self):
13528 """Check prerequisites.
13531 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13533 # Check if locked instances are still correct
13534 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13536 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13537 cluster = self.cfg.GetClusterInfo()
13539 if self.group is None:
13540 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13541 (self.op.group_name, self.group_uuid))
13543 if self.op.ndparams:
13544 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13545 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13546 self.new_ndparams = new_ndparams
13548 if self.op.diskparams:
13549 self.new_diskparams = dict()
13550 for templ in constants.DISK_TEMPLATES:
13551 if templ not in self.op.diskparams:
13552 self.op.diskparams[templ] = {}
13553 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13554 self.op.diskparams[templ])
13555 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13556 self.new_diskparams[templ] = new_templ_params
13558 if self.op.hv_state:
13559 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13560 self.group.hv_state_static)
13562 if self.op.disk_state:
13563 self.new_disk_state = \
13564 _MergeAndVerifyDiskState(self.op.disk_state,
13565 self.group.disk_state_static)
13567 if self.op.ipolicy:
13568 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13572 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13573 inst_filter = lambda inst: inst.name in owned_instances
13574 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13576 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13578 new_ipolicy, instances)
13581 self.LogWarning("After the ipolicy change the following instances"
13582 " violate them: %s",
13583 utils.CommaJoin(violations))
13585 def BuildHooksEnv(self):
13586 """Build hooks env.
13590 "GROUP_NAME": self.op.group_name,
13591 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13594 def BuildHooksNodes(self):
13595 """Build hooks nodes.
13598 mn = self.cfg.GetMasterNode()
13599 return ([mn], [mn])
13601 def Exec(self, feedback_fn):
13602 """Modifies the node group.
13607 if self.op.ndparams:
13608 self.group.ndparams = self.new_ndparams
13609 result.append(("ndparams", str(self.group.ndparams)))
13611 if self.op.diskparams:
13612 self.group.diskparams = self.new_diskparams
13613 result.append(("diskparams", str(self.group.diskparams)))
13615 if self.op.alloc_policy:
13616 self.group.alloc_policy = self.op.alloc_policy
13618 if self.op.hv_state:
13619 self.group.hv_state_static = self.new_hv_state
13621 if self.op.disk_state:
13622 self.group.disk_state_static = self.new_disk_state
13624 if self.op.ipolicy:
13625 self.group.ipolicy = self.new_ipolicy
13627 self.cfg.Update(self.group, feedback_fn)
13631 class LUGroupRemove(LogicalUnit):
13632 HPATH = "group-remove"
13633 HTYPE = constants.HTYPE_GROUP
13636 def ExpandNames(self):
13637 # This raises errors.OpPrereqError on its own:
13638 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13639 self.needed_locks = {
13640 locking.LEVEL_NODEGROUP: [self.group_uuid],
13643 def CheckPrereq(self):
13644 """Check prerequisites.
13646 This checks that the given group name exists as a node group, that it is
13647 empty (i.e., contains no nodes), and that it is not the last group of the
13651 # Verify that the group is empty.
13652 group_nodes = [node.name
13653 for node in self.cfg.GetAllNodesInfo().values()
13654 if node.group == self.group_uuid]
13657 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13659 (self.op.group_name,
13660 utils.CommaJoin(utils.NiceSort(group_nodes))),
13661 errors.ECODE_STATE)
13663 # Verify the cluster would not be left group-less.
13664 if len(self.cfg.GetNodeGroupList()) == 1:
13665 raise errors.OpPrereqError("Group '%s' is the only group,"
13666 " cannot be removed" %
13667 self.op.group_name,
13668 errors.ECODE_STATE)
13670 def BuildHooksEnv(self):
13671 """Build hooks env.
13675 "GROUP_NAME": self.op.group_name,
13678 def BuildHooksNodes(self):
13679 """Build hooks nodes.
13682 mn = self.cfg.GetMasterNode()
13683 return ([mn], [mn])
13685 def Exec(self, feedback_fn):
13686 """Remove the node group.
13690 self.cfg.RemoveNodeGroup(self.group_uuid)
13691 except errors.ConfigurationError:
13692 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13693 (self.op.group_name, self.group_uuid))
13695 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13698 class LUGroupRename(LogicalUnit):
13699 HPATH = "group-rename"
13700 HTYPE = constants.HTYPE_GROUP
13703 def ExpandNames(self):
13704 # This raises errors.OpPrereqError on its own:
13705 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13707 self.needed_locks = {
13708 locking.LEVEL_NODEGROUP: [self.group_uuid],
13711 def CheckPrereq(self):
13712 """Check prerequisites.
13714 Ensures requested new name is not yet used.
13718 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13719 except errors.OpPrereqError:
13722 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13723 " node group (UUID: %s)" %
13724 (self.op.new_name, new_name_uuid),
13725 errors.ECODE_EXISTS)
13727 def BuildHooksEnv(self):
13728 """Build hooks env.
13732 "OLD_NAME": self.op.group_name,
13733 "NEW_NAME": self.op.new_name,
13736 def BuildHooksNodes(self):
13737 """Build hooks nodes.
13740 mn = self.cfg.GetMasterNode()
13742 all_nodes = self.cfg.GetAllNodesInfo()
13743 all_nodes.pop(mn, None)
13746 run_nodes.extend(node.name for node in all_nodes.values()
13747 if node.group == self.group_uuid)
13749 return (run_nodes, run_nodes)
13751 def Exec(self, feedback_fn):
13752 """Rename the node group.
13755 group = self.cfg.GetNodeGroup(self.group_uuid)
13758 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13759 (self.op.group_name, self.group_uuid))
13761 group.name = self.op.new_name
13762 self.cfg.Update(group, feedback_fn)
13764 return self.op.new_name
13767 class LUGroupEvacuate(LogicalUnit):
13768 HPATH = "group-evacuate"
13769 HTYPE = constants.HTYPE_GROUP
13772 def ExpandNames(self):
13773 # This raises errors.OpPrereqError on its own:
13774 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13776 if self.op.target_groups:
13777 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13778 self.op.target_groups)
13780 self.req_target_uuids = []
13782 if self.group_uuid in self.req_target_uuids:
13783 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13784 " as a target group (targets are %s)" %
13786 utils.CommaJoin(self.req_target_uuids)),
13787 errors.ECODE_INVAL)
13789 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13791 self.share_locks = _ShareAll()
13792 self.needed_locks = {
13793 locking.LEVEL_INSTANCE: [],
13794 locking.LEVEL_NODEGROUP: [],
13795 locking.LEVEL_NODE: [],
13798 def DeclareLocks(self, level):
13799 if level == locking.LEVEL_INSTANCE:
13800 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13802 # Lock instances optimistically, needs verification once node and group
13803 # locks have been acquired
13804 self.needed_locks[locking.LEVEL_INSTANCE] = \
13805 self.cfg.GetNodeGroupInstances(self.group_uuid)
13807 elif level == locking.LEVEL_NODEGROUP:
13808 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13810 if self.req_target_uuids:
13811 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13813 # Lock all groups used by instances optimistically; this requires going
13814 # via the node before it's locked, requiring verification later on
13815 lock_groups.update(group_uuid
13816 for instance_name in
13817 self.owned_locks(locking.LEVEL_INSTANCE)
13819 self.cfg.GetInstanceNodeGroups(instance_name))
13821 # No target groups, need to lock all of them
13822 lock_groups = locking.ALL_SET
13824 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13826 elif level == locking.LEVEL_NODE:
13827 # This will only lock the nodes in the group to be evacuated which
13828 # contain actual instances
13829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13830 self._LockInstancesNodes()
13832 # Lock all nodes in group to be evacuated and target groups
13833 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13834 assert self.group_uuid in owned_groups
13835 member_nodes = [node_name
13836 for group in owned_groups
13837 for node_name in self.cfg.GetNodeGroup(group).members]
13838 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13840 def CheckPrereq(self):
13841 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13842 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13843 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13845 assert owned_groups.issuperset(self.req_target_uuids)
13846 assert self.group_uuid in owned_groups
13848 # Check if locked instances are still correct
13849 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13851 # Get instance information
13852 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13854 # Check if node groups for locked instances are still correct
13855 for instance_name in owned_instances:
13856 inst = self.instances[instance_name]
13857 assert owned_nodes.issuperset(inst.all_nodes), \
13858 "Instance %s's nodes changed while we kept the lock" % instance_name
13860 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13863 assert self.group_uuid in inst_groups, \
13864 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13866 if self.req_target_uuids:
13867 # User requested specific target groups
13868 self.target_uuids = self.req_target_uuids
13870 # All groups except the one to be evacuated are potential targets
13871 self.target_uuids = [group_uuid for group_uuid in owned_groups
13872 if group_uuid != self.group_uuid]
13874 if not self.target_uuids:
13875 raise errors.OpPrereqError("There are no possible target groups",
13876 errors.ECODE_INVAL)
13878 def BuildHooksEnv(self):
13879 """Build hooks env.
13883 "GROUP_NAME": self.op.group_name,
13884 "TARGET_GROUPS": " ".join(self.target_uuids),
13887 def BuildHooksNodes(self):
13888 """Build hooks nodes.
13891 mn = self.cfg.GetMasterNode()
13893 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13895 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13897 return (run_nodes, run_nodes)
13899 def Exec(self, feedback_fn):
13900 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13902 assert self.group_uuid not in self.target_uuids
13904 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13905 instances=instances, target_groups=self.target_uuids)
13907 ial.Run(self.op.iallocator)
13909 if not ial.success:
13910 raise errors.OpPrereqError("Can't compute group evacuation using"
13911 " iallocator '%s': %s" %
13912 (self.op.iallocator, ial.info),
13913 errors.ECODE_NORES)
13915 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13917 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13918 len(jobs), self.op.group_name)
13920 return ResultWithJobs(jobs)
13923 class TagsLU(NoHooksLU): # pylint: disable=W0223
13924 """Generic tags LU.
13926 This is an abstract class which is the parent of all the other tags LUs.
13929 def ExpandNames(self):
13930 self.group_uuid = None
13931 self.needed_locks = {}
13932 if self.op.kind == constants.TAG_NODE:
13933 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13934 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13935 elif self.op.kind == constants.TAG_INSTANCE:
13936 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13937 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13938 elif self.op.kind == constants.TAG_NODEGROUP:
13939 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13941 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13942 # not possible to acquire the BGL based on opcode parameters)
13944 def CheckPrereq(self):
13945 """Check prerequisites.
13948 if self.op.kind == constants.TAG_CLUSTER:
13949 self.target = self.cfg.GetClusterInfo()
13950 elif self.op.kind == constants.TAG_NODE:
13951 self.target = self.cfg.GetNodeInfo(self.op.name)
13952 elif self.op.kind == constants.TAG_INSTANCE:
13953 self.target = self.cfg.GetInstanceInfo(self.op.name)
13954 elif self.op.kind == constants.TAG_NODEGROUP:
13955 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13957 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13958 str(self.op.kind), errors.ECODE_INVAL)
13961 class LUTagsGet(TagsLU):
13962 """Returns the tags of a given object.
13967 def ExpandNames(self):
13968 TagsLU.ExpandNames(self)
13970 # Share locks as this is only a read operation
13971 self.share_locks = _ShareAll()
13973 def Exec(self, feedback_fn):
13974 """Returns the tag list.
13977 return list(self.target.GetTags())
13980 class LUTagsSearch(NoHooksLU):
13981 """Searches the tags for a given pattern.
13986 def ExpandNames(self):
13987 self.needed_locks = {}
13989 def CheckPrereq(self):
13990 """Check prerequisites.
13992 This checks the pattern passed for validity by compiling it.
13996 self.re = re.compile(self.op.pattern)
13997 except re.error, err:
13998 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13999 (self.op.pattern, err), errors.ECODE_INVAL)
14001 def Exec(self, feedback_fn):
14002 """Returns the tag list.
14006 tgts = [("/cluster", cfg.GetClusterInfo())]
14007 ilist = cfg.GetAllInstancesInfo().values()
14008 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14009 nlist = cfg.GetAllNodesInfo().values()
14010 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14011 tgts.extend(("/nodegroup/%s" % n.name, n)
14012 for n in cfg.GetAllNodeGroupsInfo().values())
14014 for path, target in tgts:
14015 for tag in target.GetTags():
14016 if self.re.search(tag):
14017 results.append((path, tag))
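# Hedged example (illustration only): the search returns (path, tag) pairs
# across all tag-carrying objects, e.g. for the pattern "prod"
#
#   [("/cluster", "env:prod"),
#    ("/instances/web1.example.com", "env:prod")]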
14021 class LUTagsSet(TagsLU):
14022 """Sets a tag on a given object.
14027 def CheckPrereq(self):
14028 """Check prerequisites.
14030 This checks the type and length of the tag name and value.
14033 TagsLU.CheckPrereq(self)
14034 for tag in self.op.tags:
14035 objects.TaggableObject.ValidateTag(tag)
14037 def Exec(self, feedback_fn):
14042 for tag in self.op.tags:
14043 self.target.AddTag(tag)
14044 except errors.TagError, err:
14045 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14046 self.cfg.Update(self.target, feedback_fn)
14049 class LUTagsDel(TagsLU):
14050 """Delete a list of tags from a given object.
14055 def CheckPrereq(self):
14056 """Check prerequisites.
14058 This checks that we have the given tag.
14061 TagsLU.CheckPrereq(self)
14062 for tag in self.op.tags:
14063 objects.TaggableObject.ValidateTag(tag)
14064 del_tags = frozenset(self.op.tags)
14065 cur_tags = self.target.GetTags()
14067 diff_tags = del_tags - cur_tags
14069 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14070 raise errors.OpPrereqError("Tag(s) %s not found" %
14071 (utils.CommaJoin(diff_names), ),
14072 errors.ECODE_NOENT)
14074 def Exec(self, feedback_fn):
14075 """Remove the tag from the object.
14078 for tag in self.op.tags:
14079 self.target.RemoveTag(tag)
14080 self.cfg.Update(self.target, feedback_fn)
14083 class LUTestDelay(NoHooksLU):
14084 """Sleep for a specified amount of time.
14086 This LU sleeps on the master and/or nodes for a specified amount of
14092 def ExpandNames(self):
14093 """Expand names and set required locks.
14095 This expands the node list, if any.
14098 self.needed_locks = {}
14099 if self.op.on_nodes:
14100 # _GetWantedNodes can be used here, but is not always appropriate to use
14101 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14102 # more information.
14103 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14104 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14106 def _TestDelay(self):
14107 """Do the actual sleep.
14110 if self.op.on_master:
14111 if not utils.TestDelay(self.op.duration):
14112 raise errors.OpExecError("Error during master delay test")
14113 if self.op.on_nodes:
14114 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14115 for node, node_result in result.items():
14116 node_result.Raise("Failure during rpc call to node %s" % node)
14118 def Exec(self, feedback_fn):
14119 """Execute the test delay opcode, with the wanted repetitions.
14122 if self.op.repeat == 0:
14125 top_value = self.op.repeat - 1
14126 for i in range(self.op.repeat):
14127 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14131 class LUTestJqueue(NoHooksLU):
14132 """Utility LU to test some aspects of the job queue.
14137 # Must be lower than default timeout for WaitForJobChange to see whether it
14138 # notices changed jobs
14139 _CLIENT_CONNECT_TIMEOUT = 20.0
14140 _CLIENT_CONFIRM_TIMEOUT = 60.0
14143 def _NotifyUsingSocket(cls, cb, errcls):
14144 """Opens a Unix socket and waits for another program to connect.
14147 @param cb: Callback to send socket name to client
14148 @type errcls: class
14149 @param errcls: Exception class to use for errors
14152 # Using a temporary directory as there's no easy way to create temporary
14153 # sockets without writing a custom loop around tempfile.mktemp and
14155 tmpdir = tempfile.mkdtemp()
14157 tmpsock = utils.PathJoin(tmpdir, "sock")
14159 logging.debug("Creating temporary socket at %s", tmpsock)
14160 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14165 # Send details to client
14168 # Wait for client to connect before continuing
14169 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14171 (conn, _) = sock.accept()
14172 except socket.error, err:
14173 raise errcls("Client didn't connect in time (%s)" % err)
14177 # Remove as soon as client is connected
14178 shutil.rmtree(tmpdir)
14180 # Wait for client to close
14183 # pylint: disable=E1101
14184 # Instance of '_socketobject' has no ... member
14185 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14187 except socket.error, err:
14188 raise errcls("Client failed to confirm notification (%s)" % err)
14192 def _SendNotification(self, test, arg, sockname):
14193 """Sends a notification to the client.
14196 @param test: Test name
14197 @param arg: Test argument (depends on test)
14198 @type sockname: string
14199 @param sockname: Socket path
14202 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14204 def _Notify(self, prereq, test, arg):
14205 """Notifies the client of a test.
14208 @param prereq: Whether this is a prereq-phase test
14210 @param test: Test name
14211 @param arg: Test argument (depends on test)
14215 errcls = errors.OpPrereqError
14217 errcls = errors.OpExecError
14219 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14223 def CheckArguments(self):
14224 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14225 self.expandnames_calls = 0
14227 def ExpandNames(self):
14228 checkargs_calls = getattr(self, "checkargs_calls", 0)
14229 if checkargs_calls < 1:
14230 raise errors.ProgrammerError("CheckArguments was not called")
14232 self.expandnames_calls += 1
14234 if self.op.notify_waitlock:
14235 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14237 self.LogInfo("Expanding names")
14239 # Get lock on master node (just to get a lock, not for a particular reason)
14240 self.needed_locks = {
14241 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14244 def Exec(self, feedback_fn):
14245 if self.expandnames_calls < 1:
14246 raise errors.ProgrammerError("ExpandNames was not called")
14248 if self.op.notify_exec:
14249 self._Notify(False, constants.JQT_EXEC, None)
14251 self.LogInfo("Executing")
14253 if self.op.log_messages:
14254 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14255 for idx, msg in enumerate(self.op.log_messages):
14256 self.LogInfo("Sending log message %s", idx + 1)
14257 feedback_fn(constants.JQT_MSGPREFIX + msg)
14258 # Report how many test messages have been sent
14259 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14262 raise errors.OpExecError("Opcode failure was requested")
14267 class IAllocator(object):
14268 """IAllocator framework.
14270 An IAllocator instance has four sets of attributes:
14271 - cfg that is needed to query the cluster
14272 - input data (all members of the _KEYS class attribute are required)
14273 - four buffer attributes (in|out_data|text), that represent the
14274 input (to the external script) in text and data structure format,
14275 and the output from it, again in two formats
14276 - the result variables from the script (success, info, result) for
14280 # pylint: disable=R0902
14281 # lots of instance attributes
14283 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14285 self.rpc = rpc_runner
14286 # init buffer variables
14287 self.in_text = self.out_text = self.in_data = self.out_data = None
14288 # init all input fields so that pylint is happy
14290 self.memory = self.disks = self.disk_template = None
14291 self.os = self.tags = self.nics = self.vcpus = None
14292 self.hypervisor = None
14293 self.relocate_from = None
14295 self.instances = None
14296 self.evac_mode = None
14297 self.target_groups = []
14299 self.required_nodes = None
14300 # init result fields
14301 self.success = self.info = self.result = None
14304 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14306 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14307 " IAllocator" % self.mode)
14309 keyset = [n for (n, _) in keydata]
14312 if key not in keyset:
14313 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14314 " IAllocator" % key)
14315 setattr(self, key, kwargs[key])
14318 if key not in kwargs:
14319 raise errors.ProgrammerError("Missing input parameter '%s' to"
14320 " IAllocator" % key)
14321 self._BuildInputData(compat.partial(fn, self), keydata)
14323 def _ComputeClusterData(self):
14324 """Compute the generic allocator input data.
14326 This is the data that is independent of the actual operation.
14330 cluster_info = cfg.GetClusterInfo()
14333 "version": constants.IALLOCATOR_VERSION,
14334 "cluster_name": cfg.GetClusterName(),
14335 "cluster_tags": list(cluster_info.GetTags()),
14336 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14337 # we don't have job IDs
14339 ninfo = cfg.GetAllNodesInfo()
14340 iinfo = cfg.GetAllInstancesInfo().values()
14341 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14344 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14346 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14347 hypervisor_name = self.hypervisor
14348 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14349 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14351 hypervisor_name = cluster_info.primary_hypervisor
14353 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14356 self.rpc.call_all_instances_info(node_list,
14357 cluster_info.enabled_hypervisors)
14359 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14361 config_ndata = self._ComputeBasicNodeData(ninfo)
14362 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14363 i_list, config_ndata)
14364 assert len(data["nodes"]) == len(ninfo), \
14365 "Incomplete node data computed"
14367 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14369 self.in_data = data
14372 def _ComputeNodeGroupData(cfg):
14373 """Compute node groups data.
14376 cluster = cfg.GetClusterInfo()
14377 ng = dict((guuid, {
14378 "name": gdata.name,
14379 "alloc_policy": gdata.alloc_policy,
14380 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14382 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14387 def _ComputeBasicNodeData(node_cfg):
14388 """Compute global node data.
14391 @returns: a dict of name: (node dict, node config)
14394 # fill in static (config-based) values
14395 node_results = dict((ninfo.name, {
14396 "tags": list(ninfo.GetTags()),
14397 "primary_ip": ninfo.primary_ip,
14398 "secondary_ip": ninfo.secondary_ip,
14399 "offline": ninfo.offline,
14400 "drained": ninfo.drained,
14401 "master_candidate": ninfo.master_candidate,
14402 "group": ninfo.group,
14403 "master_capable": ninfo.master_capable,
14404 "vm_capable": ninfo.vm_capable,
14406 for ninfo in node_cfg.values())
14408 return node_results
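# Hedged example (illustration only): a single entry of the static node map
# produced above looks roughly like
#
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.10",
#                         "secondary_ip": "192.0.2.10", "offline": False,
#                         "drained": False, "master_candidate": True,
#                         "group": "<group uuid>", "master_capable": True,
#                         "vm_capable": True}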
14411 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14413 """Compute global node data.
14415 @param node_results: the basic node structures as filled from the config
14418 # TODO(dynmem): compute the right data on MAX and MIN memory
14419 # make a copy of the current dict
14420 node_results = dict(node_results)
14421 for nname, nresult in node_data.items():
14422 assert nname in node_results, "Missing basic data for node %s" % nname
14423 ninfo = node_cfg[nname]
14425 if not (ninfo.offline or ninfo.drained):
14426 nresult.Raise("Can't get data for node %s" % nname)
14427 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14429 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14431 for attr in ["memory_total", "memory_free", "memory_dom0",
14432 "vg_size", "vg_free", "cpu_total"]:
14433 if attr not in remote_info:
14434 raise errors.OpExecError("Node '%s' didn't return attribute"
14435 " '%s'" % (nname, attr))
14436 if not isinstance(remote_info[attr], int):
14437 raise errors.OpExecError("Node '%s' returned invalid value"
14439 (nname, attr, remote_info[attr]))
14440 # compute memory used by primary instances
14441 i_p_mem = i_p_up_mem = 0
14442 for iinfo, beinfo in i_list:
14443 if iinfo.primary_node == nname:
14444 i_p_mem += beinfo[constants.BE_MAXMEM]
14445 if iinfo.name not in node_iinfo[nname].payload:
14448 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14449 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14450 remote_info["memory_free"] -= max(0, i_mem_diff)
14452 if iinfo.admin_state == constants.ADMINST_UP:
14453 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14455 # compute memory used by instances
14457 "total_memory": remote_info["memory_total"],
14458 "reserved_memory": remote_info["memory_dom0"],
14459 "free_memory": remote_info["memory_free"],
14460 "total_disk": remote_info["vg_size"],
14461 "free_disk": remote_info["vg_free"],
14462 "total_cpus": remote_info["cpu_total"],
14463 "i_pri_memory": i_p_mem,
14464 "i_pri_up_memory": i_p_up_mem,
14466 pnr_dyn.update(node_results[nname])
14467 node_results[nname] = pnr_dyn
14469 return node_results
14472 def _ComputeInstanceData(cluster_info, i_list):
14473 """Compute global instance data.
14477 for iinfo, beinfo in i_list:
14479 for nic in iinfo.nics:
14480 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14484 "mode": filled_params[constants.NIC_MODE],
14485 "link": filled_params[constants.NIC_LINK],
14487 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14488 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14489 nic_data.append(nic_dict)
14491 "tags": list(iinfo.GetTags()),
14492 "admin_state": iinfo.admin_state,
14493 "vcpus": beinfo[constants.BE_VCPUS],
14494 "memory": beinfo[constants.BE_MAXMEM],
14496 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14498 "disks": [{constants.IDISK_SIZE: dsk.size,
14499 constants.IDISK_MODE: dsk.mode}
14500 for dsk in iinfo.disks],
14501 "disk_template": iinfo.disk_template,
14502 "hypervisor": iinfo.hypervisor,
14504 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14506 instance_data[iinfo.name] = pir
14508 return instance_data
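# Hedged example (illustration only): one entry of the instance map built
# above, for a hypothetical single-disk DRBD instance, looks roughly like
#
#   "instance1.example.com": {
#     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#     "nics": [{"mac": "aa:00:00:12:34:56", "mode": "bridged",
#               "link": "xen-br0", "bridge": "xen-br0"}],
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#     "disk_space_total": <disk sizes plus DRBD metadata overhead>}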
14510 def _AddNewInstance(self):
14511 """Add new instance data to allocator structure.
14513 This in combination with _ComputeClusterData will create the
14514 correct structure needed as input for the allocator.
14516 The checks for the completeness of the opcode must have already been
14520 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14522 if self.disk_template in constants.DTS_INT_MIRROR:
14523 self.required_nodes = 2
14525 self.required_nodes = 1
14529 "disk_template": self.disk_template,
14532 "vcpus": self.vcpus,
14533 "memory": self.memory,
14534 "disks": self.disks,
14535 "disk_space_total": disk_space,
14537 "required_nodes": self.required_nodes,
14538 "hypervisor": self.hypervisor,
14543 def _AddRelocateInstance(self):
14544 """Add relocate instance data to allocator structure.
14546 This in combination with _ComputeClusterData will create the
14547 correct structure needed as input for the allocator.
14549 The checks for the completeness of the opcode must have already been
14553 instance = self.cfg.GetInstanceInfo(self.name)
14554 if instance is None:
14555 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14556 " IAllocator" % self.name)
14558 if instance.disk_template not in constants.DTS_MIRRORED:
14559 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14560 errors.ECODE_INVAL)
14562 if instance.disk_template in constants.DTS_INT_MIRROR and \
14563 len(instance.secondary_nodes) != 1:
14564 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14565 errors.ECODE_STATE)
14567 self.required_nodes = 1
14568 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14569 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14573 "disk_space_total": disk_space,
14574 "required_nodes": self.required_nodes,
14575 "relocate_from": self.relocate_from,

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
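
  # Illustrative sketch only, not part of the original code: after
  # _BuildInputData has run, self.in_text is the serialized form of a
  # structure roughly shaped like the following, where the cluster-wide keys
  # come from _ComputeClusterData and "request" from the mode-specific fn()
  # (keys abridged, sample values invented):
  #
  #   {
  #     "cluster_tags": [...],
  #     "nodegroups": {...},   # per-group data
  #     "nodes": {...},        # per-node data as computed above
  #     "instances": {...},    # per-instance data, see _ComputeInstanceData
  #     "request": {"type": "allocate", ...},
  #   }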

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
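
  # Illustrative sketch only, not part of the original code: a value passing
  # _NEVAC_RESULT is a three-element list of (moved, failed, jobs), e.g.
  # (sample values invented):
  #
  #   [
  #     [["inst1.example.com", "group2", ["node3.example.com"]]],   # moved
  #     [["inst2.example.com", "disk template not mirrored"]],      # failed
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],                  # job list
  #   ]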

  # For each mode: the function that builds the request, the keydata used to
  # validate it, and the check applied to the allocator's result
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
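
  # Illustrative usage sketch only, not part of the original code. Assuming an
  # already prepared IAllocator instance "ial" and an allocator script named
  # "hail" installed on the master node:
  #
  #   ial.Run("hail")        # runs the script and validates its output
  #   if not ial.success:
  #     raise errors.OpExecError("Allocation failed: %s" % ial.info)
  #   chosen = ial.result    # mode-specific result, e.g. the selected nodes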

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
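
  # Illustrative sketch only, not part of the original code: a well-formed
  # allocator reply for an allocation request could parse to (sample values
  # invented):
  #
  #   {"success": True,
  #    "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # i.e. exactly the three keys checked for above, with "result" in the
  # mode-specific shape verified by self._result_check.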

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
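
  # Illustrative sketch only, not part of the original code: given
  # node2group = {"node1": "uuid-a", "node2": "uuid-b"} and
  # groups = {"uuid-a": {"name": "group1"}}, then
  # _NodesToGroups(node2group, groups, ["node1", "node2", "node9"]) returns
  # ["group1", "uuid-b"]: unknown nodes are skipped and groups without data
  # fall back to their UUID.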


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
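
# Illustrative usage sketch only, not part of the original code: callers look
# up the query implementation for a resource and then instantiate it, e.g.
#
#   impl_cls = _GetQueryImplementation(constants.QR_INSTANCE)
#   # impl_cls is _InstanceQuery; the calling LU then constructs it with the
#   # query filter and the requested fields.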