4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
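# Illustrative sketch (not part of the original source): an LU's Exec method
# can hand follow-up work to the processor by returning ResultWithJobs. The
# instance names below are hypothetical.
#
#   def Exec(self, feedback_fn):
#     ...
#     jobs = [
#       [opcodes.OpInstanceStartup(instance_name="inst1.example.com")],
#       [opcodes.OpInstanceStartup(instance_name="inst2.example.com")],
#     ]
#     return ResultWithJobs(jobs)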
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods need no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If there are no nodes, an empty
309 list should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged, but any LU can override it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function pylint warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called from DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
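# Illustrative sketch (assumption, not from the original source): a typical
# instance-level LU combines the helpers above roughly as follows; the class
# name is hypothetical.
#
#   class LUInstanceExample(LogicalUnit):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()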
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLU.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU;
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklet.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
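# Illustrative use (see e.g. LUClusterVerifyGroup.ExpandNames below): LUs that
# can run with all their locks shared simply do
#
#   self.share_locks = _ShareAll()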
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
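# Illustrative example (assumption; parameter names are made up) of the merge
# semantics described in the docstring above:
#
#   _GetUpdatedParams({"acpi": True, "kernel_path": "/vmlinuz"},
#                     {"acpi": constants.VALUE_DEFAULT, "serial_console": True})
#   # -> {"kernel_path": "/vmlinuz", "serial_console": True}
#   # "acpi" is removed because VALUE_DEFAULT marks it for deletion when
#   # use_default=True (the default).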
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of an instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_ISPECS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 if not value or value == [constants.VALUE_DEFAULT]:
747 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
748 " on the cluster" % key,
751 if key in constants.IPOLICY_PARAMETERS:
752 # FIXME: we assume all such values are float
754 ipolicy[key] = float(value)
755 except (TypeError, ValueError), err:
756 raise errors.OpPrereqError("Invalid value for attribute"
757 " '%s': '%s', error: %s" %
758 (key, value, err), errors.ECODE_INVAL)
760 # FIXME: we assume all others are lists; this should be redone
762 ipolicy[key] = list(value)
764 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
765 except errors.ConfigurationError, err:
766 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
771 def _UpdateAndVerifySubDict(base, updates, type_check):
772 """Updates and verifies a dict with sub dicts of the same type.
774 @param base: The dict with the old data
775 @param updates: The dict with the new data
776 @param type_check: Dict suitable to ForceDictType to verify correct types
777 @returns: A new dict with updated and verified values
781 new = _GetUpdatedParams(old, value)
782 utils.ForceDictType(new, type_check)
785 ret = copy.deepcopy(base)
786 ret.update(dict((key, fn(base.get(key, {}), value))
787 for key, value in updates.items()))
791 def _MergeAndVerifyHvState(op_input, obj_input):
792 """Combines the hv state from an opcode with that of the object.
794 @param op_input: The input dict from the opcode
795 @param obj_input: The input dict from the objects
796 @return: The verified and updated dict
800 invalid_hvs = set(op_input) - constants.HYPER_TYPES
802 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
803 " %s" % utils.CommaJoin(invalid_hvs),
805 if obj_input is None:
807 type_check = constants.HVSTS_PARAMETER_TYPES
808 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
813 def _MergeAndVerifyDiskState(op_input, obj_input):
814 """Combines the disk state from an opcode with that of the object.
816 @param op_input: The input dict from the opcode
817 @param obj_input: The input dict from the objects
818 @return: The verified and updated dict
821 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
823 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
824 utils.CommaJoin(invalid_dst),
826 type_check = constants.DSS_PARAMETER_TYPES
827 if obj_input is None:
829 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
831 for key, value in op_input.items())
836 def _ReleaseLocks(lu, level, names=None, keep=None):
837 """Releases locks owned by an LU.
839 @type lu: L{LogicalUnit}
840 @param level: Lock level
841 @type names: list or None
842 @param names: Names of locks to release
843 @type keep: list or None
844 @param keep: Names of locks to retain
847 assert not (keep is not None and names is not None), \
848 "Only one of the 'names' and the 'keep' parameters can be given"
850 if names is not None:
851 should_release = names.__contains__
853 should_release = lambda name: name not in keep
855 should_release = None
857 owned = lu.owned_locks(level)
859 # Not owning any lock at this level, do nothing
866 # Determine which locks to release
868 if should_release(name):
873 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
875 # Release just some locks
876 lu.glm.release(level, names=release)
878 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
881 lu.glm.release(level)
883 assert not lu.glm.is_owned(level), "No locks should be owned"
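# Illustrative calls (assumption; the variables are hypothetical):
#
#   # keep only the locks still needed for the instance's nodes
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=instance.all_nodes)
#   # release a specific set of node locks
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)
#   # release everything held at this level
#   _ReleaseLocks(self, locking.LEVEL_NODE)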
886 def _MapInstanceDisksToNodes(instances):
887 """Creates a map from (node, volume) to instance name.
889 @type instances: list of L{objects.Instance}
890 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
893 return dict(((node, vol), inst.name)
894 for inst in instances
895 for (node, vols) in inst.MapLVsByNode().items()
899 def _RunPostHook(lu, node_name):
900 """Runs the post-hook for an opcode on a single node.
903 hm = lu.proc.BuildHooksManager(lu)
905 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
907 # pylint: disable=W0702
908 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
911 def _CheckOutputFields(static, dynamic, selected):
912 """Checks whether all selected fields are valid.
914 @type static: L{utils.FieldSet}
915 @param static: static fields set
916 @type dynamic: L{utils.FieldSet}
917 @param dynamic: dynamic fields set
924 delta = f.NonMatching(selected)
926 raise errors.OpPrereqError("Unknown output fields selected: %s"
927 % ",".join(delta), errors.ECODE_INVAL)
930 def _CheckGlobalHvParams(params):
931 """Validates that given hypervisor params are not global ones.
933 This will ensure that instances don't get customised versions of global parameters.
937 used_globals = constants.HVC_GLOBALS.intersection(params)
939 msg = ("The following hypervisor parameters are global and cannot"
940 " be customized at instance level, please modify them at"
941 " cluster level: %s" % utils.CommaJoin(used_globals))
942 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
945 def _CheckNodeOnline(lu, node, msg=None):
946 """Ensure that a given node is online.
948 @param lu: the LU on behalf of which we make the check
949 @param node: the node to check
950 @param msg: if passed, should be a message to replace the default one
951 @raise errors.OpPrereqError: if the node is offline
955 msg = "Can't use offline node"
956 if lu.cfg.GetNodeInfo(node).offline:
957 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
960 def _CheckNodeNotDrained(lu, node):
961 """Ensure that a given node is not drained.
963 @param lu: the LU on behalf of which we make the check
964 @param node: the node to check
965 @raise errors.OpPrereqError: if the node is drained
968 if lu.cfg.GetNodeInfo(node).drained:
969 raise errors.OpPrereqError("Can't use drained node %s" % node,
973 def _CheckNodeVmCapable(lu, node):
974 """Ensure that a given node is vm capable.
976 @param lu: the LU on behalf of which we make the check
977 @param node: the node to check
978 @raise errors.OpPrereqError: if the node is not vm capable
981 if not lu.cfg.GetNodeInfo(node).vm_capable:
982 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
986 def _CheckNodeHasOS(lu, node, os_name, force_variant):
987 """Ensure that a node supports a given OS.
989 @param lu: the LU on behalf of which we make the check
990 @param node: the node to check
991 @param os_name: the OS to query about
992 @param force_variant: whether to ignore variant errors
993 @raise errors.OpPrereqError: if the node is not supporting the OS
996 result = lu.rpc.call_os_get(node, os_name)
997 result.Raise("OS '%s' not in supported OS list for node %s" %
999 prereq=True, ecode=errors.ECODE_INVAL)
1000 if not force_variant:
1001 _CheckOSVariant(result.payload, os_name)
1004 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1005 """Ensure that a node has the given secondary ip.
1007 @type lu: L{LogicalUnit}
1008 @param lu: the LU on behalf of which we make the check
1010 @param node: the node to check
1011 @type secondary_ip: string
1012 @param secondary_ip: the ip to check
1013 @type prereq: boolean
1014 @param prereq: whether to throw a prerequisite or an execute error
1015 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1016 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1019 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1020 result.Raise("Failure checking secondary ip on node %s" % node,
1021 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1022 if not result.payload:
1023 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1024 " please fix and re-run this command" % secondary_ip)
1026 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1028 raise errors.OpExecError(msg)
1031 def _GetClusterDomainSecret():
1032 """Reads the cluster domain secret.
1035 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1039 def _CheckInstanceState(lu, instance, req_states, msg=None):
1040 """Ensure that an instance is in one of the required states.
1042 @param lu: the LU on behalf of which we make the check
1043 @param instance: the instance to check
1044 @param msg: if passed, should be a message to replace the default one
1045 @raise errors.OpPrereqError: if the instance is not in the required state
1049 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1050 if instance.admin_state not in req_states:
1051 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1052 (instance.name, instance.admin_state, msg),
1055 if constants.ADMINST_UP not in req_states:
1056 pnode = instance.primary_node
1057 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1058 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1059 prereq=True, ecode=errors.ECODE_ENVIRON)
1061 if instance.name in ins_l.payload:
1062 raise errors.OpPrereqError("Instance %s is running, %s" %
1063 (instance.name, msg), errors.ECODE_STATE)
1066 def _ComputeMinMaxSpec(name, ipolicy, value):
1067 """Computes if value is in the desired range.
1069 @param name: name of the parameter for which we perform the check
1070 @param ipolicy: dictionary containing min, max and std values
1071 @param value: actual value that we want to use
1072 @return: None or element not meeting the criteria
1076 if value in [None, constants.VALUE_AUTO]:
1078 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1079 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1080 if value > max_v or min_v > value:
1081 return ("%s value %s is not in range [%s, %s]" %
1082 (name, value, min_v, max_v))
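# Illustrative shape of the ipolicy argument (assumption; only the keys used
# above are shown, values are made up):
#
#   ipolicy = {
#     constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#     constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768},
#   }
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 64)
#   # -> error string ("... value 64 is not in range [128, 32768]")
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 512)
#   # -> None (value within range)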
1086 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1087 nic_count, disk_sizes,
1088 _compute_fn=_ComputeMinMaxSpec):
1089 """Verifies ipolicy against provided specs.
1092 @param ipolicy: The ipolicy
1094 @param mem_size: The memory size
1095 @type cpu_count: int
1096 @param cpu_count: Used cpu cores
1097 @type disk_count: int
1098 @param disk_count: Number of disks used
1099 @type nic_count: int
1100 @param nic_count: Number of nics used
1101 @type disk_sizes: list of ints
1102 @param disk_sizes: Disk sizes of used disks (len must match C{disk_count})
1103 @param _compute_fn: The compute function (unittest only)
1104 @return: A list of violations, or an empty list if no violations are found
1107 assert disk_count == len(disk_sizes)
1110 (constants.ISPEC_MEM_SIZE, mem_size),
1111 (constants.ISPEC_CPU_COUNT, cpu_count),
1112 (constants.ISPEC_DISK_COUNT, disk_count),
1113 (constants.ISPEC_NIC_COUNT, nic_count),
1114 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1117 (_compute_fn(name, ipolicy, value)
1118 for (name, value) in test_settings))
1121 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1122 _compute_fn=_ComputeIPolicySpecViolation):
1123 """Compute if instance meets the specs of ipolicy.
1126 @param ipolicy: The ipolicy to verify against
1127 @type instance: L{objects.Instance}
1128 @param instance: The instance to verify
1129 @param _compute_fn: The function to verify ipolicy (unittest only)
1130 @see: L{_ComputeIPolicySpecViolation}
1133 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1134 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1135 disk_count = len(instance.disks)
1136 disk_sizes = [disk.size for disk in instance.disks]
1137 nic_count = len(instance.nics)
1139 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1143 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1144 _compute_fn=_ComputeIPolicySpecViolation):
1145 """Compute if instance specs meet the specs of ipolicy.
1148 @param ipolicy: The ipolicy to verify against
1149 @type instance_spec: dict
1150 @param instance_spec: The instance spec to verify
1151 @param _compute_fn: The function to verify ipolicy (unittest only)
1152 @see: L{_ComputeIPolicySpecViolation}
1155 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1156 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1157 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1158 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1159 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1161 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1165 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1167 _compute_fn=_ComputeIPolicyInstanceViolation):
1168 """Compute if instance meets the specs of the new target group.
1170 @param ipolicy: The ipolicy to verify
1171 @param instance: The instance object to verify
1172 @param current_group: The current group of the instance
1173 @param target_group: The new group of the instance
1174 @param _compute_fn: The function to verify ipolicy (unittest only)
1175 @see: L{_ComputeIPolicySpecViolation}
1178 if current_group == target_group:
1181 return _compute_fn(ipolicy, instance)
1184 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1185 _compute_fn=_ComputeIPolicyNodeViolation):
1186 """Checks that the target node is correct in terms of instance policy.
1188 @param ipolicy: The ipolicy to verify
1189 @param instance: The instance object to verify
1190 @param node: The new node to relocate
1191 @param ignore: Ignore violations of the ipolicy
1192 @param _compute_fn: The function to verify ipolicy (unittest only)
1193 @see: L{_ComputeIPolicySpecViolation}
1196 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1197 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1200 msg = ("Instance does not meet target node group's (%s) instance"
1201 " policy: %s") % (node.group, utils.CommaJoin(res))
1205 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1208 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1209 """Computes the set of instances that would violate the new ipolicy.
1211 @param old_ipolicy: The current (still in-place) ipolicy
1212 @param new_ipolicy: The new (to become) ipolicy
1213 @param instances: List of instances to verify
1214 @return: A set of instances which violate the new ipolicy but did not before
1217 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1218 _ComputeViolatingInstances(new_ipolicy, instances))
1221 def _ExpandItemName(fn, name, kind):
1222 """Expand an item name.
1224 @param fn: the function to use for expansion
1225 @param name: requested item name
1226 @param kind: text description ('Node' or 'Instance')
1227 @return: the resolved (full) name
1228 @raise errors.OpPrereqError: if the item is not found
1231 full_name = fn(name)
1232 if full_name is None:
1233 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1238 def _ExpandNodeName(cfg, name):
1239 """Wrapper over L{_ExpandItemName} for nodes."""
1240 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1243 def _ExpandInstanceName(cfg, name):
1244 """Wrapper over L{_ExpandItemName} for instance."""
1245 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1248 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1249 minmem, maxmem, vcpus, nics, disk_template, disks,
1250 bep, hvp, hypervisor_name, tags):
1251 """Builds instance related env variables for hooks
1253 This builds the hook environment from individual variables.
1256 @param name: the name of the instance
1257 @type primary_node: string
1258 @param primary_node: the name of the instance's primary node
1259 @type secondary_nodes: list
1260 @param secondary_nodes: list of secondary nodes as strings
1261 @type os_type: string
1262 @param os_type: the name of the instance's OS
1263 @type status: string
1264 @param status: the desired status of the instance
1265 @type minmem: string
1266 @param minmem: the minimum memory size of the instance
1267 @type maxmem: string
1268 @param maxmem: the maximum memory size of the instance
1270 @param vcpus: the count of VCPUs the instance has
1272 @param nics: list of tuples (ip, mac, mode, link) representing
1273 the NICs the instance has
1274 @type disk_template: string
1275 @param disk_template: the disk template of the instance
1277 @param disks: the list of (size, mode) pairs
1279 @param bep: the backend parameters for the instance
1281 @param hvp: the hypervisor parameters for the instance
1282 @type hypervisor_name: string
1283 @param hypervisor_name: the hypervisor for the instance
1285 @param tags: list of instance tags as strings
1287 @return: the hook environment for this instance
1292 "INSTANCE_NAME": name,
1293 "INSTANCE_PRIMARY": primary_node,
1294 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1295 "INSTANCE_OS_TYPE": os_type,
1296 "INSTANCE_STATUS": status,
1297 "INSTANCE_MINMEM": minmem,
1298 "INSTANCE_MAXMEM": maxmem,
1299 # TODO(2.7) remove deprecated "memory" value
1300 "INSTANCE_MEMORY": maxmem,
1301 "INSTANCE_VCPUS": vcpus,
1302 "INSTANCE_DISK_TEMPLATE": disk_template,
1303 "INSTANCE_HYPERVISOR": hypervisor_name,
1306 nic_count = len(nics)
1307 for idx, (ip, mac, mode, link) in enumerate(nics):
1310 env["INSTANCE_NIC%d_IP" % idx] = ip
1311 env["INSTANCE_NIC%d_MAC" % idx] = mac
1312 env["INSTANCE_NIC%d_MODE" % idx] = mode
1313 env["INSTANCE_NIC%d_LINK" % idx] = link
1314 if mode == constants.NIC_MODE_BRIDGED:
1315 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1319 env["INSTANCE_NIC_COUNT"] = nic_count
1322 disk_count = len(disks)
1323 for idx, (size, mode) in enumerate(disks):
1324 env["INSTANCE_DISK%d_SIZE" % idx] = size
1325 env["INSTANCE_DISK%d_MODE" % idx] = mode
1329 env["INSTANCE_DISK_COUNT"] = disk_count
1334 env["INSTANCE_TAGS"] = " ".join(tags)
1336 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1337 for key, value in source.items():
1338 env["INSTANCE_%s_%s" % (kind, key)] = value
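# Illustrative excerpt (assumption; values are made up) of the environment
# built above for an instance with one NIC and one disk:
#
#   INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MAC=aa:00:00:12:34:56
#   INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=10240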
1343 def _NICListToTuple(lu, nics):
1344 """Build a list of nic information tuples.
1346 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1347 value in LUInstanceQueryData.
1349 @type lu: L{LogicalUnit}
1350 @param lu: the logical unit on whose behalf we execute
1351 @type nics: list of L{objects.NIC}
1352 @param nics: list of nics to convert to hooks tuples
1356 cluster = lu.cfg.GetClusterInfo()
1360 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1361 mode = filled_params[constants.NIC_MODE]
1362 link = filled_params[constants.NIC_LINK]
1363 hooks_nics.append((ip, mac, mode, link))
1367 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1368 """Builds instance related env variables for hooks from an object.
1370 @type lu: L{LogicalUnit}
1371 @param lu: the logical unit on whose behalf we execute
1372 @type instance: L{objects.Instance}
1373 @param instance: the instance for which we should build the
1375 @type override: dict
1376 @param override: dictionary with key/values that will override
1379 @return: the hook environment dictionary
1382 cluster = lu.cfg.GetClusterInfo()
1383 bep = cluster.FillBE(instance)
1384 hvp = cluster.FillHV(instance)
1386 "name": instance.name,
1387 "primary_node": instance.primary_node,
1388 "secondary_nodes": instance.secondary_nodes,
1389 "os_type": instance.os,
1390 "status": instance.admin_state,
1391 "maxmem": bep[constants.BE_MAXMEM],
1392 "minmem": bep[constants.BE_MINMEM],
1393 "vcpus": bep[constants.BE_VCPUS],
1394 "nics": _NICListToTuple(lu, instance.nics),
1395 "disk_template": instance.disk_template,
1396 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1399 "hypervisor_name": instance.hypervisor,
1400 "tags": instance.tags,
1403 args.update(override)
1404 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1407 def _AdjustCandidatePool(lu, exceptions):
1408 """Adjust the candidate pool after node operations.
1411 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1413 lu.LogInfo("Promoted nodes to master candidate role: %s",
1414 utils.CommaJoin(node.name for node in mod_list))
1415 for name in mod_list:
1416 lu.context.ReaddNode(name)
1417 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1419 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1423 def _DecideSelfPromotion(lu, exceptions=None):
1424 """Decide whether I should promote myself as a master candidate.
1427 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1428 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1429 # the new node will increase mc_max by one, so:
1430 mc_should = min(mc_should + 1, cp_size)
1431 return mc_now < mc_should
1434 def _CalculateGroupIPolicy(cluster, group):
1435 """Calculate instance policy for group.
1438 return cluster.SimpleFillIPolicy(group.ipolicy)
1441 def _ComputeViolatingInstances(ipolicy, instances):
1442 """Computes the set of instances that violate the given ipolicy.
1444 @param ipolicy: The ipolicy to verify
1445 @type instances: list of L{objects.Instance}
1446 @param instances: List of instances to verify
1447 @return: A frozenset of instance names violating the ipolicy
1450 return frozenset([inst.name for inst in instances
1451 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1454 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1455 """Check that the bridges needed by a list of nics exist.
1458 cluster = lu.cfg.GetClusterInfo()
1459 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1460 brlist = [params[constants.NIC_LINK] for params in paramslist
1461 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1463 result = lu.rpc.call_bridges_exist(target_node, brlist)
1464 result.Raise("Error checking bridges on destination node '%s'" %
1465 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1468 def _CheckInstanceBridgesExist(lu, instance, node=None):
1469 """Check that the bridges needed by an instance exist.
1473 node = instance.primary_node
1474 _CheckNicsBridgesExist(lu, instance.nics, node)
1477 def _CheckOSVariant(os_obj, name):
1478 """Check whether an OS name conforms to the os variants specification.
1480 @type os_obj: L{objects.OS}
1481 @param os_obj: OS object to check
1483 @param name: OS name passed by the user, to check for validity
1486 variant = objects.OS.GetVariant(name)
1487 if not os_obj.supported_variants:
1489 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1490 " passed)" % (os_obj.name, variant),
1494 raise errors.OpPrereqError("OS name must include a variant",
1497 if variant not in os_obj.supported_variants:
1498 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1501 def _GetNodeInstancesInner(cfg, fn):
1502 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1505 def _GetNodeInstances(cfg, node_name):
1506 """Returns a list of all primary and secondary instances on a node.
1510 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1513 def _GetNodePrimaryInstances(cfg, node_name):
1514 """Returns primary instances on a node.
1517 return _GetNodeInstancesInner(cfg,
1518 lambda inst: node_name == inst.primary_node)
1521 def _GetNodeSecondaryInstances(cfg, node_name):
1522 """Returns secondary instances on a node.
1525 return _GetNodeInstancesInner(cfg,
1526 lambda inst: node_name in inst.secondary_nodes)
1529 def _GetStorageTypeArgs(cfg, storage_type):
1530 """Returns the arguments for a storage type.
1533 # Special case for file storage
1534 if storage_type == constants.ST_FILE:
1535 # storage.FileStorage wants a list of storage directories
1536 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1541 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1544 for dev in instance.disks:
1545 cfg.SetDiskID(dev, node_name)
1547 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1548 result.Raise("Failed to get disk status from node %s" % node_name,
1549 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1551 for idx, bdev_status in enumerate(result.payload):
1552 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1558 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1559 """Check the sanity of iallocator and node arguments and use the
1560 cluster-wide iallocator if appropriate.
1562 Check that at most one of (iallocator, node) is specified. If none is
1563 specified, then the LU's opcode's iallocator slot is filled with the
1564 cluster-wide default iallocator.
1566 @type iallocator_slot: string
1567 @param iallocator_slot: the name of the opcode iallocator slot
1568 @type node_slot: string
1569 @param node_slot: the name of the opcode target node slot
1572 node = getattr(lu.op, node_slot, None)
1573 iallocator = getattr(lu.op, iallocator_slot, None)
1575 if node is not None and iallocator is not None:
1576 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1578 elif node is None and iallocator is None:
1579 default_iallocator = lu.cfg.GetDefaultIAllocator()
1580 if default_iallocator:
1581 setattr(lu.op, iallocator_slot, default_iallocator)
1583 raise errors.OpPrereqError("No iallocator or node given and no"
1584 " cluster-wide default iallocator found;"
1585 " please specify either an iallocator or a"
1586 " node, or set a cluster-wide default"
1590 def _GetDefaultIAllocator(cfg, iallocator):
1591 """Decides on which iallocator to use.
1593 @type cfg: L{config.ConfigWriter}
1594 @param cfg: Cluster configuration object
1595 @type iallocator: string or None
1596 @param iallocator: Iallocator specified in opcode
1598 @return: Iallocator name
1602 # Use default iallocator
1603 iallocator = cfg.GetDefaultIAllocator()
1606 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1607 " opcode nor as a cluster-wide default",
1613 class LUClusterPostInit(LogicalUnit):
1614 """Logical unit for running hooks after cluster initialization.
1617 HPATH = "cluster-init"
1618 HTYPE = constants.HTYPE_CLUSTER
1620 def BuildHooksEnv(self):
1625 "OP_TARGET": self.cfg.GetClusterName(),
1628 def BuildHooksNodes(self):
1629 """Build hooks nodes.
1632 return ([], [self.cfg.GetMasterNode()])
1634 def Exec(self, feedback_fn):
1641 class LUClusterDestroy(LogicalUnit):
1642 """Logical unit for destroying the cluster.
1645 HPATH = "cluster-destroy"
1646 HTYPE = constants.HTYPE_CLUSTER
1648 def BuildHooksEnv(self):
1653 "OP_TARGET": self.cfg.GetClusterName(),
1656 def BuildHooksNodes(self):
1657 """Build hooks nodes.
1662 def CheckPrereq(self):
1663 """Check prerequisites.
1665 This checks whether the cluster is empty.
1667 Any errors are signaled by raising errors.OpPrereqError.
1670 master = self.cfg.GetMasterNode()
1672 nodelist = self.cfg.GetNodeList()
1673 if len(nodelist) != 1 or nodelist[0] != master:
1674 raise errors.OpPrereqError("There are still %d node(s) in"
1675 " this cluster." % (len(nodelist) - 1),
1677 instancelist = self.cfg.GetInstanceList()
1679 raise errors.OpPrereqError("There are still %d instance(s) in"
1680 " this cluster." % len(instancelist),
1683 def Exec(self, feedback_fn):
1684 """Destroys the cluster.
1687 master_params = self.cfg.GetMasterNetworkParameters()
1689 # Run post hooks on master node before it's removed
1690 _RunPostHook(self, master_params.name)
1692 ems = self.cfg.GetUseExternalMipScript()
1693 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1696 self.LogWarning("Error disabling the master IP address: %s",
1699 return master_params.name
1702 def _VerifyCertificate(filename):
1703 """Verifies a certificate for L{LUClusterVerifyConfig}.
1705 @type filename: string
1706 @param filename: Path to PEM file
1710 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1711 utils.ReadFile(filename))
1712 except Exception, err: # pylint: disable=W0703
1713 return (LUClusterVerifyConfig.ETYPE_ERROR,
1714 "Failed to load X509 certificate %s: %s" % (filename, err))
1717 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1718 constants.SSL_CERT_EXPIRATION_ERROR)
1721 fnamemsg = "While verifying %s: %s" % (filename, msg)
1726 return (None, fnamemsg)
1727 elif errcode == utils.CERT_WARNING:
1728 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1729 elif errcode == utils.CERT_ERROR:
1730 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1732 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1735 def _GetAllHypervisorParameters(cluster, instances):
1736 """Compute the set of all hypervisor parameters.
1738 @type cluster: L{objects.Cluster}
1739 @param cluster: the cluster object
1740 @param instances: list of L{objects.Instance}
1741 @param instances: additional instances from which to obtain parameters
1742 @rtype: list of (origin, hypervisor, parameters)
1743 @return: a list with all parameters found, indicating the hypervisor they
1744 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1749 for hv_name in cluster.enabled_hypervisors:
1750 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1752 for os_name, os_hvp in cluster.os_hvp.items():
1753 for hv_name, hv_params in os_hvp.items():
1755 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1756 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1758 # TODO: collapse identical parameter values in a single one
1759 for instance in instances:
1760 if instance.hvparams:
1761 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1762 cluster.FillHV(instance)))
1767 class _VerifyErrors(object):
1768 """Mix-in for cluster/group verify LUs.
1770 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1771 self.op and self._feedback_fn to be available.)
1775 ETYPE_FIELD = "code"
1776 ETYPE_ERROR = "ERROR"
1777 ETYPE_WARNING = "WARNING"
1779 def _Error(self, ecode, item, msg, *args, **kwargs):
1780 """Format an error message.
1782 Based on the opcode's error_codes parameter, either format a
1783 parseable error code, or a simpler error string.
1785 This must be called only from Exec and functions called from Exec.
1788 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1789 itype, etxt, _ = ecode
1790 # first complete the msg
1793 # then format the whole message
1794 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1795 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1801 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1802 # and finally report it via the feedback_fn
1803 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1805 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1806 """Log an error message if the passed condition is True.
1810 or self.op.debug_simulate_errors) # pylint: disable=E1101
1812 # If the error code is in the list of ignored errors, demote the error to a warning.
1814 (_, etxt, _) = ecode
1815 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1816 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1819 self._Error(ecode, *args, **kwargs)
1821 # warnings do not mark the operation as failed, only errors do
1822 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1823 self.bad = self.bad or cond
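# Illustrative use from a verify LU (assumption; condition and message are
# hypothetical, the error code is one of the constants.CV_* tuples):
#
#   self._ErrorIf(bool(problem), constants.CV_ECLUSTERCFG, None,
#                 "cluster configuration problem: %s", problem)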
1826 class LUClusterVerify(NoHooksLU):
1827 """Submits all jobs necessary to verify the cluster.
1832 def ExpandNames(self):
1833 self.needed_locks = {}
1835 def Exec(self, feedback_fn):
1838 if self.op.group_name:
1839 groups = [self.op.group_name]
1840 depends_fn = lambda: None
1842 groups = self.cfg.GetNodeGroupList()
1844 # Verify global configuration
1846 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1849 # Always depend on global verification
1850 depends_fn = lambda: [(-len(jobs), [])]
1852 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1853 ignore_errors=self.op.ignore_errors,
1854 depends=depends_fn())]
1855 for group in groups)
1857 # Fix up all parameters
1858 for op in itertools.chain(*jobs): # pylint: disable=W0142
1859 op.debug_simulate_errors = self.op.debug_simulate_errors
1860 op.verbose = self.op.verbose
1861 op.error_codes = self.op.error_codes
1863 op.skip_checks = self.op.skip_checks
1864 except AttributeError:
1865 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1867 return ResultWithJobs(jobs)
1870 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1871 """Verifies the cluster config.
1876 def _VerifyHVP(self, hvp_data):
1877 """Verifies locally the syntax of the hypervisor parameters.
1880 for item, hv_name, hv_params in hvp_data:
1881 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1884 hv_class = hypervisor.GetHypervisor(hv_name)
1885 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1886 hv_class.CheckParameterSyntax(hv_params)
1887 except errors.GenericError, err:
1888 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1890 def ExpandNames(self):
1891 # Information can be safely retrieved as the BGL is acquired in exclusive
1893 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1894 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1895 self.all_node_info = self.cfg.GetAllNodesInfo()
1896 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1897 self.needed_locks = {}
1899 def Exec(self, feedback_fn):
1900 """Verify integrity of cluster, performing various tests on nodes.
1904 self._feedback_fn = feedback_fn
1906 feedback_fn("* Verifying cluster config")
1908 for msg in self.cfg.VerifyConfig():
1909 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1911 feedback_fn("* Verifying cluster certificate files")
1913 for cert_filename in constants.ALL_CERT_FILES:
1914 (errcode, msg) = _VerifyCertificate(cert_filename)
1915 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1917 feedback_fn("* Verifying hypervisor parameters")
1919 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1920 self.all_inst_info.values()))
1922 feedback_fn("* Verifying all nodes belong to an existing group")
1924 # We do this verification here because, should this bogus circumstance
1925 # occur, it would never be caught by VerifyGroup, which only acts on
1926 # nodes/instances reachable from existing node groups.
1928 dangling_nodes = set(node.name for node in self.all_node_info.values()
1929 if node.group not in self.all_group_info)
1931 dangling_instances = {}
1932 no_node_instances = []
1934 for inst in self.all_inst_info.values():
1935 if inst.primary_node in dangling_nodes:
1936 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1937 elif inst.primary_node not in self.all_node_info:
1938 no_node_instances.append(inst.name)
1943 utils.CommaJoin(dangling_instances.get(node.name,
1945 for node in dangling_nodes]
1947 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1949 "the following nodes (and their instances) belong to a non"
1950 " existing group: %s", utils.CommaJoin(pretty_dangling))
1952 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1954 "the following instances have a non-existing primary-node:"
1955 " %s", utils.CommaJoin(no_node_instances))
1960 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1961 """Verifies the status of a node group.
1964 HPATH = "cluster-verify"
1965 HTYPE = constants.HTYPE_CLUSTER
1968 _HOOKS_INDENT_RE = re.compile("^", re.M)
1970 class NodeImage(object):
1971 """A class representing the logical and physical status of a node.
1974 @ivar name: the node name to which this object refers
1975 @ivar volumes: a structure as returned from
1976 L{ganeti.backend.GetVolumeList} (runtime)
1977 @ivar instances: a list of running instances (runtime)
1978 @ivar pinst: list of configured primary instances (config)
1979 @ivar sinst: list of configured secondary instances (config)
1980 @ivar sbp: dictionary of {primary-node: list of instances} for all
1981 instances for which this node is secondary (config)
1982 @ivar mfree: free memory, as reported by hypervisor (runtime)
1983 @ivar dfree: free disk, as reported by the node (runtime)
1984 @ivar offline: the offline status (config)
1985 @type rpc_fail: boolean
1986 @ivar rpc_fail: whether the RPC verify call failed (overall,
1987 not whether the individual keys were correct) (runtime)
1988 @type lvm_fail: boolean
1989 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1990 @type hyp_fail: boolean
1991 @ivar hyp_fail: whether the RPC call didn't return the instance list
1992 @type ghost: boolean
1993 @ivar ghost: whether this is an unknown ("ghost") node, i.e. not in the config
1994 @type os_fail: boolean
1995 @ivar os_fail: whether the RPC call didn't return valid OS data
1997 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1998 @type vm_capable: boolean
1999 @ivar vm_capable: whether the node can host instances
2002 def __init__(self, offline=False, name=None, vm_capable=True):
2011 self.offline = offline
2012 self.vm_capable = vm_capable
2013 self.rpc_fail = False
2014 self.lvm_fail = False
2015 self.hyp_fail = False
2017 self.os_fail = False
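# Illustrative sketch (assumed usage, names and values made up): the
# verification code below builds one NodeImage per node, fills the config
# fields (pinst/sinst/sbp) from the cluster configuration and the runtime
# fields from the node_verify RPC results, e.g.
#   nimg = self.NodeImage(offline=False, name="node1", vm_capable=True)
#   nimg.pinst = ["inst1"]   # configured primary instances
#   nimg.mfree = 2048        # free memory in MiB, as reported by the hypervisor
# rpc_fail/lvm_fail/hyp_fail/os_fail are flipped to True when the
# corresponding RPC answer could not be used.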
2020 def ExpandNames(self):
2021 # This raises errors.OpPrereqError on its own:
2022 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2024 # Get instances in node group; this is unsafe and needs verification later
2025 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 self.needed_locks = {
2028 locking.LEVEL_INSTANCE: inst_names,
2029 locking.LEVEL_NODEGROUP: [self.group_uuid],
2030 locking.LEVEL_NODE: [],
2033 self.share_locks = _ShareAll()
2035 def DeclareLocks(self, level):
2036 if level == locking.LEVEL_NODE:
2037 # Get members of node group; this is unsafe and needs verification later
2038 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2040 all_inst_info = self.cfg.GetAllInstancesInfo()
2042 # In Exec(), we warn about mirrored instances that have primary and
2043 # secondary living in separate node groups. To fully verify that
2044 # volumes for these instances are healthy, we will need to do an
2045 # extra call to their secondaries. We ensure here those nodes will
2047 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2048 # Important: access only the instances whose lock is owned
2049 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2050 nodes.update(all_inst_info[inst].secondary_nodes)
2052 self.needed_locks[locking.LEVEL_NODE] = nodes
2054 def CheckPrereq(self):
2055 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2056 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2058 group_nodes = set(self.group_info.members)
2059 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2062 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2064 unlocked_instances = \
2065 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2068 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2069 utils.CommaJoin(unlocked_nodes))
2071 if unlocked_instances:
2072 raise errors.OpPrereqError("Missing lock for instances: %s" %
2073 utils.CommaJoin(unlocked_instances))
2075 self.all_node_info = self.cfg.GetAllNodesInfo()
2076 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2078 self.my_node_names = utils.NiceSort(group_nodes)
2079 self.my_inst_names = utils.NiceSort(group_instances)
2081 self.my_node_info = dict((name, self.all_node_info[name])
2082 for name in self.my_node_names)
2084 self.my_inst_info = dict((name, self.all_inst_info[name])
2085 for name in self.my_inst_names)
2087 # We detect here the nodes that will need the extra RPC calls for verifying
2088 # split LV volumes; they should be locked.
2089 extra_lv_nodes = set()
2091 for inst in self.my_inst_info.values():
2092 if inst.disk_template in constants.DTS_INT_MIRROR:
2093 group = self.my_node_info[inst.primary_node].group
2094 for nname in inst.secondary_nodes:
2095 if self.all_node_info[nname].group != group:
2096 extra_lv_nodes.add(nname)
2098 unlocked_lv_nodes = \
2099 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2101 if unlocked_lv_nodes:
2102 raise errors.OpPrereqError("these nodes could be locked: %s" %
2103 utils.CommaJoin(unlocked_lv_nodes))
2104 self.extra_lv_nodes = list(extra_lv_nodes)
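# Illustrative example (hypothetical names): if instance "inst1" uses DRBD
# with its primary node in this group and its secondary node "nodeX" in a
# different group, "nodeX" ends up in self.extra_lv_nodes so that an extra
# LV-list RPC can be sent to it later in Exec(); if its node lock is not
# held, the OpPrereqError above is raised instead.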
2106 def _VerifyNode(self, ninfo, nresult):
2107 """Perform some basic validation on data returned from a node.
2109 - check the result data structure is well formed and has all the
2111 - check ganeti version
2113 @type ninfo: L{objects.Node}
2114 @param ninfo: the node to check
2115 @param nresult: the results from the node
2117 @return: whether overall this call was successful (and we can expect
2118 reasonable values in the response)
2122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2124 # main result, nresult should be a non-empty dict
2125 test = not nresult or not isinstance(nresult, dict)
2126 _ErrorIf(test, constants.CV_ENODERPC, node,
2127 "unable to verify node: no data returned")
2131 # compares ganeti version
2132 local_version = constants.PROTOCOL_VERSION
2133 remote_version = nresult.get("version", None)
2134 test = not (remote_version and
2135 isinstance(remote_version, (list, tuple)) and
2136 len(remote_version) == 2)
2137 _ErrorIf(test, constants.CV_ENODERPC, node,
2138 "connection to node returned invalid data")
2142 test = local_version != remote_version[0]
2143 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2144 "incompatible protocol versions: master %s,"
2145 " node %s", local_version, remote_version[0])
2149 # node seems compatible, we can actually try to look into its results
2151 # full package version
2152 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2153 constants.CV_ENODEVERSION, node,
2154 "software version mismatch: master %s, node %s",
2155 constants.RELEASE_VERSION, remote_version[1],
2156 code=self.ETYPE_WARNING)
2158 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2159 if ninfo.vm_capable and isinstance(hyp_result, dict):
2160 for hv_name, hv_result in hyp_result.iteritems():
2161 test = hv_result is not None
2162 _ErrorIf(test, constants.CV_ENODEHV, node,
2163 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2165 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2166 if ninfo.vm_capable and isinstance(hvp_result, list):
2167 for item, hv_name, hv_result in hvp_result:
2168 _ErrorIf(True, constants.CV_ENODEHV, node,
2169 "hypervisor %s parameter verify failure (source %s): %s",
2170 hv_name, item, hv_result)
2172 test = nresult.get(constants.NV_NODESETUP,
2173 ["Missing NODESETUP results"])
2174 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2179 def _VerifyNodeTime(self, ninfo, nresult,
2180 nvinfo_starttime, nvinfo_endtime):
2181 """Check the node time.
2183 @type ninfo: L{objects.Node}
2184 @param ninfo: the node to check
2185 @param nresult: the remote results for the node
2186 @param nvinfo_starttime: the start time of the RPC call
2187 @param nvinfo_endtime: the end time of the RPC call
2191 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2193 ntime = nresult.get(constants.NV_TIME, None)
2195 ntime_merged = utils.MergeTime(ntime)
2196 except (ValueError, TypeError):
2197 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2200 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2201 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2202 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2203 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2207 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2208 "Node time diverges by at least %s from master node time",
2211 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2212 """Check the node LVM results.
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the remote results for the node
2217 @param vg_name: the configured VG name
2224 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226 # checks vg existence and size > 20G
2227 vglist = nresult.get(constants.NV_VGLIST, None)
2229 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2231 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2232 constants.MIN_VG_SIZE)
2233 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2236 pvlist = nresult.get(constants.NV_PVLIST, None)
2237 test = pvlist is None
2238 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2240 # check that ':' is not present in PV names, since it's a
2241 # special character for lvcreate (denotes the range of PEs to
2243 for _, pvname, owner_vg in pvlist:
2244 test = ":" in pvname
2245 _ErrorIf(test, constants.CV_ENODELVM, node,
2246 "Invalid character ':' in PV '%s' of VG '%s'",
2249 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2250 """Check the node bridges.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param bridges: the expected list of bridges
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 missing = nresult.get(constants.NV_BRIDGES, None)
2265 test = not isinstance(missing, list)
2266 _ErrorIf(test, constants.CV_ENODENET, node,
2267 "did not return valid bridge information")
2269 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2270 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2272 def _VerifyNodeUserScripts(self, ninfo, nresult):
2273 """Check the results of user scripts presence and executability on the node
2275 @type ninfo: L{objects.Node}
2276 @param ninfo: the node to check
2277 @param nresult: the remote results for the node
2282 test = not constants.NV_USERSCRIPTS in nresult
2283 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2284 "did not return user scripts information")
2286 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2288 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2289 "user scripts not present or not executable: %s" %
2290 utils.CommaJoin(sorted(broken_scripts)))
2292 def _VerifyNodeNetwork(self, ninfo, nresult):
2293 """Check the node network connectivity results.
2295 @type ninfo: L{objects.Node}
2296 @param ninfo: the node to check
2297 @param nresult: the remote results for the node
2301 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2303 test = constants.NV_NODELIST not in nresult
2304 _ErrorIf(test, constants.CV_ENODESSH, node,
2305 "node hasn't returned node ssh connectivity data")
2307 if nresult[constants.NV_NODELIST]:
2308 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2309 _ErrorIf(True, constants.CV_ENODESSH, node,
2310 "ssh communication with node '%s': %s", a_node, a_msg)
2312 test = constants.NV_NODENETTEST not in nresult
2313 _ErrorIf(test, constants.CV_ENODENET, node,
2314 "node hasn't returned node tcp connectivity data")
2316 if nresult[constants.NV_NODENETTEST]:
2317 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2319 _ErrorIf(True, constants.CV_ENODENET, node,
2320 "tcp communication with node '%s': %s",
2321 anode, nresult[constants.NV_NODENETTEST][anode])
2323 test = constants.NV_MASTERIP not in nresult
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "node hasn't returned node master IP reachability data")
2327 if not nresult[constants.NV_MASTERIP]:
2328 if node == self.master_node:
2329 msg = "the master node cannot reach the master IP (not configured?)"
2331 msg = "cannot reach the master IP"
2332 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2334 def _VerifyInstance(self, instance, instanceconfig, node_image,
2336 """Verify an instance.
2338 This function checks to see if the required block devices are
2339 available on the instance's node.
2342 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2343 node_current = instanceconfig.primary_node
2345 node_vol_should = {}
2346 instanceconfig.MapLVsByNode(node_vol_should)
2348 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2349 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2350 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2352 for node in node_vol_should:
2353 n_img = node_image[node]
2354 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2355 # ignore missing volumes on offline or broken nodes
2357 for volume in node_vol_should[node]:
2358 test = volume not in n_img.volumes
2359 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2360 "volume %s missing on node %s", volume, node)
2362 if instanceconfig.admin_state == constants.ADMINST_UP:
2363 pri_img = node_image[node_current]
2364 test = instance not in pri_img.instances and not pri_img.offline
2365 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2366 "instance not running on its primary node %s",
2369 diskdata = [(nname, success, status, idx)
2370 for (nname, disks) in diskstatus.items()
2371 for idx, (success, status) in enumerate(disks)]
2373 for nname, success, bdev_status, idx in diskdata:
2374 # the 'ghost node' construction in Exec() ensures that we have a
2376 snode = node_image[nname]
2377 bad_snode = snode.ghost or snode.offline
2378 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2379 not success and not bad_snode,
2380 constants.CV_EINSTANCEFAULTYDISK, instance,
2381 "couldn't retrieve status for disk/%s on %s: %s",
2382 idx, nname, bdev_status)
2383 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2384 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2385 constants.CV_EINSTANCEFAULTYDISK, instance,
2386 "disk/%s on %s is faulty", idx, nname)
2388 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2389 """Verify if there are any unknown volumes in the cluster.
2391 The .os, .swap and backup volumes are ignored. All other volumes are
2392 reported as unknown.
2394 @type reserved: L{ganeti.utils.FieldSet}
2395 @param reserved: a FieldSet of reserved volume names
2398 for node, n_img in node_image.items():
2399 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2400 # skip non-healthy nodes
2402 for volume in n_img.volumes:
2403 test = ((node not in node_vol_should or
2404 volume not in node_vol_should[node]) and
2405 not reserved.Matches(volume))
2406 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2407 "volume %s is unknown", volume)
2409 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2410 """Verify N+1 Memory Resilience.
2412 Check that if one single node dies we can still start all the
2413 instances it was primary for.
2416 cluster_info = self.cfg.GetClusterInfo()
2417 for node, n_img in node_image.items():
2418 # This code checks that every node which is now listed as
2419 # secondary has enough memory to host all instances it would have to
2420 # take over, should a single other node in the cluster fail.
2421 # FIXME: not ready for failover to an arbitrary node
2422 # FIXME: does not support file-backed instances
2423 # WARNING: we currently take into account down instances as well
2424 # as up ones, considering that even if they're down someone
2425 # might want to start them even in the event of a node failure.
2427 # we're skipping offline nodes from the N+1 warning, since
2428 # most likely we don't have good memory information from them;
2429 # we already list instances living on such nodes, and that's
2432 #TODO(dynmem): use MINMEM for checking
2433 #TODO(dynmem): also consider ballooning out other instances
2434 for prinode, instances in n_img.sbp.items():
2436 for instance in instances:
2437 bep = cluster_info.FillBE(instance_cfg[instance])
2438 if bep[constants.BE_AUTO_BALANCE]:
2439 needed_mem += bep[constants.BE_MAXMEM]
2440 test = n_img.mfree < needed_mem
2441 self._ErrorIf(test, constants.CV_ENODEN1, node,
2442 "not enough memory to accomodate instance failovers"
2443 " should node %s fail (%dMiB needed, %dMiB available)",
2444 prinode, needed_mem, n_img.mfree)
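# Worked example (numbers made up): if node "nodeB" is secondary for
# instances whose primary is "nodeA" and their auto-balanced BE_MAXMEM values
# sum to 6144 MiB, while nodeB currently reports mfree = 4096 MiB, the check
# above reports CV_ENODEN1 because nodeB could not absorb a failover from
# nodeA.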
2447 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2448 (files_all, files_opt, files_mc, files_vm)):
2449 """Verifies file checksums collected from all nodes.
2451 @param errorif: Callback for reporting errors
2452 @param nodeinfo: List of L{objects.Node} objects
2453 @param master_node: Name of master node
2454 @param all_nvinfo: RPC results
2457 # Define functions determining which nodes to consider for a file
2460 (files_mc, lambda node: (node.master_candidate or
2461 node.name == master_node)),
2462 (files_vm, lambda node: node.vm_capable),
2465 # Build mapping from filename to list of nodes which should have the file
2467 for (files, fn) in files2nodefn:
2469 filenodes = nodeinfo
2471 filenodes = filter(fn, nodeinfo)
2472 nodefiles.update((filename,
2473 frozenset(map(operator.attrgetter("name"), filenodes)))
2474 for filename in files)
2476 assert set(nodefiles) == (files_all | files_mc | files_vm)
2478 fileinfo = dict((filename, {}) for filename in nodefiles)
2479 ignore_nodes = set()
2481 for node in nodeinfo:
2483 ignore_nodes.add(node.name)
2486 nresult = all_nvinfo[node.name]
2488 if nresult.fail_msg or not nresult.payload:
2491 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2493 test = not (node_files and isinstance(node_files, dict))
2494 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2495 "Node did not return file checksum data")
2497 ignore_nodes.add(node.name)
2500 # Build per-checksum mapping from filename to nodes having it
2501 for (filename, checksum) in node_files.items():
2502 assert filename in nodefiles
2503 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2505 for (filename, checksums) in fileinfo.items():
2506 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2508 # Nodes having the file
2509 with_file = frozenset(node_name
2510 for nodes in fileinfo[filename].values()
2511 for node_name in nodes) - ignore_nodes
2513 expected_nodes = nodefiles[filename] - ignore_nodes
2515 # Nodes missing file
2516 missing_file = expected_nodes - with_file
2518 if filename in files_opt:
2520 errorif(missing_file and missing_file != expected_nodes,
2521 constants.CV_ECLUSTERFILECHECK, None,
2522 "File %s is optional, but it must exist on all or no"
2523 " nodes (not found on %s)",
2524 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2526 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2527 "File %s is missing from node(s) %s", filename,
2528 utils.CommaJoin(utils.NiceSort(missing_file)))
2530 # Warn if a node has a file it shouldn't
2531 unexpected = with_file - expected_nodes
2533 constants.CV_ECLUSTERFILECHECK, None,
2534 "File %s should not exist on node(s) %s",
2535 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2537 # See if there are multiple versions of the file
2538 test = len(checksums) > 1
2540 variants = ["variant %s on %s" %
2541 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2542 for (idx, (checksum, nodes)) in
2543 enumerate(sorted(checksums.items()))]
2547 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2548 "File %s found with %s different checksums (%s)",
2549 filename, len(checksums), "; ".join(variants))
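# Illustrative shape of the data built above (paths and checksums made up):
#   nodefiles = {"/var/lib/ganeti/ssconf_x": frozenset(["node1", "node2"])}
#   fileinfo  = {"/var/lib/ganeti/ssconf_x":
#                  {"abc123...": set(["node1"]), "def456...": set(["node2"])}}
# Two different checksums for the same file therefore trigger the
# "found with N different checksums" error, listing one variant per checksum.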
2551 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2553 """Verifies and the node DRBD status.
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2558 @param instanceinfo: the dict of instances
2559 @param drbd_helper: the configured DRBD usermode helper
2560 @param drbd_map: the DRBD map as returned by
2561 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2565 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2568 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2569 test = (helper_result is None)
2570 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2571 "no drbd usermode helper returned")
2573 status, payload = helper_result
2575 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2576 "drbd usermode helper check unsuccessful: %s", payload)
2577 test = status and (payload != drbd_helper)
2578 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2579 "wrong drbd usermode helper: %s", payload)
2581 # compute the DRBD minors
2583 for minor, instance in drbd_map[node].items():
2584 test = instance not in instanceinfo
2585 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2586 "ghost instance '%s' in temporary DRBD map", instance)
2587 # ghost instance should not be running, but otherwise we
2588 # don't give double warnings (both ghost instance and
2589 # unallocated minor in use)
2591 node_drbd[minor] = (instance, False)
2593 instance = instanceinfo[instance]
2594 node_drbd[minor] = (instance.name,
2595 instance.admin_state == constants.ADMINST_UP)
2597 # and now check them
2598 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2599 test = not isinstance(used_minors, (tuple, list))
2600 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2601 "cannot parse drbd status file: %s", str(used_minors))
2603 # we cannot check drbd status
2606 for minor, (iname, must_exist) in node_drbd.items():
2607 test = minor not in used_minors and must_exist
2608 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2609 "drbd minor %d of instance %s is not active", minor, iname)
2610 for minor in used_minors:
2611 test = minor not in node_drbd
2612 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2613 "unallocated drbd minor %d is in use", minor)
2615 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2616 """Builds the node OS structures.
2618 @type ninfo: L{objects.Node}
2619 @param ninfo: the node to check
2620 @param nresult: the remote results for the node
2621 @param nimg: the node image object
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2627 remote_os = nresult.get(constants.NV_OSLIST, None)
2628 test = (not isinstance(remote_os, list) or
2629 not compat.all(isinstance(v, list) and len(v) == 7
2630 for v in remote_os))
2632 _ErrorIf(test, constants.CV_ENODEOS, node,
2633 "node hasn't returned valid OS data")
2642 for (name, os_path, status, diagnose,
2643 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2645 if name not in os_dict:
2648 # parameters is a list of lists instead of list of tuples due to
2649 # JSON lacking a real tuple type, fix it:
2650 parameters = [tuple(v) for v in parameters]
2651 os_dict[name].append((os_path, status, diagnose,
2652 set(variants), set(parameters), set(api_ver)))
2654 nimg.oslist = os_dict
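# Illustrative shape of nimg.oslist (made-up values): one entry per OS name,
# typically with a single element, e.g.
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# i.e. (path, status, diagnose message, variants, parameters, API versions);
# multiple elements mean the same OS was found in several search paths.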
2656 def _VerifyNodeOS(self, ninfo, nimg, base):
2657 """Verifies the node OS list.
2659 @type ninfo: L{objects.Node}
2660 @param ninfo: the node to check
2661 @param nimg: the node image object
2662 @param base: the 'template' node we match against (e.g. from the master)
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2668 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2670 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2671 for os_name, os_data in nimg.oslist.items():
2672 assert os_data, "Empty OS status for OS %s?!" % os_name
2673 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2674 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2675 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2676 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2677 "OS '%s' has multiple entries (first one shadows the rest): %s",
2678 os_name, utils.CommaJoin([v[0] for v in os_data]))
2679 # comparisons with the 'base' image
2680 test = os_name not in base.oslist
2681 _ErrorIf(test, constants.CV_ENODEOS, node,
2682 "Extra OS %s not present on reference node (%s)",
2686 assert base.oslist[os_name], "Base node has empty OS status?"
2687 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2689 # base OS is invalid, skipping
2691 for kind, a, b in [("API version", f_api, b_api),
2692 ("variants list", f_var, b_var),
2693 ("parameters", beautify_params(f_param),
2694 beautify_params(b_param))]:
2695 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2696 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2697 kind, os_name, base.name,
2698 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2700 # check any missing OSes
2701 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2702 _ErrorIf(missing, constants.CV_ENODEOS, node,
2703 "OSes present on reference node %s but missing on this node: %s",
2704 base.name, utils.CommaJoin(missing))
2706 def _VerifyOob(self, ninfo, nresult):
2707 """Verifies out of band functionality of a node.
2709 @type ninfo: L{objects.Node}
2710 @param ninfo: the node to check
2711 @param nresult: the remote results for the node
2715 # We just have to verify the paths on master and/or master candidates
2716 # as the oob helper is invoked on the master
2717 if ((ninfo.master_candidate or ninfo.master_capable) and
2718 constants.NV_OOB_PATHS in nresult):
2719 for path_result in nresult[constants.NV_OOB_PATHS]:
2720 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2722 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2723 """Verifies and updates the node volume data.
2725 This function will update a L{NodeImage}'s internal structures
2726 with data from the remote call.
2728 @type ninfo: L{objects.Node}
2729 @param ninfo: the node to check
2730 @param nresult: the remote results for the node
2731 @param nimg: the node image object
2732 @param vg_name: the configured VG name
2736 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2738 nimg.lvm_fail = True
2739 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2742 elif isinstance(lvdata, basestring):
2743 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2744 utils.SafeEncode(lvdata))
2745 elif not isinstance(lvdata, dict):
2746 _ErrorIf(True, constants.CV_ENODELVM, node,
2747 "rpc call to node failed (lvlist)")
2749 nimg.volumes = lvdata
2750 nimg.lvm_fail = False
2752 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2753 """Verifies and updates the node instance list.
2755 If the listing was successful, then updates this node's instance
2756 list. Otherwise, it marks the RPC call as failed for the instance
2759 @type ninfo: L{objects.Node}
2760 @param ninfo: the node to check
2761 @param nresult: the remote results for the node
2762 @param nimg: the node image object
2765 idata = nresult.get(constants.NV_INSTANCELIST, None)
2766 test = not isinstance(idata, list)
2767 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2768 "rpc call to node failed (instancelist): %s",
2769 utils.SafeEncode(str(idata)))
2771 nimg.hyp_fail = True
2773 nimg.instances = idata
2775 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2776 """Verifies and computes a node information map
2778 @type ninfo: L{objects.Node}
2779 @param ninfo: the node to check
2780 @param nresult: the remote results for the node
2781 @param nimg: the node image object
2782 @param vg_name: the configured VG name
2786 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2788 # try to read free memory (from the hypervisor)
2789 hv_info = nresult.get(constants.NV_HVINFO, None)
2790 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2791 _ErrorIf(test, constants.CV_ENODEHV, node,
2792 "rpc call to node failed (hvinfo)")
2795 nimg.mfree = int(hv_info["memory_free"])
2796 except (ValueError, TypeError):
2797 _ErrorIf(True, constants.CV_ENODERPC, node,
2798 "node returned invalid nodeinfo, check hypervisor")
2800 # FIXME: devise a free space model for file based instances as well
2801 if vg_name is not None:
2802 test = (constants.NV_VGLIST not in nresult or
2803 vg_name not in nresult[constants.NV_VGLIST])
2804 _ErrorIf(test, constants.CV_ENODELVM, node,
2805 "node didn't return data for the volume group '%s'"
2806 " - it is either missing or broken", vg_name)
2809 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2810 except (ValueError, TypeError):
2811 _ErrorIf(True, constants.CV_ENODERPC, node,
2812 "node returned invalid LVM info, check LVM status")
2814 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2815 """Gets per-disk status information for all instances.
2817 @type nodelist: list of strings
2818 @param nodelist: Node names
2819 @type node_image: dict of (name, L{objects.Node})
2820 @param node_image: Node objects
2821 @type instanceinfo: dict of (name, L{objects.Instance})
2822 @param instanceinfo: Instance objects
2823 @rtype: {instance: {node: [(success, payload)]}}
2824 @return: a dictionary of per-instance dictionaries with nodes as
2825 keys and disk information as values; the disk information is a
2826 list of tuples (success, payload)
2829 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2832 node_disks_devonly = {}
2833 diskless_instances = set()
2834 diskless = constants.DT_DISKLESS
2836 for nname in nodelist:
2837 node_instances = list(itertools.chain(node_image[nname].pinst,
2838 node_image[nname].sinst))
2839 diskless_instances.update(inst for inst in node_instances
2840 if instanceinfo[inst].disk_template == diskless)
2841 disks = [(inst, disk)
2842 for inst in node_instances
2843 for disk in instanceinfo[inst].disks]
2846 # No need to collect data
2849 node_disks[nname] = disks
2851 # Creating copies as SetDiskID below will modify the objects and that can
2852 # lead to incorrect data returned from nodes
2853 devonly = [dev.Copy() for (_, dev) in disks]
2856 self.cfg.SetDiskID(dev, nname)
2858 node_disks_devonly[nname] = devonly
2860 assert len(node_disks) == len(node_disks_devonly)
2862 # Collect data from all nodes with disks
2863 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2866 assert len(result) == len(node_disks)
2870 for (nname, nres) in result.items():
2871 disks = node_disks[nname]
2874 # No data from this node
2875 data = len(disks) * [(False, "node offline")]
2878 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2879 "while getting disk information: %s", msg)
2881 # No data from this node
2882 data = len(disks) * [(False, msg)]
2885 for idx, i in enumerate(nres.payload):
2886 if isinstance(i, (tuple, list)) and len(i) == 2:
2889 logging.warning("Invalid result from node %s, entry %d: %s",
2891 data.append((False, "Invalid result from the remote node"))
2893 for ((inst, _), status) in zip(disks, data):
2894 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2896 # Add empty entries for diskless instances.
2897 for inst in diskless_instances:
2898 assert inst not in instdisk
2901 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2902 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2903 compat.all(isinstance(s, (tuple, list)) and
2904 len(s) == 2 for s in statuses)
2905 for inst, nnames in instdisk.items()
2906 for nname, statuses in nnames.items())
2907 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
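# Illustrative shape of the resulting instdisk mapping (made-up data):
#   {"inst1": {"node1": [(True, status0), (True, status1)],
#              "node2": [(True, status0), (True, status1)]}}
# i.e. one (success, payload) pair per disk, per node the instance uses;
# diskless instances get an empty inner dict.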
2912 def _SshNodeSelector(group_uuid, all_nodes):
2913 """Create endless iterators for all potential SSH check hosts.
2916 nodes = [node for node in all_nodes
2917 if (node.group != group_uuid and
2919 keyfunc = operator.attrgetter("group")
2921 return map(itertools.cycle,
2922 [sorted(map(operator.attrgetter("name"), names))
2923 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2927 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2928 """Choose which nodes should talk to which other nodes.
2930 We will make nodes contact all nodes in their group, and one node from
2933 @warning: This algorithm has a known issue if one node group is much
2934 smaller than others (e.g. just one node). In such a case all other
2935 nodes will talk to the single node.
2938 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2939 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2941 return (online_nodes,
2942 dict((name, sorted([i.next() for i in sel]))
2943 for name in online_nodes))
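# Illustrative result (hypothetical nodes/groups): for a group whose online
# nodes are ["n1", "n2"] and two other groups containing nodes "a1" and "b1",
# the return value would look like
#   (["n1", "n2"], {"n1": ["a1", "b1"], "n2": ["a1", "b1"]})
# i.e. the group's own online nodes plus, per node, one node picked from each
# other group (cycling through that group's sorted members).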
2945 def BuildHooksEnv(self):
2948 Cluster-Verify hooks are run only in the post phase; their failure causes
2949 the output to be logged in the verify output and the verification to fail.
2953 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2956 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2957 for node in self.my_node_info.values())
2961 def BuildHooksNodes(self):
2962 """Build hooks nodes.
2965 return ([], self.my_node_names)
2967 def Exec(self, feedback_fn):
2968 """Verify integrity of the node group, performing various test on nodes.
2971 # This method has too many local variables. pylint: disable=R0914
2972 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2974 if not self.my_node_names:
2976 feedback_fn("* Empty node group, skipping verification")
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 verbose = self.op.verbose
2982 self._feedback_fn = feedback_fn
2984 vg_name = self.cfg.GetVGName()
2985 drbd_helper = self.cfg.GetDRBDHelper()
2986 cluster = self.cfg.GetClusterInfo()
2987 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2988 hypervisors = cluster.enabled_hypervisors
2989 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2991 i_non_redundant = [] # Non redundant instances
2992 i_non_a_balanced = [] # Non auto-balanced instances
2993 i_offline = 0 # Count of offline instances
2994 n_offline = 0 # Count of offline nodes
2995 n_drained = 0 # Count of nodes being drained
2996 node_vol_should = {}
2998 # FIXME: verify OS list
3001 filemap = _ComputeAncillaryFiles(cluster, False)
3003 # do local checksums
3004 master_node = self.master_node = self.cfg.GetMasterNode()
3005 master_ip = self.cfg.GetMasterIP()
3007 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3010 if self.cfg.GetUseExternalMipScript():
3011 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3013 node_verify_param = {
3014 constants.NV_FILELIST:
3015 utils.UniqueSequence(filename
3016 for files in filemap
3017 for filename in files),
3018 constants.NV_NODELIST:
3019 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3020 self.all_node_info.values()),
3021 constants.NV_HYPERVISOR: hypervisors,
3022 constants.NV_HVPARAMS:
3023 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3024 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3025 for node in node_data_list
3026 if not node.offline],
3027 constants.NV_INSTANCELIST: hypervisors,
3028 constants.NV_VERSION: None,
3029 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3030 constants.NV_NODESETUP: None,
3031 constants.NV_TIME: None,
3032 constants.NV_MASTERIP: (master_node, master_ip),
3033 constants.NV_OSLIST: None,
3034 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3035 constants.NV_USERSCRIPTS: user_scripts,
3038 if vg_name is not None:
3039 node_verify_param[constants.NV_VGLIST] = None
3040 node_verify_param[constants.NV_LVLIST] = vg_name
3041 node_verify_param[constants.NV_PVLIST] = [vg_name]
3042 node_verify_param[constants.NV_DRBDLIST] = None
3045 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3048 # FIXME: this needs to be changed per node-group, not cluster-wide
3050 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3051 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3052 bridges.add(default_nicpp[constants.NIC_LINK])
3053 for instance in self.my_inst_info.values():
3054 for nic in instance.nics:
3055 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3056 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3057 bridges.add(full_nic[constants.NIC_LINK])
3060 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3062 # Build our expected cluster state
3063 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3065 vm_capable=node.vm_capable))
3066 for node in node_data_list)
3070 for node in self.all_node_info.values():
3071 path = _SupportsOob(self.cfg, node)
3072 if path and path not in oob_paths:
3073 oob_paths.append(path)
3076 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3078 for instance in self.my_inst_names:
3079 inst_config = self.my_inst_info[instance]
3081 for nname in inst_config.all_nodes:
3082 if nname not in node_image:
3083 gnode = self.NodeImage(name=nname)
3084 gnode.ghost = (nname not in self.all_node_info)
3085 node_image[nname] = gnode
3087 inst_config.MapLVsByNode(node_vol_should)
3089 pnode = inst_config.primary_node
3090 node_image[pnode].pinst.append(instance)
3092 for snode in inst_config.secondary_nodes:
3093 nimg = node_image[snode]
3094 nimg.sinst.append(instance)
3095 if pnode not in nimg.sbp:
3096 nimg.sbp[pnode] = []
3097 nimg.sbp[pnode].append(instance)
3099 # At this point, we have the in-memory data structures complete,
3100 # except for the runtime information, which we'll gather next
3102 # Due to the way our RPC system works, exact response times cannot be
3103 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3104 # time before and after executing the request, we can at least have a time
3106 nvinfo_starttime = time.time()
3107 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3109 self.cfg.GetClusterName())
3110 nvinfo_endtime = time.time()
3112 if self.extra_lv_nodes and vg_name is not None:
3114 self.rpc.call_node_verify(self.extra_lv_nodes,
3115 {constants.NV_LVLIST: vg_name},
3116 self.cfg.GetClusterName())
3118 extra_lv_nvinfo = {}
3120 all_drbd_map = self.cfg.ComputeDRBDMap()
3122 feedback_fn("* Gathering disk information (%s nodes)" %
3123 len(self.my_node_names))
3124 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3127 feedback_fn("* Verifying configuration file consistency")
3129 # If not all nodes are being checked, we need to make sure the master node
3130 # and a non-checked vm_capable node are in the list.
3131 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3133 vf_nvinfo = all_nvinfo.copy()
3134 vf_node_info = list(self.my_node_info.values())
3135 additional_nodes = []
3136 if master_node not in self.my_node_info:
3137 additional_nodes.append(master_node)
3138 vf_node_info.append(self.all_node_info[master_node])
3139 # Add the first vm_capable node we find which is not included
3140 for node in absent_nodes:
3141 nodeinfo = self.all_node_info[node]
3142 if nodeinfo.vm_capable and not nodeinfo.offline:
3143 additional_nodes.append(node)
3144 vf_node_info.append(self.all_node_info[node])
3146 key = constants.NV_FILELIST
3147 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3148 {key: node_verify_param[key]},
3149 self.cfg.GetClusterName()))
3151 vf_nvinfo = all_nvinfo
3152 vf_node_info = self.my_node_info.values()
3154 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3156 feedback_fn("* Verifying node status")
3160 for node_i in node_data_list:
3162 nimg = node_image[node]
3166 feedback_fn("* Skipping offline node %s" % (node,))
3170 if node == master_node:
3172 elif node_i.master_candidate:
3173 ntype = "master candidate"
3174 elif node_i.drained:
3180 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3182 msg = all_nvinfo[node].fail_msg
3183 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3186 nimg.rpc_fail = True
3189 nresult = all_nvinfo[node].payload
3191 nimg.call_ok = self._VerifyNode(node_i, nresult)
3192 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3193 self._VerifyNodeNetwork(node_i, nresult)
3194 self._VerifyNodeUserScripts(node_i, nresult)
3195 self._VerifyOob(node_i, nresult)
3198 self._VerifyNodeLVM(node_i, nresult, vg_name)
3199 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3202 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3203 self._UpdateNodeInstances(node_i, nresult, nimg)
3204 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeOS(node_i, nresult, nimg)
3207 if not nimg.os_fail:
3208 if refos_img is None:
3210 self._VerifyNodeOS(node_i, nimg, refos_img)
3211 self._VerifyNodeBridges(node_i, nresult, bridges)
3213 # Check that all instances running on the node are its configured primary instances. (This
3214 # can no longer be done from _VerifyInstance below, since some of the
3215 # wrong instances could be from other node groups.)
3216 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3218 for inst in non_primary_inst:
3219 # FIXME: investigate best way to handle offline insts
3220 if inst.admin_state == constants.ADMINST_OFFLINE:
3222 feedback_fn("* Skipping offline instance %s" % inst.name)
3225 test = inst in self.all_inst_info
3226 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3227 "instance should not run on node %s", node_i.name)
3228 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3229 "node is running unknown instance %s", inst)
3231 for node, result in extra_lv_nvinfo.items():
3232 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3233 node_image[node], vg_name)
3235 feedback_fn("* Verifying instance status")
3236 for instance in self.my_inst_names:
3238 feedback_fn("* Verifying instance %s" % instance)
3239 inst_config = self.my_inst_info[instance]
3240 self._VerifyInstance(instance, inst_config, node_image,
3242 inst_nodes_offline = []
3244 pnode = inst_config.primary_node
3245 pnode_img = node_image[pnode]
3246 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3247 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3248 " primary node failed", instance)
3250 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3252 constants.CV_EINSTANCEBADNODE, instance,
3253 "instance is marked as running and lives on offline node %s",
3254 inst_config.primary_node)
3256 # If the instance is non-redundant we cannot survive losing its primary
3257 # node, so we are not N+1 compliant. On the other hand we have no disk
3258 # templates with more than one secondary so that situation is not well
3260 # FIXME: does not support file-backed instances
3261 if not inst_config.secondary_nodes:
3262 i_non_redundant.append(instance)
3264 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3265 constants.CV_EINSTANCELAYOUT,
3266 instance, "instance has multiple secondary nodes: %s",
3267 utils.CommaJoin(inst_config.secondary_nodes),
3268 code=self.ETYPE_WARNING)
3270 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3271 pnode = inst_config.primary_node
3272 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3273 instance_groups = {}
3275 for node in instance_nodes:
3276 instance_groups.setdefault(self.all_node_info[node].group,
3280 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3281 # Sort so that we always list the primary node first.
3282 for group, nodes in sorted(instance_groups.items(),
3283 key=lambda (_, nodes): pnode in nodes,
3286 self._ErrorIf(len(instance_groups) > 1,
3287 constants.CV_EINSTANCESPLITGROUPS,
3288 instance, "instance has primary and secondary nodes in"
3289 " different groups: %s", utils.CommaJoin(pretty_list),
3290 code=self.ETYPE_WARNING)
3292 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3293 i_non_a_balanced.append(instance)
3295 for snode in inst_config.secondary_nodes:
3296 s_img = node_image[snode]
3297 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3298 snode, "instance %s, connection to secondary node failed",
3302 inst_nodes_offline.append(snode)
3304 # warn that the instance lives on offline nodes
3305 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3306 "instance has offline secondary node(s) %s",
3307 utils.CommaJoin(inst_nodes_offline))
3308 # ... or ghost/non-vm_capable nodes
3309 for node in inst_config.all_nodes:
3310 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3311 instance, "instance lives on ghost node %s", node)
3312 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on non-vm_capable node %s", node)
3315 feedback_fn("* Verifying orphan volumes")
3316 reserved = utils.FieldSet(*cluster.reserved_lvs)
3318 # We will get spurious "unknown volume" warnings if any node of this group
3319 # is secondary for an instance whose primary is in another group. To avoid
3320 # them, we find these instances and add their volumes to node_vol_should.
3321 for inst in self.all_inst_info.values():
3322 for secondary in inst.secondary_nodes:
3323 if (secondary in self.my_node_info
3324 and inst.name not in self.my_inst_info):
3325 inst.MapLVsByNode(node_vol_should)
3328 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3330 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3331 feedback_fn("* Verifying N+1 Memory redundancy")
3332 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3334 feedback_fn("* Other Notes")
3336 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3337 % len(i_non_redundant))
3339 if i_non_a_balanced:
3340 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3341 % len(i_non_a_balanced))
3344 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3347 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3350 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3354 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3355 """Analyze the post-hooks' result
3357 This method analyses the hook result, handles it, and sends some
3358 nicely-formatted feedback back to the user.
3360 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3361 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3362 @param hooks_results: the results of the multi-node hooks rpc call
3363 @param feedback_fn: function used to send feedback back to the caller
3364 @param lu_result: previous Exec result
3365 @return: the new Exec result, based on the previous result
3369 # We only really run POST phase hooks, only for non-empty groups,
3370 # and are only interested in their results
3371 if not self.my_node_names:
3374 elif phase == constants.HOOKS_PHASE_POST:
3375 # Used to change hooks' output to proper indentation
3376 feedback_fn("* Hooks Results")
3377 assert hooks_results, "invalid result from hooks"
3379 for node_name in hooks_results:
3380 res = hooks_results[node_name]
3382 test = msg and not res.offline
3383 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3384 "Communication failure in hooks execution: %s", msg)
3385 if res.offline or msg:
3386 # No need to investigate payload if node is offline or gave
3389 for script, hkr, output in res.payload:
3390 test = hkr == constants.HKR_FAIL
3391 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3392 "Script %s failed, output:", script)
3394 output = self._HOOKS_INDENT_RE.sub(" ", output)
3395 feedback_fn("%s" % output)
3401 class LUClusterVerifyDisks(NoHooksLU):
3402 """Verifies the cluster disks status.
3407 def ExpandNames(self):
3408 self.share_locks = _ShareAll()
3409 self.needed_locks = {
3410 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3413 def Exec(self, feedback_fn):
3414 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3416 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3417 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3418 for group in group_names])
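# Illustrative note (group names made up): with node groups "default" and
# "storage" this returns
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
# i.e. one single-opcode job per locked node group.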
3421 class LUGroupVerifyDisks(NoHooksLU):
3422 """Verifies the status of all disks in a node group.
3427 def ExpandNames(self):
3428 # Raises errors.OpPrereqError on its own if group can't be found
3429 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3431 self.share_locks = _ShareAll()
3432 self.needed_locks = {
3433 locking.LEVEL_INSTANCE: [],
3434 locking.LEVEL_NODEGROUP: [],
3435 locking.LEVEL_NODE: [],
3438 def DeclareLocks(self, level):
3439 if level == locking.LEVEL_INSTANCE:
3440 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3442 # Lock instances optimistically, needs verification once node and group
3443 # locks have been acquired
3444 self.needed_locks[locking.LEVEL_INSTANCE] = \
3445 self.cfg.GetNodeGroupInstances(self.group_uuid)
3447 elif level == locking.LEVEL_NODEGROUP:
3448 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3450 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3451 set([self.group_uuid] +
3452 # Lock all groups used by instances optimistically; this requires
3453 # going via the node before it's locked, requiring verification
3456 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3457 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3459 elif level == locking.LEVEL_NODE:
3460 # This will only lock the nodes in the group to be verified which contain
3462 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3463 self._LockInstancesNodes()
3465 # Lock all nodes in group to be verified
3466 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3467 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3468 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3470 def CheckPrereq(self):
3471 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3472 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3473 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3475 assert self.group_uuid in owned_groups
3477 # Check if locked instances are still correct
3478 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3480 # Get instance information
3481 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3483 # Check if node groups for locked instances are still correct
3484 for (instance_name, inst) in self.instances.items():
3485 assert owned_nodes.issuperset(inst.all_nodes), \
3486 "Instance %s's nodes changed while we kept the lock" % instance_name
3488 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3491 assert self.group_uuid in inst_groups, \
3492 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3494 def Exec(self, feedback_fn):
3495 """Verify integrity of cluster disks.
3497 @rtype: tuple of three items
3498 @return: a tuple of (dict of node-to-node_error, list of instances
3499 which need activate-disks, dict of instance: (node, volume) for
3504 res_instances = set()
3507 nv_dict = _MapInstanceDisksToNodes([inst
3508 for inst in self.instances.values()
3509 if inst.admin_state == constants.ADMINST_UP])
3512 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3513 set(self.cfg.GetVmCapableNodeList()))
3515 node_lvs = self.rpc.call_lv_list(nodes, [])
3517 for (node, node_res) in node_lvs.items():
3518 if node_res.offline:
3521 msg = node_res.fail_msg
3523 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3524 res_nodes[node] = msg
3527 for lv_name, (_, _, lv_online) in node_res.payload.items():
3528 inst = nv_dict.pop((node, lv_name), None)
3529 if not (lv_online or inst is None):
3530 res_instances.add(inst)
3532 # any leftover items in nv_dict are missing LVs, let's arrange the data
3534 for key, inst in nv_dict.iteritems():
3535 res_missing.setdefault(inst, []).append(list(key))
3537 return (res_nodes, list(res_instances), res_missing)
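# Illustrative return value (all names made up):
#   ({"node3": "Error while ..."},                 # per-node LV listing errors
#    ["inst2"],                                    # instances needing activate-disks
#    {"inst5": [["node1", "xenvg/disk0_data"]]})   # instances with missing LVs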
3540 class LUClusterRepairDiskSizes(NoHooksLU):
3541 """Verifies the cluster disks sizes.
3546 def ExpandNames(self):
3547 if self.op.instances:
3548 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3549 self.needed_locks = {
3550 locking.LEVEL_NODE_RES: [],
3551 locking.LEVEL_INSTANCE: self.wanted_names,
3553 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3555 self.wanted_names = None
3556 self.needed_locks = {
3557 locking.LEVEL_NODE_RES: locking.ALL_SET,
3558 locking.LEVEL_INSTANCE: locking.ALL_SET,
3560 self.share_locks = {
3561 locking.LEVEL_NODE_RES: 1,
3562 locking.LEVEL_INSTANCE: 0,
3565 def DeclareLocks(self, level):
3566 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3567 self._LockInstancesNodes(primary_only=True, level=level)
3569 def CheckPrereq(self):
3570 """Check prerequisites.
3572 This only checks the optional instance list against the existing names.
3575 if self.wanted_names is None:
3576 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3578 self.wanted_instances = \
3579 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3581 def _EnsureChildSizes(self, disk):
3582 """Ensure children of the disk have the needed disk size.
3584 This is valid mainly for DRBD8 and fixes an issue where the
3585 children have a smaller disk size.
3587 @param disk: an L{ganeti.objects.Disk} object
3590 if disk.dev_type == constants.LD_DRBD8:
3591 assert disk.children, "Empty children for DRBD8?"
3592 fchild = disk.children[0]
3593 mismatch = fchild.size < disk.size
3595 self.LogInfo("Child disk has size %d, parent %d, fixing",
3596 fchild.size, disk.size)
3597 fchild.size = disk.size
3599 # and we recurse on this child only, not on the metadev
3600 return self._EnsureChildSizes(fchild) or mismatch
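# Worked example (made-up sizes): for a 10240 MiB DRBD8 disk whose first
# child (the data LV) is recorded as only 10236 MiB, the code above grows the
# child to 10240 MiB, returns True so the caller knows the configuration
# changed, and leaves the metadata child untouched.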
3604 def Exec(self, feedback_fn):
3605 """Verify the size of cluster disks.
3608 # TODO: check child disks too
3609 # TODO: check differences in size between primary/secondary nodes
3611 for instance in self.wanted_instances:
3612 pnode = instance.primary_node
3613 if pnode not in per_node_disks:
3614 per_node_disks[pnode] = []
3615 for idx, disk in enumerate(instance.disks):
3616 per_node_disks[pnode].append((instance, idx, disk))
3618 assert not (frozenset(per_node_disks.keys()) -
3619 self.owned_locks(locking.LEVEL_NODE_RES)), \
3620 "Not owning correct locks"
3621 assert not self.owned_locks(locking.LEVEL_NODE)
3624 for node, dskl in per_node_disks.items():
3625 newl = [v[2].Copy() for v in dskl]
3627 self.cfg.SetDiskID(dsk, node)
3628 result = self.rpc.call_blockdev_getsize(node, newl)
3630 self.LogWarning("Failure in blockdev_getsize call to node"
3631 " %s, ignoring", node)
3633 if len(result.payload) != len(dskl):
3634 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3635 " result.payload=%s", node, len(dskl), result.payload)
3636 self.LogWarning("Invalid result from node %s, ignoring node results",
3639 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3641 self.LogWarning("Disk %d of instance %s did not return size"
3642 " information, ignoring", idx, instance.name)
3644 if not isinstance(size, (int, long)):
3645 self.LogWarning("Disk %d of instance %s did not return valid"
3646 " size information, ignoring", idx, instance.name)
3649 if size != disk.size:
3650 self.LogInfo("Disk %d of instance %s has mismatched size,"
3651 " correcting: recorded %d, actual %d", idx,
3652 instance.name, disk.size, size)
3654 self.cfg.Update(instance, feedback_fn)
3655 changed.append((instance.name, idx, size))
3656 if self._EnsureChildSizes(disk):
3657 self.cfg.Update(instance, feedback_fn)
3658 changed.append((instance.name, idx, disk.size))
3662 class LUClusterRename(LogicalUnit):
3663 """Rename the cluster.
3666 HPATH = "cluster-rename"
3667 HTYPE = constants.HTYPE_CLUSTER
3669 def BuildHooksEnv(self):
3674 "OP_TARGET": self.cfg.GetClusterName(),
3675 "NEW_NAME": self.op.name,
3678 def BuildHooksNodes(self):
3679 """Build hooks nodes.
3682 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3684 def CheckPrereq(self):
3685 """Verify that the passed name is a valid one.
3688 hostname = netutils.GetHostname(name=self.op.name,
3689 family=self.cfg.GetPrimaryIPFamily())
3691 new_name = hostname.name
3692 self.ip = new_ip = hostname.ip
3693 old_name = self.cfg.GetClusterName()
3694 old_ip = self.cfg.GetMasterIP()
3695 if new_name == old_name and new_ip == old_ip:
3696 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3697 " cluster has changed",
3699 if new_ip != old_ip:
3700 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3701 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3702 " reachable on the network" %
3703 new_ip, errors.ECODE_NOTUNIQUE)
3705 self.op.name = new_name
3707 def Exec(self, feedback_fn):
3708 """Rename the cluster.
3711 clustername = self.op.name
3714 # shutdown the master IP
3715 master_params = self.cfg.GetMasterNetworkParameters()
3716 ems = self.cfg.GetUseExternalMipScript()
3717 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3719 result.Raise("Could not disable the master role")
3722 cluster = self.cfg.GetClusterInfo()
3723 cluster.cluster_name = clustername
3724 cluster.master_ip = new_ip
3725 self.cfg.Update(cluster, feedback_fn)
3727 # update the known hosts file
3728 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3729 node_list = self.cfg.GetOnlineNodeList()
3731 node_list.remove(master_params.name)
3734 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3736 master_params.ip = new_ip
3737 result = self.rpc.call_node_activate_master_ip(master_params.name,
3739 msg = result.fail_msg
3741 self.LogWarning("Could not re-enable the master role on"
3742 " the master, please restart manually: %s", msg)
3747 def _ValidateNetmask(cfg, netmask):
3748 """Checks if a netmask is valid.
3750 @type cfg: L{config.ConfigWriter}
3751 @param cfg: The cluster configuration
3753 @param netmask: the netmask to be verified
3754 @raise errors.OpPrereqError: if the validation fails
3757 ip_family = cfg.GetPrimaryIPFamily()
3759 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3760 except errors.ProgrammerError:
3761 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3763 if not ipcls.ValidateNetmask(netmask):
3764 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
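# Minimal usage sketch (hypothetical values; assumes the netmask is passed as
# a CIDR prefix length, as the ValidateNetmask check above suggests):
#   _ValidateNetmask(self.cfg, 24)   # accepted on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 99)   # raises errors.OpPrereqError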
3768 class LUClusterSetParams(LogicalUnit):
3769 """Change the parameters of the cluster.
3772 HPATH = "cluster-modify"
3773 HTYPE = constants.HTYPE_CLUSTER
3776 def CheckArguments(self):
3780 if self.op.uid_pool:
3781 uidpool.CheckUidPool(self.op.uid_pool)
3783 if self.op.add_uids:
3784 uidpool.CheckUidPool(self.op.add_uids)
3786 if self.op.remove_uids:
3787 uidpool.CheckUidPool(self.op.remove_uids)
3789 if self.op.master_netmask is not None:
3790 _ValidateNetmask(self.cfg, self.op.master_netmask)
3792 if self.op.diskparams:
3793 for dt_params in self.op.diskparams.values():
3794 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
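# Example shape of self.op.diskparams (illustrative names and values only):
# one parameter dict per disk template, each checked against DISK_DT_TYPES
# above, e.g. {"drbd": {"resync-rate": 4096}}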
3796 def ExpandNames(self):
3797 # FIXME: in the future maybe other cluster params won't require checking on
3798 # all nodes to be modified.
3799 self.needed_locks = {
3800 locking.LEVEL_NODE: locking.ALL_SET,
3801 locking.LEVEL_INSTANCE: locking.ALL_SET,
3802 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3804 self.share_locks = {
3805 locking.LEVEL_NODE: 1,
3806 locking.LEVEL_INSTANCE: 1,
3807 locking.LEVEL_NODEGROUP: 1,
3810 def BuildHooksEnv(self):
3815 "OP_TARGET": self.cfg.GetClusterName(),
3816 "NEW_VG_NAME": self.op.vg_name,
3819 def BuildHooksNodes(self):
3820 """Build hooks nodes.
3823 mn = self.cfg.GetMasterNode()
3826 def CheckPrereq(self):
3827 """Check prerequisites.
3829 This checks that the given parameters do not conflict and
3830 that the given volume group is valid.
3833 if self.op.vg_name is not None and not self.op.vg_name:
3834 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3835 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3836 " instances exist", errors.ECODE_INVAL)
3838 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3839 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3840 raise errors.OpPrereqError("Cannot disable drbd helper while"
3841 " drbd-based instances exist",
3844 node_list = self.owned_locks(locking.LEVEL_NODE)
3846 # if vg_name not None, checks given volume group on all nodes
3848 vglist = self.rpc.call_vg_list(node_list)
3849 for node in node_list:
3850 msg = vglist[node].fail_msg
3852 # ignoring down node
3853 self.LogWarning("Error while gathering data on node %s"
3854 " (ignoring node): %s", node, msg)
3856 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3858 constants.MIN_VG_SIZE)
3860 raise errors.OpPrereqError("Error on node '%s': %s" %
3861 (node, vgstatus), errors.ECODE_ENVIRON)
3863 if self.op.drbd_helper:
3864 # checks given drbd helper on all nodes
3865 helpers = self.rpc.call_drbd_helper(node_list)
3866 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3868 self.LogInfo("Not checking drbd helper on offline node %s", node)
3870 msg = helpers[node].fail_msg
3872 raise errors.OpPrereqError("Error checking drbd helper on node"
3873 " '%s': %s" % (node, msg),
3874 errors.ECODE_ENVIRON)
3875 node_helper = helpers[node].payload
3876 if node_helper != self.op.drbd_helper:
3877 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3878 (node, node_helper), errors.ECODE_ENVIRON)
3880 self.cluster = cluster = self.cfg.GetClusterInfo()
3881 # validate params changes
3882 if self.op.beparams:
3883 objects.UpgradeBeParams(self.op.beparams)
3884 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3885 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3887 if self.op.ndparams:
3888 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3889 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3891 # TODO: we need a more general way to handle resetting
3892 # cluster-level parameters to default values
3893 if self.new_ndparams["oob_program"] == "":
3894 self.new_ndparams["oob_program"] = \
3895 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3897 if self.op.hv_state:
3898 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3899 self.cluster.hv_state_static)
3900 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3901 for hv, values in new_hv_state.items())
3903 if self.op.disk_state:
3904 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3905 self.cluster.disk_state_static)
3906 self.new_disk_state = \
3907 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3908 for name, values in svalues.items()))
3909 for storage, svalues in new_disk_state.items())
3912 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3915 all_instances = self.cfg.GetAllInstancesInfo().values()
3917 for group in self.cfg.GetAllNodeGroupsInfo().values():
3918 instances = frozenset([inst for inst in all_instances
3919 if compat.any(node in group.members
3920 for node in inst.all_nodes)])
3921 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3922 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3924 new_ipolicy, instances)
3926 violations.update(new)
3929 self.LogWarning("After the ipolicy change the following instances"
3930 " violate it: %s",
3931 utils.CommaJoin(violations))
3933 if self.op.nicparams:
3934 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3935 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3936 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3939 # check all instances for consistency
3940 for instance in self.cfg.GetAllInstancesInfo().values():
3941 for nic_idx, nic in enumerate(instance.nics):
3942 params_copy = copy.deepcopy(nic.nicparams)
3943 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3945 # check parameter syntax
3947 objects.NIC.CheckParameterSyntax(params_filled)
3948 except errors.ConfigurationError, err:
3949 nic_errors.append("Instance %s, nic/%d: %s" %
3950 (instance.name, nic_idx, err))
3952 # if we're moving instances to routed, check that they have an ip
3953 target_mode = params_filled[constants.NIC_MODE]
3954 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3955 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3956 " address" % (instance.name, nic_idx))
3958 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3959 "\n".join(nic_errors))
3961 # hypervisor list/parameters
3962 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3963 if self.op.hvparams:
3964 for hv_name, hv_dict in self.op.hvparams.items():
3965 if hv_name not in self.new_hvparams:
3966 self.new_hvparams[hv_name] = hv_dict
3968 self.new_hvparams[hv_name].update(hv_dict)
3970 # disk template parameters
3971 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3972 if self.op.diskparams:
3973 for dt_name, dt_params in self.op.diskparams.items():
3974 if dt_name not in self.new_diskparams:
3975 self.new_diskparams[dt_name] = dt_params
3977 self.new_diskparams[dt_name].update(dt_params)
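# (Sketch of the intended merge, mirroring the hvparams handling above:
# templates not yet present in the merged dict are added wholesale, existing
# ones are updated key by key on top of the cluster defaults.)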
3979 # os hypervisor parameters
3980 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3982 for os_name, hvs in self.op.os_hvp.items():
3983 if os_name not in self.new_os_hvp:
3984 self.new_os_hvp[os_name] = hvs
3986 for hv_name, hv_dict in hvs.items():
3987 if hv_name not in self.new_os_hvp[os_name]:
3988 self.new_os_hvp[os_name][hv_name] = hv_dict
3990 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3993 self.new_osp = objects.FillDict(cluster.osparams, {})
3994 if self.op.osparams:
3995 for os_name, osp in self.op.osparams.items():
3996 if os_name not in self.new_osp:
3997 self.new_osp[os_name] = {}
3999 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4002 if not self.new_osp[os_name]:
4003 # we removed all parameters
4004 del self.new_osp[os_name]
4006 # check the parameter validity (remote check)
4007 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4008 os_name, self.new_osp[os_name])
4010 # changes to the hypervisor list
4011 if self.op.enabled_hypervisors is not None:
4012 self.hv_list = self.op.enabled_hypervisors
4013 for hv in self.hv_list:
4014 # if the hypervisor doesn't already exist in the cluster
4015 # hvparams, we initialize it to empty, and then (in both
4016 # cases) we make sure to fill the defaults, as we might not
4017 # have a complete defaults list if the hypervisor wasn't enabled before
4019 if hv not in new_hvp:
4021 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4022 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4024 self.hv_list = cluster.enabled_hypervisors
4026 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4027 # either the enabled list has changed, or the parameters have, validate
4028 for hv_name, hv_params in self.new_hvparams.items():
4029 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4030 (self.op.enabled_hypervisors and
4031 hv_name in self.op.enabled_hypervisors)):
4032 # either this is a new hypervisor, or its parameters have changed
4033 hv_class = hypervisor.GetHypervisor(hv_name)
4034 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4035 hv_class.CheckParameterSyntax(hv_params)
4036 _CheckHVParams(self, node_list, hv_name, hv_params)
4039 # no need to check any newly-enabled hypervisors, since the
4040 # defaults have already been checked in the above code-block
4041 for os_name, os_hvp in self.new_os_hvp.items():
4042 for hv_name, hv_params in os_hvp.items():
4043 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4044 # we need to fill in the new os_hvp on top of the actual hv_p
4045 cluster_defaults = self.new_hvparams.get(hv_name, {})
4046 new_osp = objects.FillDict(cluster_defaults, hv_params)
4047 hv_class = hypervisor.GetHypervisor(hv_name)
4048 hv_class.CheckParameterSyntax(new_osp)
4049 _CheckHVParams(self, node_list, hv_name, new_osp)
4051 if self.op.default_iallocator:
4052 alloc_script = utils.FindFile(self.op.default_iallocator,
4053 constants.IALLOCATOR_SEARCH_PATH,
4055 if alloc_script is None:
4056 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4057 " specified" % self.op.default_iallocator,
4060 def Exec(self, feedback_fn):
4061 """Change the parameters of the cluster.
4064 if self.op.vg_name is not None:
4065 new_volume = self.op.vg_name
4068 if new_volume != self.cfg.GetVGName():
4069 self.cfg.SetVGName(new_volume)
4071 feedback_fn("Cluster LVM configuration already in desired"
4072 " state, not changing")
4073 if self.op.drbd_helper is not None:
4074 new_helper = self.op.drbd_helper
4077 if new_helper != self.cfg.GetDRBDHelper():
4078 self.cfg.SetDRBDHelper(new_helper)
4080 feedback_fn("Cluster DRBD helper already in desired state,"
4082 if self.op.hvparams:
4083 self.cluster.hvparams = self.new_hvparams
4085 self.cluster.os_hvp = self.new_os_hvp
4086 if self.op.enabled_hypervisors is not None:
4087 self.cluster.hvparams = self.new_hvparams
4088 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4089 if self.op.beparams:
4090 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4091 if self.op.nicparams:
4092 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4094 self.cluster.ipolicy = self.new_ipolicy
4095 if self.op.osparams:
4096 self.cluster.osparams = self.new_osp
4097 if self.op.ndparams:
4098 self.cluster.ndparams = self.new_ndparams
4099 if self.op.diskparams:
4100 self.cluster.diskparams = self.new_diskparams
4101 if self.op.hv_state:
4102 self.cluster.hv_state_static = self.new_hv_state
4103 if self.op.disk_state:
4104 self.cluster.disk_state_static = self.new_disk_state
4106 if self.op.candidate_pool_size is not None:
4107 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4108 # we need to update the pool size here, otherwise the save will fail
4109 _AdjustCandidatePool(self, [])
4111 if self.op.maintain_node_health is not None:
4112 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4113 feedback_fn("Note: CONFD was disabled at build time, node health"
4114 " maintenance is not useful (still enabling it)")
4115 self.cluster.maintain_node_health = self.op.maintain_node_health
4117 if self.op.prealloc_wipe_disks is not None:
4118 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4120 if self.op.add_uids is not None:
4121 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4123 if self.op.remove_uids is not None:
4124 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4126 if self.op.uid_pool is not None:
4127 self.cluster.uid_pool = self.op.uid_pool
4129 if self.op.default_iallocator is not None:
4130 self.cluster.default_iallocator = self.op.default_iallocator
4132 if self.op.reserved_lvs is not None:
4133 self.cluster.reserved_lvs = self.op.reserved_lvs
4135 if self.op.use_external_mip_script is not None:
4136 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4138 def helper_os(aname, mods, desc):
4140 lst = getattr(self.cluster, aname)
4141 for key, val in mods:
4142 if key == constants.DDM_ADD:
4144 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4147 elif key == constants.DDM_REMOVE:
4151 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4153 raise errors.ProgrammerError("Invalid modification '%s'" % key)
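# Usage sketch (hypothetical OS names): self.op.hidden_os/blacklisted_os are
# lists of (action, os_name) pairs, e.g.
#   [(constants.DDM_ADD, "debian-image"), (constants.DDM_REMOVE, "old-os")]
# which helper_os applies to the corresponding cluster attribute below.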
4155 if self.op.hidden_os:
4156 helper_os("hidden_os", self.op.hidden_os, "hidden")
4158 if self.op.blacklisted_os:
4159 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4161 if self.op.master_netdev:
4162 master_params = self.cfg.GetMasterNetworkParameters()
4163 ems = self.cfg.GetUseExternalMipScript()
4164 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4165 self.cluster.master_netdev)
4166 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4168 result.Raise("Could not disable the master ip")
4169 feedback_fn("Changing master_netdev from %s to %s" %
4170 (master_params.netdev, self.op.master_netdev))
4171 self.cluster.master_netdev = self.op.master_netdev
4173 if self.op.master_netmask:
4174 master_params = self.cfg.GetMasterNetworkParameters()
4175 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4176 result = self.rpc.call_node_change_master_netmask(master_params.name,
4177 master_params.netmask,
4178 self.op.master_netmask,
4180 master_params.netdev)
4182 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4185 self.cluster.master_netmask = self.op.master_netmask
4187 self.cfg.Update(self.cluster, feedback_fn)
4189 if self.op.master_netdev:
4190 master_params = self.cfg.GetMasterNetworkParameters()
4191 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4192 self.op.master_netdev)
4193 ems = self.cfg.GetUseExternalMipScript()
4194 result = self.rpc.call_node_activate_master_ip(master_params.name,
4197 self.LogWarning("Could not re-enable the master ip on"
4198 " the master, please restart manually: %s",
4202 def _UploadHelper(lu, nodes, fname):
4203 """Helper for uploading a file and showing warnings.
4206 if os.path.exists(fname):
4207 result = lu.rpc.call_upload_file(nodes, fname)
4208 for to_node, to_result in result.items():
4209 msg = to_result.fail_msg
4211 msg = ("Copy of file %s to node %s failed: %s" %
4212 (fname, to_node, msg))
4213 lu.proc.LogWarning(msg)
4216 def _ComputeAncillaryFiles(cluster, redist):
4217 """Compute files external to Ganeti which need to be consistent.
4219 @type redist: boolean
4220 @param redist: Whether to include files which need to be redistributed
4223 # Compute files for all nodes
4225 constants.SSH_KNOWN_HOSTS_FILE,
4226 constants.CONFD_HMAC_KEY,
4227 constants.CLUSTER_DOMAIN_SECRET_FILE,
4228 constants.SPICE_CERT_FILE,
4229 constants.SPICE_CACERT_FILE,
4230 constants.RAPI_USERS_FILE,
4234 files_all.update(constants.ALL_CERT_FILES)
4235 files_all.update(ssconf.SimpleStore().GetFileList())
4237 # we need to ship at least the RAPI certificate
4238 files_all.add(constants.RAPI_CERT_FILE)
4240 if cluster.modify_etc_hosts:
4241 files_all.add(constants.ETC_HOSTS)
4243 # Files which are optional; these must:
4244 # - be present in one other category as well
4245 # - either exist or not exist on all nodes of that category (mc, vm all)
4247 constants.RAPI_USERS_FILE,
4250 # Files which should only be on master candidates
4254 files_mc.add(constants.CLUSTER_CONF_FILE)
4256 # FIXME: this should also be replicated, but Ganeti doesn't support files_mc replication
4258 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4260 # Files which should only be on VM-capable nodes
4261 files_vm = set(filename
4262 for hv_name in cluster.enabled_hypervisors
4263 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4265 files_opt |= set(filename
4266 for hv_name in cluster.enabled_hypervisors
4267 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4269 # Filenames in each category must be unique
4270 all_files_set = files_all | files_mc | files_vm
4271 assert (len(all_files_set) ==
4272 sum(map(len, [files_all, files_mc, files_vm]))), \
4273 "Found file listed in more than one file list"
4275 # Optional files must be present in one other category
4276 assert all_files_set.issuperset(files_opt), \
4277 "Optional file not in a different required list"
4279 return (files_all, files_opt, files_mc, files_vm)
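# Return value sketch (summary; file examples illustrative): a 4-tuple of sets
#   files_all - distributed to every node (certs, known_hosts, ...)
#   files_opt - optional; may legitimately be absent on some nodes
#   files_mc  - master candidates only (e.g. the cluster config when not
#               redistributing)
#   files_vm  - VM-capable nodes only (hypervisor ancillary files)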
4282 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4283 """Distribute additional files which are part of the cluster configuration.
4285 ConfigWriter takes care of distributing the config and ssconf files, but
4286 there are more files which should be distributed to all nodes. This function
4287 makes sure those are copied.
4289 @param lu: calling logical unit
4290 @param additional_nodes: list of nodes not in the config to distribute to
4291 @type additional_vm: boolean
4292 @param additional_vm: whether the additional nodes are vm-capable or not
4295 # Gather target nodes
4296 cluster = lu.cfg.GetClusterInfo()
4297 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4299 online_nodes = lu.cfg.GetOnlineNodeList()
4300 vm_nodes = lu.cfg.GetVmCapableNodeList()
4302 if additional_nodes is not None:
4303 online_nodes.extend(additional_nodes)
4305 vm_nodes.extend(additional_nodes)
4307 # Never distribute to master node
4308 for nodelist in [online_nodes, vm_nodes]:
4309 if master_info.name in nodelist:
4310 nodelist.remove(master_info.name)
4313 (files_all, _, files_mc, files_vm) = \
4314 _ComputeAncillaryFiles(cluster, True)
4316 # Never re-distribute configuration file from here
4317 assert not (constants.CLUSTER_CONF_FILE in files_all or
4318 constants.CLUSTER_CONF_FILE in files_vm)
4319 assert not files_mc, "Master candidates not handled in this function"
4322 (online_nodes, files_all),
4323 (vm_nodes, files_vm),
4327 for (node_list, files) in filemap:
4329 _UploadHelper(lu, node_list, fname)
4332 class LUClusterRedistConf(NoHooksLU):
4333 """Force the redistribution of cluster configuration.
4335 This is a very simple LU.
4340 def ExpandNames(self):
4341 self.needed_locks = {
4342 locking.LEVEL_NODE: locking.ALL_SET,
4344 self.share_locks[locking.LEVEL_NODE] = 1
4346 def Exec(self, feedback_fn):
4347 """Redistribute the configuration.
4350 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4351 _RedistributeAncillaryFiles(self)
4354 class LUClusterActivateMasterIp(NoHooksLU):
4355 """Activate the master IP on the master node.
4358 def Exec(self, feedback_fn):
4359 """Activate the master IP.
4362 master_params = self.cfg.GetMasterNetworkParameters()
4363 ems = self.cfg.GetUseExternalMipScript()
4364 result = self.rpc.call_node_activate_master_ip(master_params.name,
4366 result.Raise("Could not activate the master IP")
4369 class LUClusterDeactivateMasterIp(NoHooksLU):
4370 """Deactivate the master IP on the master node.
4373 def Exec(self, feedback_fn):
4374 """Deactivate the master IP.
4377 master_params = self.cfg.GetMasterNetworkParameters()
4378 ems = self.cfg.GetUseExternalMipScript()
4379 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4381 result.Raise("Could not deactivate the master IP")
4384 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4385 """Sleep and poll for an instance's disk to sync.
4388 if not instance.disks or disks is not None and not disks:
4391 disks = _ExpandCheckDisks(instance, disks)
4394 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4396 node = instance.primary_node
4399 lu.cfg.SetDiskID(dev, node)
4401 # TODO: Convert to utils.Retry
4404 degr_retries = 10 # in seconds, as we sleep 1 second each time
4408 cumul_degraded = False
4409 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4410 msg = rstats.fail_msg
4412 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4415 raise errors.RemoteError("Can't contact node %s for mirror data,"
4416 " aborting." % node)
4419 rstats = rstats.payload
4421 for i, mstat in enumerate(rstats):
4423 lu.LogWarning("Can't compute data for node %s/%s",
4424 node, disks[i].iv_name)
4427 cumul_degraded = (cumul_degraded or
4428 (mstat.is_degraded and mstat.sync_percent is None))
4429 if mstat.sync_percent is not None:
4431 if mstat.estimated_time is not None:
4432 rem_time = ("%s remaining (estimated)" %
4433 utils.FormatSeconds(mstat.estimated_time))
4434 max_time = mstat.estimated_time
4436 rem_time = "no time estimate"
4437 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4438 (disks[i].iv_name, mstat.sync_percent, rem_time))
4440 # if we're done but degraded, let's do a few small retries, to
4441 # make sure we see a stable and not transient situation; therefore
4442 # we force a restart of the loop
4443 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4444 logging.info("Degraded disks found, %d retries left", degr_retries)
4452 time.sleep(min(60, max_time))
4455 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4456 return not cumul_degraded
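# Note (a reading of the code above, not original documentation): _WaitForSync
# returns True when all requested disks finished syncing without remaining
# degradation, and False if some disk is still degraded after the retry window.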
4459 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4460 """Check that mirrors are not degraded.
4462 The ldisk parameter, if True, will change the test from the
4463 is_degraded attribute (which represents overall non-ok status for
4464 the device(s)) to the ldisk (representing the local storage status).
4467 lu.cfg.SetDiskID(dev, node)
4471 if on_primary or dev.AssembleOnSecondary():
4472 rstats = lu.rpc.call_blockdev_find(node, dev)
4473 msg = rstats.fail_msg
4475 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4477 elif not rstats.payload:
4478 lu.LogWarning("Can't find disk on node %s", node)
4482 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4484 result = result and not rstats.payload.is_degraded
4487 for child in dev.children:
4488 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
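# Usage sketch (hypothetical call): checking only the local-storage status of
# an instance's first disk on its primary node:
#   ok = _CheckDiskConsistency(self, instance.disks[0],
#                              instance.primary_node, True, ldisk=True)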
4493 class LUOobCommand(NoHooksLU):
4494 """Logical unit for OOB handling.
4498 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4500 def ExpandNames(self):
4501 """Gather locks we need.
4504 if self.op.node_names:
4505 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4506 lock_names = self.op.node_names
4508 lock_names = locking.ALL_SET
4510 self.needed_locks = {
4511 locking.LEVEL_NODE: lock_names,
4514 def CheckPrereq(self):
4515 """Check prerequisites.
4518 - the node exists in the configuration
4521 Any errors are signaled by raising errors.OpPrereqError.
4525 self.master_node = self.cfg.GetMasterNode()
4527 assert self.op.power_delay >= 0.0
4529 if self.op.node_names:
4530 if (self.op.command in self._SKIP_MASTER and
4531 self.master_node in self.op.node_names):
4532 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4533 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4535 if master_oob_handler:
4536 additional_text = ("run '%s %s %s' if you want to operate on the"
4537 " master regardless") % (master_oob_handler,
4541 additional_text = "it does not support out-of-band operations"
4543 raise errors.OpPrereqError(("Operating on the master node %s is not"
4544 " allowed for %s; %s") %
4545 (self.master_node, self.op.command,
4546 additional_text), errors.ECODE_INVAL)
4548 self.op.node_names = self.cfg.GetNodeList()
4549 if self.op.command in self._SKIP_MASTER:
4550 self.op.node_names.remove(self.master_node)
4552 if self.op.command in self._SKIP_MASTER:
4553 assert self.master_node not in self.op.node_names
4555 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4557 raise errors.OpPrereqError("Node %s not found" % node_name,
4560 self.nodes.append(node)
4562 if (not self.op.ignore_status and
4563 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4564 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4565 " not marked offline") % node_name,
4568 def Exec(self, feedback_fn):
4569 """Execute OOB and return result if we expect any.
4572 master_node = self.master_node
4575 for idx, node in enumerate(utils.NiceSort(self.nodes,
4576 key=lambda node: node.name)):
4577 node_entry = [(constants.RS_NORMAL, node.name)]
4578 ret.append(node_entry)
4580 oob_program = _SupportsOob(self.cfg, node)
4583 node_entry.append((constants.RS_UNAVAIL, None))
4586 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4587 self.op.command, oob_program, node.name)
4588 result = self.rpc.call_run_oob(master_node, oob_program,
4589 self.op.command, node.name,
4593 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4594 node.name, result.fail_msg)
4595 node_entry.append((constants.RS_NODATA, None))
4598 self._CheckPayload(result)
4599 except errors.OpExecError, err:
4600 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4602 node_entry.append((constants.RS_NODATA, None))
4604 if self.op.command == constants.OOB_HEALTH:
4605 # For health we should log important events
4606 for item, status in result.payload:
4607 if status in [constants.OOB_STATUS_WARNING,
4608 constants.OOB_STATUS_CRITICAL]:
4609 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4610 item, node.name, status)
4612 if self.op.command == constants.OOB_POWER_ON:
4614 elif self.op.command == constants.OOB_POWER_OFF:
4615 node.powered = False
4616 elif self.op.command == constants.OOB_POWER_STATUS:
4617 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4618 if powered != node.powered:
4619 logging.warning(("Recorded power state (%s) of node '%s' does not"
4620 " match actual power state (%s)"), node.powered,
4623 # For configuration changing commands we should update the node
4624 if self.op.command in (constants.OOB_POWER_ON,
4625 constants.OOB_POWER_OFF):
4626 self.cfg.Update(node, feedback_fn)
4628 node_entry.append((constants.RS_NORMAL, result.payload))
4630 if (self.op.command == constants.OOB_POWER_ON and
4631 idx < len(self.nodes) - 1):
4632 time.sleep(self.op.power_delay)
4636 def _CheckPayload(self, result):
4637 """Checks if the payload is valid.
4639 @param result: RPC result
4640 @raises errors.OpExecError: If payload is not valid
4644 if self.op.command == constants.OOB_HEALTH:
4645 if not isinstance(result.payload, list):
4646 errs.append("command 'health' is expected to return a list but got %s" %
4647 type(result.payload))
4649 for item, status in result.payload:
4650 if status not in constants.OOB_STATUSES:
4651 errs.append("health item '%s' has invalid status '%s'" %
4654 if self.op.command == constants.OOB_POWER_STATUS:
4655 if not isinstance(result.payload, dict):
4656 errs.append("power-status is expected to return a dict but got %s" %
4657 type(result.payload))
4659 if self.op.command in [
4660 constants.OOB_POWER_ON,
4661 constants.OOB_POWER_OFF,
4662 constants.OOB_POWER_CYCLE,
4664 if result.payload is not None:
4665 errs.append("%s is expected not to return a payload but got '%s'" %
4666 (self.op.command, result.payload))
4669 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4670 utils.CommaJoin(errs))
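# Payload shapes enforced above (summary): OOB_HEALTH -> list of (item, status)
# pairs; OOB_POWER_STATUS -> dict containing the powered flag; power
# on/off/cycle -> no payload at all.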
4673 class _OsQuery(_QueryBase):
4674 FIELDS = query.OS_FIELDS
4676 def ExpandNames(self, lu):
4677 # Lock all nodes in shared mode
4678 # Temporary removal of locks, should be reverted later
4679 # TODO: reintroduce locks when they are lighter-weight
4680 lu.needed_locks = {}
4681 #self.share_locks[locking.LEVEL_NODE] = 1
4682 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4684 # The following variables interact with _QueryBase._GetNames
4686 self.wanted = self.names
4688 self.wanted = locking.ALL_SET
4690 self.do_locking = self.use_locking
4692 def DeclareLocks(self, lu, level):
4696 def _DiagnoseByOS(rlist):
4697 """Remaps a per-node return list into a per-os per-node dictionary
4699 @param rlist: a map with node names as keys and OS objects as values
4702 @return: a dictionary with osnames as keys and as value another
4703 map, with nodes as keys and tuples of (path, status, diagnose,
4704 variants, parameters, api_versions) as values, eg::
4706 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4707 (/srv/..., False, "invalid api")],
4708 "node2": [(/srv/..., True, "", [], [])]}
4713 # we build here the list of nodes that didn't fail the RPC (at RPC
4714 # level), so that nodes with a non-responding node daemon don't
4715 # make all OSes invalid
4716 good_nodes = [node_name for node_name in rlist
4717 if not rlist[node_name].fail_msg]
4718 for node_name, nr in rlist.items():
4719 if nr.fail_msg or not nr.payload:
4721 for (name, path, status, diagnose, variants,
4722 params, api_versions) in nr.payload:
4723 if name not in all_os:
4724 # build a list of nodes for this os containing empty lists
4725 # for each node in node_list
4727 for nname in good_nodes:
4728 all_os[name][nname] = []
4729 # convert params from [name, help] to (name, help)
4730 params = [tuple(v) for v in params]
4731 all_os[name][node_name].append((path, status, diagnose,
4732 variants, params, api_versions))
4735 def _GetQueryData(self, lu):
4736 """Computes the list of nodes and their attributes.
4739 # Locking is not used
4740 assert not (compat.any(lu.glm.is_owned(level)
4741 for level in locking.LEVELS
4742 if level != locking.LEVEL_CLUSTER) or
4743 self.do_locking or self.use_locking)
4745 valid_nodes = [node.name
4746 for node in lu.cfg.GetAllNodesInfo().values()
4747 if not node.offline and node.vm_capable]
4748 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4749 cluster = lu.cfg.GetClusterInfo()
4753 for (os_name, os_data) in pol.items():
4754 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4755 hidden=(os_name in cluster.hidden_os),
4756 blacklisted=(os_name in cluster.blacklisted_os))
4760 api_versions = set()
4762 for idx, osl in enumerate(os_data.values()):
4763 info.valid = bool(info.valid and osl and osl[0][1])
4767 (node_variants, node_params, node_api) = osl[0][3:6]
4770 variants.update(node_variants)
4771 parameters.update(node_params)
4772 api_versions.update(node_api)
4774 # Filter out inconsistent values
4775 variants.intersection_update(node_variants)
4776 parameters.intersection_update(node_params)
4777 api_versions.intersection_update(node_api)
4779 info.variants = list(variants)
4780 info.parameters = list(parameters)
4781 info.api_versions = list(api_versions)
4783 data[os_name] = info
4785 # Prepare data in requested order
4786 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4790 class LUOsDiagnose(NoHooksLU):
4791 """Logical unit for OS diagnose/query.
4797 def _BuildFilter(fields, names):
4798 """Builds a filter for querying OSes.
4801 name_filter = qlang.MakeSimpleFilter("name", names)
4803 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4804 # respective field is not requested
4805 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4806 for fname in ["hidden", "blacklisted"]
4807 if fname not in fields]
4808 if "valid" not in fields:
4809 status_filter.append([qlang.OP_TRUE, "valid"])
4812 status_filter.insert(0, qlang.OP_AND)
4814 status_filter = None
4816 if name_filter and status_filter:
4817 return [qlang.OP_AND, name_filter, status_filter]
4821 return status_filter
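# Example of the generated filter (a sketch, assuming the usual qlang
# operators): asking for fields=["name"] and names=["foo"] yields roughly
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "foo"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]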
4823 def CheckArguments(self):
4824 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4825 self.op.output_fields, False)
4827 def ExpandNames(self):
4828 self.oq.ExpandNames(self)
4830 def Exec(self, feedback_fn):
4831 return self.oq.OldStyleQuery(self)
4834 class LUNodeRemove(LogicalUnit):
4835 """Logical unit for removing a node.
4838 HPATH = "node-remove"
4839 HTYPE = constants.HTYPE_NODE
4841 def BuildHooksEnv(self):
4844 This doesn't run on the target node in the pre phase as a failed
4845 node would then be impossible to remove.
4849 "OP_TARGET": self.op.node_name,
4850 "NODE_NAME": self.op.node_name,
4853 def BuildHooksNodes(self):
4854 """Build hooks nodes.
4857 all_nodes = self.cfg.GetNodeList()
4859 all_nodes.remove(self.op.node_name)
4861 logging.warning("Node '%s', which is about to be removed, was not found"
4862 " in the list of all nodes", self.op.node_name)
4863 return (all_nodes, all_nodes)
4865 def CheckPrereq(self):
4866 """Check prerequisites.
4869 - the node exists in the configuration
4870 - it does not have primary or secondary instances
4871 - it's not the master
4873 Any errors are signaled by raising errors.OpPrereqError.
4876 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4877 node = self.cfg.GetNodeInfo(self.op.node_name)
4878 assert node is not None
4880 masternode = self.cfg.GetMasterNode()
4881 if node.name == masternode:
4882 raise errors.OpPrereqError("Node is the master node, failover to another"
4883 " node is required", errors.ECODE_INVAL)
4885 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4886 if node.name in instance.all_nodes:
4887 raise errors.OpPrereqError("Instance %s is still running on the node,"
4888 " please remove it first" % instance_name,
4890 self.op.node_name = node.name
4893 def Exec(self, feedback_fn):
4894 """Removes the node from the cluster.
4898 logging.info("Stopping the node daemon and removing configs from node %s",
4901 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4903 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4906 # Promote nodes to master candidate as needed
4907 _AdjustCandidatePool(self, exceptions=[node.name])
4908 self.context.RemoveNode(node.name)
4910 # Run post hooks on the node before it's removed
4911 _RunPostHook(self, node.name)
4913 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4914 msg = result.fail_msg
4916 self.LogWarning("Errors encountered on the remote node while leaving"
4917 " the cluster: %s", msg)
4919 # Remove node from our /etc/hosts
4920 if self.cfg.GetClusterInfo().modify_etc_hosts:
4921 master_node = self.cfg.GetMasterNode()
4922 result = self.rpc.call_etc_hosts_modify(master_node,
4923 constants.ETC_HOSTS_REMOVE,
4925 result.Raise("Can't update hosts file with new host data")
4926 _RedistributeAncillaryFiles(self)
4929 class _NodeQuery(_QueryBase):
4930 FIELDS = query.NODE_FIELDS
4932 def ExpandNames(self, lu):
4933 lu.needed_locks = {}
4934 lu.share_locks = _ShareAll()
4937 self.wanted = _GetWantedNodes(lu, self.names)
4939 self.wanted = locking.ALL_SET
4941 self.do_locking = (self.use_locking and
4942 query.NQ_LIVE in self.requested_data)
4945 # If any non-static field is requested we need to lock the nodes
4946 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4948 def DeclareLocks(self, lu, level):
4951 def _GetQueryData(self, lu):
4952 """Computes the list of nodes and their attributes.
4955 all_info = lu.cfg.GetAllNodesInfo()
4957 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4959 # Gather data as requested
4960 if query.NQ_LIVE in self.requested_data:
4961 # filter out non-vm_capable nodes
4962 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4964 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4965 [lu.cfg.GetHypervisorType()])
4966 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4967 for (name, nresult) in node_data.items()
4968 if not nresult.fail_msg and nresult.payload)
4972 if query.NQ_INST in self.requested_data:
4973 node_to_primary = dict([(name, set()) for name in nodenames])
4974 node_to_secondary = dict([(name, set()) for name in nodenames])
4976 inst_data = lu.cfg.GetAllInstancesInfo()
4978 for inst in inst_data.values():
4979 if inst.primary_node in node_to_primary:
4980 node_to_primary[inst.primary_node].add(inst.name)
4981 for secnode in inst.secondary_nodes:
4982 if secnode in node_to_secondary:
4983 node_to_secondary[secnode].add(inst.name)
4985 node_to_primary = None
4986 node_to_secondary = None
4988 if query.NQ_OOB in self.requested_data:
4989 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4990 for name, node in all_info.iteritems())
4994 if query.NQ_GROUP in self.requested_data:
4995 groups = lu.cfg.GetAllNodeGroupsInfo()
4999 return query.NodeQueryData([all_info[name] for name in nodenames],
5000 live_data, lu.cfg.GetMasterNode(),
5001 node_to_primary, node_to_secondary, groups,
5002 oob_support, lu.cfg.GetClusterInfo())
5005 class LUNodeQuery(NoHooksLU):
5006 """Logical unit for querying nodes.
5009 # pylint: disable=W0142
5012 def CheckArguments(self):
5013 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5014 self.op.output_fields, self.op.use_locking)
5016 def ExpandNames(self):
5017 self.nq.ExpandNames(self)
5019 def DeclareLocks(self, level):
5020 self.nq.DeclareLocks(self, level)
5022 def Exec(self, feedback_fn):
5023 return self.nq.OldStyleQuery(self)
5026 class LUNodeQueryvols(NoHooksLU):
5027 """Logical unit for getting volumes on node(s).
5031 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5032 _FIELDS_STATIC = utils.FieldSet("node")
5034 def CheckArguments(self):
5035 _CheckOutputFields(static=self._FIELDS_STATIC,
5036 dynamic=self._FIELDS_DYNAMIC,
5037 selected=self.op.output_fields)
5039 def ExpandNames(self):
5040 self.share_locks = _ShareAll()
5041 self.needed_locks = {}
5043 if not self.op.nodes:
5044 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5046 self.needed_locks[locking.LEVEL_NODE] = \
5047 _GetWantedNodes(self, self.op.nodes)
5049 def Exec(self, feedback_fn):
5050 """Computes the list of nodes and their attributes.
5053 nodenames = self.owned_locks(locking.LEVEL_NODE)
5054 volumes = self.rpc.call_node_volumes(nodenames)
5056 ilist = self.cfg.GetAllInstancesInfo()
5057 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5060 for node in nodenames:
5061 nresult = volumes[node]
5064 msg = nresult.fail_msg
5066 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5069 node_vols = sorted(nresult.payload,
5070 key=operator.itemgetter("dev"))
5072 for vol in node_vols:
5074 for field in self.op.output_fields:
5077 elif field == "phys":
5081 elif field == "name":
5083 elif field == "size":
5084 val = int(float(vol["size"]))
5085 elif field == "instance":
5086 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5088 raise errors.ParameterError(field)
5089 node_output.append(str(val))
5091 output.append(node_output)
5096 class LUNodeQueryStorage(NoHooksLU):
5097 """Logical unit for getting information on storage units on node(s).
5100 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5103 def CheckArguments(self):
5104 _CheckOutputFields(static=self._FIELDS_STATIC,
5105 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5106 selected=self.op.output_fields)
5108 def ExpandNames(self):
5109 self.share_locks = _ShareAll()
5110 self.needed_locks = {}
5113 self.needed_locks[locking.LEVEL_NODE] = \
5114 _GetWantedNodes(self, self.op.nodes)
5116 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5118 def Exec(self, feedback_fn):
5119 """Computes the list of nodes and their attributes.
5122 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5124 # Always get name to sort by
5125 if constants.SF_NAME in self.op.output_fields:
5126 fields = self.op.output_fields[:]
5128 fields = [constants.SF_NAME] + self.op.output_fields
5130 # Never ask for node or type as it's only known to the LU
5131 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5132 while extra in fields:
5133 fields.remove(extra)
5135 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5136 name_idx = field_idx[constants.SF_NAME]
5138 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5139 data = self.rpc.call_storage_list(self.nodes,
5140 self.op.storage_type, st_args,
5141 self.op.name, fields)
5145 for node in utils.NiceSort(self.nodes):
5146 nresult = data[node]
5150 msg = nresult.fail_msg
5152 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5155 rows = dict([(row[name_idx], row) for row in nresult.payload])
5157 for name in utils.NiceSort(rows.keys()):
5162 for field in self.op.output_fields:
5163 if field == constants.SF_NODE:
5165 elif field == constants.SF_TYPE:
5166 val = self.op.storage_type
5167 elif field in field_idx:
5168 val = row[field_idx[field]]
5170 raise errors.ParameterError(field)
5179 class _InstanceQuery(_QueryBase):
5180 FIELDS = query.INSTANCE_FIELDS
5182 def ExpandNames(self, lu):
5183 lu.needed_locks = {}
5184 lu.share_locks = _ShareAll()
5187 self.wanted = _GetWantedInstances(lu, self.names)
5189 self.wanted = locking.ALL_SET
5191 self.do_locking = (self.use_locking and
5192 query.IQ_LIVE in self.requested_data)
5194 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5195 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5196 lu.needed_locks[locking.LEVEL_NODE] = []
5197 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5199 self.do_grouplocks = (self.do_locking and
5200 query.IQ_NODES in self.requested_data)
5202 def DeclareLocks(self, lu, level):
5204 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5205 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5207 # Lock all groups used by instances optimistically; this requires going
5208 # via the node before it's locked, requiring verification later on
5209 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5211 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5212 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5213 elif level == locking.LEVEL_NODE:
5214 lu._LockInstancesNodes() # pylint: disable=W0212
5217 def _CheckGroupLocks(lu):
5218 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5219 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5221 # Check if node groups for locked instances are still correct
5222 for instance_name in owned_instances:
5223 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5225 def _GetQueryData(self, lu):
5226 """Computes the list of instances and their attributes.
5229 if self.do_grouplocks:
5230 self._CheckGroupLocks(lu)
5232 cluster = lu.cfg.GetClusterInfo()
5233 all_info = lu.cfg.GetAllInstancesInfo()
5235 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5237 instance_list = [all_info[name] for name in instance_names]
5238 nodes = frozenset(itertools.chain(*(inst.all_nodes
5239 for inst in instance_list)))
5240 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5243 wrongnode_inst = set()
5245 # Gather data as requested
5246 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5248 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5250 result = node_data[name]
5252 # offline nodes will be in both lists
5253 assert result.fail_msg
5254 offline_nodes.append(name)
5256 bad_nodes.append(name)
5257 elif result.payload:
5258 for inst in result.payload:
5259 if inst in all_info:
5260 if all_info[inst].primary_node == name:
5261 live_data.update(result.payload)
5263 wrongnode_inst.add(inst)
5265 # orphan instance; we don't list it here as we don't
5266 # handle this case yet in the output of instance listing
5267 logging.warning("Orphan instance '%s' found on node %s",
5269 # else no instance is alive
5273 if query.IQ_DISKUSAGE in self.requested_data:
5274 disk_usage = dict((inst.name,
5275 _ComputeDiskSize(inst.disk_template,
5276 [{constants.IDISK_SIZE: disk.size}
5277 for disk in inst.disks]))
5278 for inst in instance_list)
5282 if query.IQ_CONSOLE in self.requested_data:
5284 for inst in instance_list:
5285 if inst.name in live_data:
5286 # Instance is running
5287 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5289 consinfo[inst.name] = None
5290 assert set(consinfo.keys()) == set(instance_names)
5294 if query.IQ_NODES in self.requested_data:
5295 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5297 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5298 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5299 for uuid in set(map(operator.attrgetter("group"),
5305 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5306 disk_usage, offline_nodes, bad_nodes,
5307 live_data, wrongnode_inst, consinfo,
5311 class LUQuery(NoHooksLU):
5312 """Query for resources/items of a certain kind.
5315 # pylint: disable=W0142
5318 def CheckArguments(self):
5319 qcls = _GetQueryImplementation(self.op.what)
5321 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5323 def ExpandNames(self):
5324 self.impl.ExpandNames(self)
5326 def DeclareLocks(self, level):
5327 self.impl.DeclareLocks(self, level)
5329 def Exec(self, feedback_fn):
5330 return self.impl.NewStyleQuery(self)
5333 class LUQueryFields(NoHooksLU):
5334 """Query for resources/items of a certain kind.
5337 # pylint: disable=W0142
5340 def CheckArguments(self):
5341 self.qcls = _GetQueryImplementation(self.op.what)
5343 def ExpandNames(self):
5344 self.needed_locks = {}
5346 def Exec(self, feedback_fn):
5347 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5350 class LUNodeModifyStorage(NoHooksLU):
5351 """Logical unit for modifying a storage volume on a node.
5356 def CheckArguments(self):
5357 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5359 storage_type = self.op.storage_type
5362 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5364 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5365 " modified" % storage_type,
5368 diff = set(self.op.changes.keys()) - modifiable
5370 raise errors.OpPrereqError("The following fields can not be modified for"
5371 " storage units of type '%s': %r" %
5372 (storage_type, list(diff)),
5375 def ExpandNames(self):
5376 self.needed_locks = {
5377 locking.LEVEL_NODE: self.op.node_name,
5380 def Exec(self, feedback_fn):
5381 """Computes the list of nodes and their attributes.
5384 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5385 result = self.rpc.call_storage_modify(self.op.node_name,
5386 self.op.storage_type, st_args,
5387 self.op.name, self.op.changes)
5388 result.Raise("Failed to modify storage unit '%s' on %s" %
5389 (self.op.name, self.op.node_name))
5392 class LUNodeAdd(LogicalUnit):
5393 """Logical unit for adding node to the cluster.
5397 HTYPE = constants.HTYPE_NODE
5398 _NFLAGS = ["master_capable", "vm_capable"]
5400 def CheckArguments(self):
5401 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5402 # validate/normalize the node name
5403 self.hostname = netutils.GetHostname(name=self.op.node_name,
5404 family=self.primary_ip_family)
5405 self.op.node_name = self.hostname.name
5407 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5408 raise errors.OpPrereqError("Cannot readd the master node",
5411 if self.op.readd and self.op.group:
5412 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5413 " being readded", errors.ECODE_INVAL)
5415 def BuildHooksEnv(self):
5418 This will run on all nodes before, and on all nodes + the new node after.
5422 "OP_TARGET": self.op.node_name,
5423 "NODE_NAME": self.op.node_name,
5424 "NODE_PIP": self.op.primary_ip,
5425 "NODE_SIP": self.op.secondary_ip,
5426 "MASTER_CAPABLE": str(self.op.master_capable),
5427 "VM_CAPABLE": str(self.op.vm_capable),
5430 def BuildHooksNodes(self):
5431 """Build hooks nodes.
5434 # Exclude added node
5435 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5436 post_nodes = pre_nodes + [self.op.node_name, ]
5438 return (pre_nodes, post_nodes)
5440 def CheckPrereq(self):
5441 """Check prerequisites.
5444 - the new node is not already in the config
5446 - its parameters (single/dual homed) matches the cluster
5448 Any errors are signaled by raising errors.OpPrereqError.
5452 hostname = self.hostname
5453 node = hostname.name
5454 primary_ip = self.op.primary_ip = hostname.ip
5455 if self.op.secondary_ip is None:
5456 if self.primary_ip_family == netutils.IP6Address.family:
5457 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5458 " IPv4 address must be given as secondary",
5460 self.op.secondary_ip = primary_ip
5462 secondary_ip = self.op.secondary_ip
5463 if not netutils.IP4Address.IsValid(secondary_ip):
5464 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5465 " address" % secondary_ip, errors.ECODE_INVAL)
5467 node_list = cfg.GetNodeList()
5468 if not self.op.readd and node in node_list:
5469 raise errors.OpPrereqError("Node %s is already in the configuration" %
5470 node, errors.ECODE_EXISTS)
5471 elif self.op.readd and node not in node_list:
5472 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5475 self.changed_primary_ip = False
5477 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5478 if self.op.readd and node == existing_node_name:
5479 if existing_node.secondary_ip != secondary_ip:
5480 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5481 " address configuration as before",
5483 if existing_node.primary_ip != primary_ip:
5484 self.changed_primary_ip = True
5488 if (existing_node.primary_ip == primary_ip or
5489 existing_node.secondary_ip == primary_ip or
5490 existing_node.primary_ip == secondary_ip or
5491 existing_node.secondary_ip == secondary_ip):
5492 raise errors.OpPrereqError("New node ip address(es) conflict with"
5493 " existing node %s" % existing_node.name,
5494 errors.ECODE_NOTUNIQUE)
5496 # After this 'if' block, None is no longer a valid value for the
5497 # _capable op attributes
5499 old_node = self.cfg.GetNodeInfo(node)
5500 assert old_node is not None, "Can't retrieve locked node %s" % node
5501 for attr in self._NFLAGS:
5502 if getattr(self.op, attr) is None:
5503 setattr(self.op, attr, getattr(old_node, attr))
5505 for attr in self._NFLAGS:
5506 if getattr(self.op, attr) is None:
5507 setattr(self.op, attr, True)
5509 if self.op.readd and not self.op.vm_capable:
5510 pri, sec = cfg.GetNodeInstances(node)
5512 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5513 " flag set to false, but it already holds"
5514 " instances" % node,
5517 # check that the type of the node (single versus dual homed) is the
5518 # same as for the master
5519 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5520 master_singlehomed = myself.secondary_ip == myself.primary_ip
5521 newbie_singlehomed = secondary_ip == primary_ip
5522 if master_singlehomed != newbie_singlehomed:
5523 if master_singlehomed:
5524 raise errors.OpPrereqError("The master has no secondary ip but the"
5525 " new node has one",
5528 raise errors.OpPrereqError("The master has a secondary ip but the"
5529 " new node doesn't have one",
5532 # checks reachability
5533 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5534 raise errors.OpPrereqError("Node not reachable by ping",
5535 errors.ECODE_ENVIRON)
5537 if not newbie_singlehomed:
5538 # check reachability from my secondary ip to newbie's secondary ip
5539 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5540 source=myself.secondary_ip):
5541 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5542 " based ping to node daemon port",
5543 errors.ECODE_ENVIRON)
5550 if self.op.master_capable:
5551 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5553 self.master_candidate = False
5556 self.new_node = old_node
5558 node_group = cfg.LookupNodeGroup(self.op.group)
5559 self.new_node = objects.Node(name=node,
5560 primary_ip=primary_ip,
5561 secondary_ip=secondary_ip,
5562 master_candidate=self.master_candidate,
5563 offline=False, drained=False,
5566 if self.op.ndparams:
5567 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5569 if self.op.hv_state:
5570 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5572 if self.op.disk_state:
5573 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5575 def Exec(self, feedback_fn):
5576 """Adds the new node to the cluster.
5579 new_node = self.new_node
5580 node = new_node.name
5582 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5585 # We are adding a new node, so we assume it's powered
5586 new_node.powered = True
5588 # for re-adds, reset the offline/drained/master-candidate flags;
5589 # we need to reset here, otherwise offline would prevent RPC calls
5590 # later in the procedure; this also means that if the re-add
5591 # fails, we are left with a non-offlined, broken node
5593 new_node.drained = new_node.offline = False # pylint: disable=W0201
5594 self.LogInfo("Readding a node, the offline/drained flags were reset")
5595 # if we demote the node, we do cleanup later in the procedure
5596 new_node.master_candidate = self.master_candidate
5597 if self.changed_primary_ip:
5598 new_node.primary_ip = self.op.primary_ip
5600 # copy the master/vm_capable flags
5601 for attr in self._NFLAGS:
5602 setattr(new_node, attr, getattr(self.op, attr))
5604 # notify the user about any possible mc promotion
5605 if new_node.master_candidate:
5606 self.LogInfo("Node will be a master candidate")
5608 if self.op.ndparams:
5609 new_node.ndparams = self.op.ndparams
5611 new_node.ndparams = {}
5613 if self.op.hv_state:
5614 new_node.hv_state_static = self.new_hv_state
5616 if self.op.disk_state:
5617 new_node.disk_state_static = self.new_disk_state
5619 # check connectivity
5620 result = self.rpc.call_version([node])[node]
5621 result.Raise("Can't get version information from node %s" % node)
5622 if constants.PROTOCOL_VERSION == result.payload:
5623 logging.info("Communication to node %s fine, sw version %s match",
5624 node, result.payload)
5626 raise errors.OpExecError("Version mismatch: master version %s,"
5627 " node version %s" %
5628 (constants.PROTOCOL_VERSION, result.payload))
5630 # Add node to our /etc/hosts, and add key to known_hosts
5631 if self.cfg.GetClusterInfo().modify_etc_hosts:
5632 master_node = self.cfg.GetMasterNode()
5633 result = self.rpc.call_etc_hosts_modify(master_node,
5634 constants.ETC_HOSTS_ADD,
5637 result.Raise("Can't update hosts file with new host data")
5639 if new_node.secondary_ip != new_node.primary_ip:
5640 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5643 node_verify_list = [self.cfg.GetMasterNode()]
5644 node_verify_param = {
5645 constants.NV_NODELIST: ([node], {}),
5646 # TODO: do a node-net-test as well?
5649 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5650 self.cfg.GetClusterName())
5651 for verifier in node_verify_list:
5652 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5653 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5655 for failed in nl_payload:
5656 feedback_fn("ssh/hostname verification failed"
5657 " (checking from %s): %s" %
5658 (verifier, nl_payload[failed]))
5659 raise errors.OpExecError("ssh/hostname verification failed")
5662 _RedistributeAncillaryFiles(self)
5663 self.context.ReaddNode(new_node)
5664 # make sure we redistribute the config
5665 self.cfg.Update(new_node, feedback_fn)
5666 # and make sure the new node will not have old files around
5667 if not new_node.master_candidate:
5668 result = self.rpc.call_node_demote_from_mc(new_node.name)
5669 msg = result.fail_msg
5670 if msg:
5671 self.LogWarning("Node failed to demote itself from master"
5672 " candidate status: %s" % msg)
5673 else:
5674 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5675 additional_vm=self.op.vm_capable)
5676 self.context.AddNode(new_node, self.proc.GetECId())
5679 class LUNodeSetParams(LogicalUnit):
5680 """Modifies the parameters of a node.
5682 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5683 to the node role (as _ROLE_*)
5684 @cvar _R2F: a dictionary from node role to tuples of flags
5685 @cvar _FLAGS: a list of attribute names corresponding to the flags
5688 HPATH = "node-modify"
5689 HTYPE = constants.HTYPE_NODE
5691 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5693 (True, False, False): _ROLE_CANDIDATE,
5694 (False, True, False): _ROLE_DRAINED,
5695 (False, False, True): _ROLE_OFFLINE,
5696 (False, False, False): _ROLE_REGULAR,
5698 _R2F = dict((v, k) for k, v in _F2R.items())
5699 _FLAGS = ["master_candidate", "drained", "offline"]
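# For example, a node whose flags are (master_candidate=True, drained=False,
# offline=False) maps to _ROLE_CANDIDATE, and _R2F simply inverts the
# mapping, so _R2F[_ROLE_DRAINED] == (False, True, False).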
5701 def CheckArguments(self):
5702 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5703 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5704 self.op.master_capable, self.op.vm_capable,
5705 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5707 if all_mods.count(None) == len(all_mods):
5708 raise errors.OpPrereqError("Please pass at least one modification",
5710 if all_mods.count(True) > 1:
5711 raise errors.OpPrereqError("Can't set the node into more than one"
5712 " state at the same time",
5715 # Boolean value that tells us whether we might be demoting from MC
5716 self.might_demote = (self.op.master_candidate == False or
5717 self.op.offline == True or
5718 self.op.drained == True or
5719 self.op.master_capable == False)
5721 if self.op.secondary_ip:
5722 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5723 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5724 " address" % self.op.secondary_ip,
5727 self.lock_all = self.op.auto_promote and self.might_demote
5728 self.lock_instances = self.op.secondary_ip is not None
5730 def _InstanceFilter(self, instance):
5731 """Filter for getting affected instances.
5734 return (instance.disk_template in constants.DTS_INT_MIRROR and
5735 self.op.node_name in instance.all_nodes)
5737 def ExpandNames(self):
5738 if self.lock_all:
5739 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5740 else:
5741 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5743 # Since modifying a node can have severe effects on currently running
5744 # operations, the resource lock is at least acquired in shared mode
5745 self.needed_locks[locking.LEVEL_NODE_RES] = \
5746 self.needed_locks[locking.LEVEL_NODE]
5748 # Get node resource and instance locks in shared mode; they are not used
5749 # for anything but read-only access
5750 self.share_locks[locking.LEVEL_NODE_RES] = 1
5751 self.share_locks[locking.LEVEL_INSTANCE] = 1
5753 if self.lock_instances:
5754 self.needed_locks[locking.LEVEL_INSTANCE] = \
5755 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
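# Note that the instance set computed above via _InstanceFilter (internally
# mirrored instances that have this node among their nodes) is only a lock
# declaration; CheckPrereq re-evaluates the same filter and verifies it still
# matches the locks actually owned.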
5757 def BuildHooksEnv(self):
5760 This runs on the master node.
5764 "OP_TARGET": self.op.node_name,
5765 "MASTER_CANDIDATE": str(self.op.master_candidate),
5766 "OFFLINE": str(self.op.offline),
5767 "DRAINED": str(self.op.drained),
5768 "MASTER_CAPABLE": str(self.op.master_capable),
5769 "VM_CAPABLE": str(self.op.vm_capable),
5772 def BuildHooksNodes(self):
5773 """Build hooks nodes.
5776 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5779 def CheckPrereq(self):
5780 """Check prerequisites.
5782 This only checks the instance list against the existing names.
5785 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5787 if self.lock_instances:
5788 affected_instances = \
5789 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5791 # Verify instance locks
5792 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5793 wanted_instances = frozenset(affected_instances.keys())
5794 if wanted_instances - owned_instances:
5795 raise errors.OpPrereqError("Instances affected by changing node %s's"
5796 " secondary IP address have changed since"
5797 " locks were acquired, wanted '%s', have"
5798 " '%s'; retry the operation" %
5800 utils.CommaJoin(wanted_instances),
5801 utils.CommaJoin(owned_instances)),
5803 else:
5804 affected_instances = None
5806 if (self.op.master_candidate is not None or
5807 self.op.drained is not None or
5808 self.op.offline is not None):
5809 # we can't change the master's node flags
5810 if self.op.node_name == self.cfg.GetMasterNode():
5811 raise errors.OpPrereqError("The master role can be changed"
5812 " only via master-failover",
5815 if self.op.master_candidate and not node.master_capable:
5816 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5817 " it a master candidate" % node.name,
5820 if self.op.vm_capable == False:
5821 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5822 if ipri or isec:
5823 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5824 " the vm_capable flag" % node.name,
5827 if node.master_candidate and self.might_demote and not self.lock_all:
5828 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5829 # check if after removing the current node, we're missing master
5830 # candidates
5831 (mc_remaining, mc_should, _) = \
5832 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5833 if mc_remaining < mc_should:
5834 raise errors.OpPrereqError("Not enough master candidates, please"
5835 " pass auto promote option to allow"
5836 " promotion", errors.ECODE_STATE)
5838 self.old_flags = old_flags = (node.master_candidate,
5839 node.drained, node.offline)
5840 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5841 self.old_role = old_role = self._F2R[old_flags]
5843 # Check for ineffective changes
5844 for attr in self._FLAGS:
5845 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5846 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5847 setattr(self.op, attr, None)
5849 # Past this point, any flag change to False means a transition
5850 # away from the respective state, as only real changes are kept
5852 # TODO: We might query the real power state if it supports OOB
5853 if _SupportsOob(self.cfg, node):
5854 if self.op.offline is False and not (node.powered or
5855 self.op.powered == True):
5856 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5857 " offline status can be reset") %
5859 elif self.op.powered is not None:
5860 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5861 " as it does not support out-of-band"
5862 " handling") % self.op.node_name)
5864 # If we're being de-offlined/drained, we'll MC ourselves if needed
5865 if (self.op.drained == False or self.op.offline == False or
5866 (self.op.master_capable and not node.master_capable)):
5867 if _DecideSelfPromotion(self):
5868 self.op.master_candidate = True
5869 self.LogInfo("Auto-promoting node to master candidate")
5871 # If we're no longer master capable, we'll demote ourselves from MC
5872 if self.op.master_capable == False and node.master_candidate:
5873 self.LogInfo("Demoting from master candidate")
5874 self.op.master_candidate = False
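# From here on at most one flag is being set to True (see the assert below);
# the new role is resolved in the fixed order candidate > drained > offline,
# falls back to the regular role when a flag is merely being unset, and keeps
# the old role when no flag changes at all.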
5877 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5878 if self.op.master_candidate:
5879 new_role = self._ROLE_CANDIDATE
5880 elif self.op.drained:
5881 new_role = self._ROLE_DRAINED
5882 elif self.op.offline:
5883 new_role = self._ROLE_OFFLINE
5884 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5885 # False is still in new flags, which means we're un-setting (the
5886 # current) flags
5887 new_role = self._ROLE_REGULAR
5888 else: # no new flags, nothing, keep old role
5889 new_role = old_role
5891 self.new_role = new_role
5893 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5894 # Trying to transition out of offline status
5895 # TODO: Use standard RPC runner, but make sure it works when the node is
5896 # still marked offline
5897 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5899 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5900 " to report its version: %s" %
5901 (node.name, result.fail_msg),
5904 self.LogWarning("Transitioning node from offline to online state"
5905 " without using re-add. Please make sure the node"
5908 if self.op.secondary_ip:
5909 # Ok even without locking, because this can't be changed by any LU
5910 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5911 master_singlehomed = master.secondary_ip == master.primary_ip
5912 if master_singlehomed and self.op.secondary_ip:
5913 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5914 " homed cluster", errors.ECODE_INVAL)
5916 assert not (frozenset(affected_instances) -
5917 self.owned_locks(locking.LEVEL_INSTANCE))
5919 if node.offline:
5920 if affected_instances:
5921 raise errors.OpPrereqError("Cannot change secondary IP address:"
5922 " offline node has instances (%s)"
5923 " configured to use it" %
5924 utils.CommaJoin(affected_instances.keys()))
5925 else:
5926 # On online nodes, check that no instances are running, and that
5927 # the node has the new ip and we can reach it.
5928 for instance in affected_instances.values():
5929 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5930 msg="cannot change secondary ip")
5932 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5933 if master.name != node.name:
5934 # check reachability from master secondary ip to new secondary ip
5935 if not netutils.TcpPing(self.op.secondary_ip,
5936 constants.DEFAULT_NODED_PORT,
5937 source=master.secondary_ip):
5938 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5939 " based ping to node daemon port",
5940 errors.ECODE_ENVIRON)
5942 if self.op.ndparams:
5943 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5944 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5945 self.new_ndparams = new_ndparams
5947 if self.op.hv_state:
5948 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5949 self.node.hv_state_static)
5951 if self.op.disk_state:
5952 self.new_disk_state = \
5953 _MergeAndVerifyDiskState(self.op.disk_state,
5954 self.node.disk_state_static)
5956 def Exec(self, feedback_fn):
5961 old_role = self.old_role
5962 new_role = self.new_role
5966 if self.op.ndparams:
5967 node.ndparams = self.new_ndparams
5969 if self.op.powered is not None:
5970 node.powered = self.op.powered
5972 if self.op.hv_state:
5973 node.hv_state_static = self.new_hv_state
5975 if self.op.disk_state:
5976 node.disk_state_static = self.new_disk_state
5978 for attr in ["master_capable", "vm_capable"]:
5979 val = getattr(self.op, attr)
5980 if val is not None:
5981 setattr(node, attr, val)
5982 result.append((attr, str(val)))
5984 if new_role != old_role:
5985 # Tell the node to demote itself, if no longer MC and not offline
5986 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5987 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5988 if msg:
5989 self.LogWarning("Node failed to demote itself: %s", msg)
5991 new_flags = self._R2F[new_role]
5992 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5993 if of != nf:
5994 result.append((desc, str(nf)))
5995 (node.master_candidate, node.drained, node.offline) = new_flags
5997 # if we locked all nodes, adjust the candidate pool before updating this node
5998 if self.lock_all:
5999 _AdjustCandidatePool(self, [node.name])
6001 if self.op.secondary_ip:
6002 node.secondary_ip = self.op.secondary_ip
6003 result.append(("secondary_ip", self.op.secondary_ip))
6005 # this will trigger configuration file update, if needed
6006 self.cfg.Update(node, feedback_fn)
6008 # this will trigger job queue propagation or cleanup if the mc
6009 # flag changed
6010 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6011 self.context.ReaddNode(node)
6016 class LUNodePowercycle(NoHooksLU):
6017 """Powercycles a node.
6022 def CheckArguments(self):
6023 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6024 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6025 raise errors.OpPrereqError("The node is the master and the force"
6026 " parameter was not set",
6029 def ExpandNames(self):
6030 """Locking for PowercycleNode.
6032 This is a last-resort option and shouldn't block on other
6033 jobs. Therefore, we grab no locks.
6036 self.needed_locks = {}
6038 def Exec(self, feedback_fn):
6042 result = self.rpc.call_node_powercycle(self.op.node_name,
6043 self.cfg.GetHypervisorType())
6044 result.Raise("Failed to schedule the reboot")
6045 return result.payload
6048 class LUClusterQuery(NoHooksLU):
6049 """Query cluster configuration.
6054 def ExpandNames(self):
6055 self.needed_locks = {}
6057 def Exec(self, feedback_fn):
6058 """Return cluster config.
6061 cluster = self.cfg.GetClusterInfo()
6062 os_hvp = {}
6064 # Filter just for enabled hypervisors
6065 for os_name, hv_dict in cluster.os_hvp.items():
6066 os_hvp[os_name] = {}
6067 for hv_name, hv_params in hv_dict.items():
6068 if hv_name in cluster.enabled_hypervisors:
6069 os_hvp[os_name][hv_name] = hv_params
6071 # Convert ip_family to ip_version
6072 primary_ip_version = constants.IP4_VERSION
6073 if cluster.primary_ip_family == netutils.IP6Address.family:
6074 primary_ip_version = constants.IP6_VERSION
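# The configuration stores the address *family*, while clients are shown an
# IP *version*: a cluster whose primary_ip_family is IPv6 therefore reports
# constants.IP6_VERSION in "primary_ip_version" below, and the IPv4 default
# keeps constants.IP4_VERSION.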
6077 "software_version": constants.RELEASE_VERSION,
6078 "protocol_version": constants.PROTOCOL_VERSION,
6079 "config_version": constants.CONFIG_VERSION,
6080 "os_api_version": max(constants.OS_API_VERSIONS),
6081 "export_version": constants.EXPORT_VERSION,
6082 "architecture": (platform.architecture()[0], platform.machine()),
6083 "name": cluster.cluster_name,
6084 "master": cluster.master_node,
6085 "default_hypervisor": cluster.primary_hypervisor,
6086 "enabled_hypervisors": cluster.enabled_hypervisors,
6087 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6088 for hypervisor_name in cluster.enabled_hypervisors]),
6090 "beparams": cluster.beparams,
6091 "osparams": cluster.osparams,
6092 "ipolicy": cluster.ipolicy,
6093 "nicparams": cluster.nicparams,
6094 "ndparams": cluster.ndparams,
6095 "candidate_pool_size": cluster.candidate_pool_size,
6096 "master_netdev": cluster.master_netdev,
6097 "master_netmask": cluster.master_netmask,
6098 "use_external_mip_script": cluster.use_external_mip_script,
6099 "volume_group_name": cluster.volume_group_name,
6100 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6101 "file_storage_dir": cluster.file_storage_dir,
6102 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6103 "maintain_node_health": cluster.maintain_node_health,
6104 "ctime": cluster.ctime,
6105 "mtime": cluster.mtime,
6106 "uuid": cluster.uuid,
6107 "tags": list(cluster.GetTags()),
6108 "uid_pool": cluster.uid_pool,
6109 "default_iallocator": cluster.default_iallocator,
6110 "reserved_lvs": cluster.reserved_lvs,
6111 "primary_ip_version": primary_ip_version,
6112 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6113 "hidden_os": cluster.hidden_os,
6114 "blacklisted_os": cluster.blacklisted_os,
6120 class LUClusterConfigQuery(NoHooksLU):
6121 """Return configuration values.
6125 _FIELDS_DYNAMIC = utils.FieldSet()
6126 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6127 "watcher_pause", "volume_group_name")
6129 def CheckArguments(self):
6130 _CheckOutputFields(static=self._FIELDS_STATIC,
6131 dynamic=self._FIELDS_DYNAMIC,
6132 selected=self.op.output_fields)
6134 def ExpandNames(self):
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6138 """Dump a representation of the cluster config to the standard output.
6141 values = []
6142 for field in self.op.output_fields:
6143 if field == "cluster_name":
6144 entry = self.cfg.GetClusterName()
6145 elif field == "master_node":
6146 entry = self.cfg.GetMasterNode()
6147 elif field == "drain_flag":
6148 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6149 elif field == "watcher_pause":
6150 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6151 elif field == "volume_group_name":
6152 entry = self.cfg.GetVGName()
6154 raise errors.ParameterError(field)
6155 values.append(entry)
6157 return values
6159 class LUInstanceActivateDisks(NoHooksLU):
6160 """Bring up an instance's disks.
6165 def ExpandNames(self):
6166 self._ExpandAndLockInstance()
6167 self.needed_locks[locking.LEVEL_NODE] = []
6168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6170 def DeclareLocks(self, level):
6171 if level == locking.LEVEL_NODE:
6172 self._LockInstancesNodes()
6174 def CheckPrereq(self):
6175 """Check prerequisites.
6177 This checks that the instance is in the cluster.
6180 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6181 assert self.instance is not None, \
6182 "Cannot retrieve locked instance %s" % self.op.instance_name
6183 _CheckNodeOnline(self, self.instance.primary_node)
6185 def Exec(self, feedback_fn):
6186 """Activate the disks.
6189 disks_ok, disks_info = \
6190 _AssembleInstanceDisks(self, self.instance,
6191 ignore_size=self.op.ignore_size)
6193 raise errors.OpExecError("Cannot activate block devices")
6198 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6200 """Prepare the block devices for an instance.
6202 This sets up the block devices on all nodes.
6204 @type lu: L{LogicalUnit}
6205 @param lu: the logical unit on whose behalf we execute
6206 @type instance: L{objects.Instance}
6207 @param instance: the instance for whose disks we assemble
6208 @type disks: list of L{objects.Disk} or None
6209 @param disks: which disks to assemble (or all, if None)
6210 @type ignore_secondaries: boolean
6211 @param ignore_secondaries: if true, errors on secondary nodes
6212 won't result in an error return from the function
6213 @type ignore_size: boolean
6214 @param ignore_size: if true, the current known size of the disk
6215 will not be used during the disk activation, useful for cases
6216 when the size is wrong
6217 @return: False if the operation failed, otherwise a list of
6218 (host, instance_visible_name, node_visible_name)
6219 with the mapping from node devices to instance devices
6222 device_info = []
6223 disks_ok = True
6224 iname = instance.name
6225 disks = _ExpandCheckDisks(instance, disks)
6227 # With the two-pass mechanism we try to reduce the window of
6228 # opportunity for the race condition of switching DRBD to primary
6229 # before the handshake has occurred, but we do not eliminate it
6231 # The proper fix would be to wait (with some limits) until the
6232 # connection has been made and drbd transitions from WFConnection
6233 # into any other network-connected state (Connected, SyncTarget,
6236 # 1st pass, assemble on all nodes in secondary mode
6237 for idx, inst_disk in enumerate(disks):
6238 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6240 node_disk = node_disk.Copy()
6241 node_disk.UnsetSize()
6242 lu.cfg.SetDiskID(node_disk, node)
6243 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6244 msg = result.fail_msg
6245 if msg:
6246 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6247 " (is_primary=False, pass=1): %s",
6248 inst_disk.iv_name, node, msg)
6249 if not ignore_secondaries:
6252 # FIXME: race condition on drbd migration to primary
6254 # 2nd pass, do only the primary node
6255 for idx, inst_disk in enumerate(disks):
6258 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6259 if node != instance.primary_node:
6262 node_disk = node_disk.Copy()
6263 node_disk.UnsetSize()
6264 lu.cfg.SetDiskID(node_disk, node)
6265 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6266 msg = result.fail_msg
6267 if msg:
6268 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6269 " (is_primary=True, pass=2): %s",
6270 inst_disk.iv_name, node, msg)
6273 dev_path = result.payload
6275 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6277 # leave the disks configured for the primary node
6278 # this is a workaround that would be fixed better by
6279 # improving the logical/physical id handling
6281 lu.cfg.SetDiskID(disk, instance.primary_node)
6283 return disks_ok, device_info
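# On success this returns something of the form
#   (True, [(instance.primary_node, "disk/0", "/dev/drbd0"), ...])
# where the iv_name and device path shown here are only illustrative; the
# actual path is whatever the blockdev_assemble RPC reports for the primary
# node.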
6286 def _StartInstanceDisks(lu, instance, force):
6287 """Start the disks of an instance.
6290 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6291 ignore_secondaries=force)
6293 _ShutdownInstanceDisks(lu, instance)
6294 if force is not None and not force:
6295 lu.proc.LogWarning("", hint="If the message above refers to a"
6297 " you can retry the operation using '--force'.")
6298 raise errors.OpExecError("Disk consistency error")
6301 class LUInstanceDeactivateDisks(NoHooksLU):
6302 """Shutdown an instance's disks.
6307 def ExpandNames(self):
6308 self._ExpandAndLockInstance()
6309 self.needed_locks[locking.LEVEL_NODE] = []
6310 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6312 def DeclareLocks(self, level):
6313 if level == locking.LEVEL_NODE:
6314 self._LockInstancesNodes()
6316 def CheckPrereq(self):
6317 """Check prerequisites.
6319 This checks that the instance is in the cluster.
6322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6323 assert self.instance is not None, \
6324 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 def Exec(self, feedback_fn):
6327 """Deactivate the disks
6330 instance = self.instance
6332 _ShutdownInstanceDisks(self, instance)
6334 _SafeShutdownInstanceDisks(self, instance)
6337 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6338 """Shutdown block devices of an instance.
6340 This function checks if an instance is running, before calling
6341 _ShutdownInstanceDisks.
6344 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6345 _ShutdownInstanceDisks(lu, instance, disks=disks)
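# _SafeShutdownInstanceDisks is the variant to use when the instance must not
# be running: it first asserts the INSTANCE_DOWN state and only then calls
# _ShutdownInstanceDisks, which by itself shuts the devices down
# unconditionally.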
6348 def _ExpandCheckDisks(instance, disks):
6349 """Return the instance disks selected by the disks list
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: selected disks
6353 @rtype: list of L{objects.Disk}
6354 @return: selected instance disks to act on
6357 if disks is None:
6358 return instance.disks
6359 else:
6360 if not set(disks).issubset(instance.disks):
6361 raise errors.ProgrammerError("Can only act on disks belonging to the"
6366 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6367 """Shutdown block devices of an instance.
6369 This does the shutdown on all nodes of the instance.
6371 If ignore_primary is false, errors on the primary node are taken
6372 into account (the shutdown is reported as failed); otherwise they are ignored.
6376 disks = _ExpandCheckDisks(instance, disks)
6378 for disk in disks:
6379 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6380 lu.cfg.SetDiskID(top_disk, node)
6381 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6382 msg = result.fail_msg
6383 if msg:
6384 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6385 disk.iv_name, node, msg)
6386 if ((node == instance.primary_node and not ignore_primary) or
6387 (node != instance.primary_node and not result.offline)):
6392 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6393 """Checks if a node has enough free memory.
6395 This function checks if a given node has the needed amount of free
6396 memory. In case the node has less memory or we cannot get the
6397 information from the node, this function raises an OpPrereqError
6400 @type lu: C{LogicalUnit}
6401 @param lu: a logical unit from which we get configuration data
6403 @param node: the node to check
6404 @type reason: C{str}
6405 @param reason: string to use in the error message
6406 @type requested: C{int}
6407 @param requested: the amount of memory in MiB to check for
6408 @type hypervisor_name: C{str}
6409 @param hypervisor_name: the hypervisor to ask for memory stats
6411 @return: node current free memory
6412 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6413 we cannot check the node
6416 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6417 nodeinfo[node].Raise("Can't get data from node %s" % node,
6418 prereq=True, ecode=errors.ECODE_ENVIRON)
6419 (_, _, (hv_info, )) = nodeinfo[node].payload
6421 free_mem = hv_info.get("memory_free", None)
6422 if not isinstance(free_mem, int):
6423 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6424 " was '%s'" % (node, free_mem),
6425 errors.ECODE_ENVIRON)
6426 if requested > free_mem:
6427 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6428 " needed %s MiB, available %s MiB" %
6429 (node, reason, requested, free_mem),
6434 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6435 """Checks if nodes have enough free disk space in all the VGs.
6437 This function checks if all given nodes have the needed amount of
6438 free disk. In case any node has less disk or we cannot get the
6439 information from the node, this function raises an OpPrereqError
6442 @type lu: C{LogicalUnit}
6443 @param lu: a logical unit from which we get configuration data
6444 @type nodenames: C{list}
6445 @param nodenames: the list of node names to check
6446 @type req_sizes: C{dict}
6447 @param req_sizes: the hash of vg and corresponding amount of disk in
6449 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6450 or we cannot check the node
6453 for vg, req_size in req_sizes.items():
6454 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
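# For example, a req_sizes value of {"xenvg": 10240} (the VG name is purely
# illustrative) requires 10 GiB of free space in that volume group on every
# node in nodenames.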
6457 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6458 """Checks if nodes have enough free disk space in the specified VG.
6460 This function checks if all given nodes have the needed amount of
6461 free disk. In case any node has less disk or we cannot get the
6462 information from the node, this function raises an OpPrereqError
6465 @type lu: C{LogicalUnit}
6466 @param lu: a logical unit from which we get configuration data
6467 @type nodenames: C{list}
6468 @param nodenames: the list of node names to check
6470 @param vg: the volume group to check
6471 @type requested: C{int}
6472 @param requested: the amount of disk in MiB to check for
6473 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6474 or we cannot check the node
6477 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6478 for node in nodenames:
6479 info = nodeinfo[node]
6480 info.Raise("Cannot get current information from node %s" % node,
6481 prereq=True, ecode=errors.ECODE_ENVIRON)
6482 (_, (vg_info, ), _) = info.payload
6483 vg_free = vg_info.get("vg_free", None)
6484 if not isinstance(vg_free, int):
6485 raise errors.OpPrereqError("Can't compute free disk space on node"
6486 " %s for vg %s, result was '%s'" %
6487 (node, vg, vg_free), errors.ECODE_ENVIRON)
6488 if requested > vg_free:
6489 raise errors.OpPrereqError("Not enough disk space on target node %s"
6490 " vg %s: required %d MiB, available %d MiB" %
6491 (node, vg, requested, vg_free),
6495 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6496 """Checks if nodes have enough physical CPUs
6498 This function checks if all given nodes have the needed number of
6499 physical CPUs. In case any node has fewer CPUs or we cannot get the
6500 information from the node, this function raises an OpPrereqError
6503 @type lu: C{LogicalUnit}
6504 @param lu: a logical unit from which we get configuration data
6505 @type nodenames: C{list}
6506 @param nodenames: the list of node names to check
6507 @type requested: C{int}
6508 @param requested: the minimum acceptable number of physical CPUs
6509 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6510 or we cannot check the node
6513 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6514 for node in nodenames:
6515 info = nodeinfo[node]
6516 info.Raise("Cannot get current information from node %s" % node,
6517 prereq=True, ecode=errors.ECODE_ENVIRON)
6518 (_, _, (hv_info, )) = info.payload
6519 num_cpus = hv_info.get("cpu_total", None)
6520 if not isinstance(num_cpus, int):
6521 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6522 " on node %s, result was '%s'" %
6523 (node, num_cpus), errors.ECODE_ENVIRON)
6524 if requested > num_cpus:
6525 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6526 "required" % (node, num_cpus, requested),
6530 class LUInstanceStartup(LogicalUnit):
6531 """Starts an instance.
6534 HPATH = "instance-start"
6535 HTYPE = constants.HTYPE_INSTANCE
6538 def CheckArguments(self):
6540 if self.op.beparams:
6541 # fill the beparams dict
6542 objects.UpgradeBeParams(self.op.beparams)
6543 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6545 def ExpandNames(self):
6546 self._ExpandAndLockInstance()
6547 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6549 def DeclareLocks(self, level):
6550 if level == locking.LEVEL_NODE_RES:
6551 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6553 def BuildHooksEnv(self):
6556 This runs on master, primary and secondary nodes of the instance.
6560 "FORCE": self.op.force,
6563 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6567 def BuildHooksNodes(self):
6568 """Build hooks nodes.
6571 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574 def CheckPrereq(self):
6575 """Check prerequisites.
6577 This checks that the instance is in the cluster.
6580 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6581 assert self.instance is not None, \
6582 "Cannot retrieve locked instance %s" % self.op.instance_name
6585 if self.op.hvparams:
6586 # check hypervisor parameter syntax (locally)
6587 cluster = self.cfg.GetClusterInfo()
6588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6589 filled_hvp = cluster.FillHV(instance)
6590 filled_hvp.update(self.op.hvparams)
6591 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6592 hv_type.CheckParameterSyntax(filled_hvp)
6593 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6595 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6597 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6599 if self.primary_offline and self.op.ignore_offline_nodes:
6600 self.proc.LogWarning("Ignoring offline primary node")
6602 if self.op.hvparams or self.op.beparams:
6603 self.proc.LogWarning("Overridden parameters are ignored")
6604 else:
6605 _CheckNodeOnline(self, instance.primary_node)
6607 bep = self.cfg.GetClusterInfo().FillBE(instance)
6608 bep.update(self.op.beparams)
6610 # check that the bridges exist
6611 _CheckInstanceBridgesExist(self, instance)
6613 remote_info = self.rpc.call_instance_info(instance.primary_node,
6615 instance.hypervisor)
6616 remote_info.Raise("Error checking node %s" % instance.primary_node,
6617 prereq=True, ecode=errors.ECODE_ENVIRON)
6618 if not remote_info.payload: # not running already
6619 _CheckNodeFreeMemory(self, instance.primary_node,
6620 "starting instance %s" % instance.name,
6621 bep[constants.BE_MINMEM], instance.hypervisor)
6623 def Exec(self, feedback_fn):
6624 """Start the instance.
6627 instance = self.instance
6628 force = self.op.force
6630 if not self.op.no_remember:
6631 self.cfg.MarkInstanceUp(instance.name)
6633 if self.primary_offline:
6634 assert self.op.ignore_offline_nodes
6635 self.proc.LogInfo("Primary node offline, marked instance as started")
6636 else:
6637 node_current = instance.primary_node
6639 _StartInstanceDisks(self, instance, force)
6642 self.rpc.call_instance_start(node_current,
6643 (instance, self.op.hvparams,
6645 self.op.startup_paused)
6646 msg = result.fail_msg
6647 if msg:
6648 _ShutdownInstanceDisks(self, instance)
6649 raise errors.OpExecError("Could not start instance: %s" % msg)
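# Summary of the startup flow above: the instance is marked up in the
# configuration (unless no_remember is set); with an offline primary node the
# start is only recorded, otherwise the disks are assembled and the
# instance_start RPC is issued, and the disks are shut down again if that
# call fails.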
6652 class LUInstanceReboot(LogicalUnit):
6653 """Reboot an instance.
6656 HPATH = "instance-reboot"
6657 HTYPE = constants.HTYPE_INSTANCE
6660 def ExpandNames(self):
6661 self._ExpandAndLockInstance()
6663 def BuildHooksEnv(self):
6666 This runs on master, primary and secondary nodes of the instance.
6670 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6671 "REBOOT_TYPE": self.op.reboot_type,
6672 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6675 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6686 def CheckPrereq(self):
6687 """Check prerequisites.
6689 This checks that the instance is in the cluster.
6692 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6693 assert self.instance is not None, \
6694 "Cannot retrieve locked instance %s" % self.op.instance_name
6695 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6696 _CheckNodeOnline(self, instance.primary_node)
6698 # check that the bridges exist
6699 _CheckInstanceBridgesExist(self, instance)
6701 def Exec(self, feedback_fn):
6702 """Reboot the instance.
6705 instance = self.instance
6706 ignore_secondaries = self.op.ignore_secondaries
6707 reboot_type = self.op.reboot_type
6709 remote_info = self.rpc.call_instance_info(instance.primary_node,
6711 instance.hypervisor)
6712 remote_info.Raise("Error checking node %s" % instance.primary_node)
6713 instance_running = bool(remote_info.payload)
6715 node_current = instance.primary_node
6717 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6718 constants.INSTANCE_REBOOT_HARD]:
6719 for disk in instance.disks:
6720 self.cfg.SetDiskID(disk, node_current)
6721 result = self.rpc.call_instance_reboot(node_current, instance,
6723 self.op.shutdown_timeout)
6724 result.Raise("Could not reboot instance")
6725 else:
6726 if instance_running:
6727 result = self.rpc.call_instance_shutdown(node_current, instance,
6728 self.op.shutdown_timeout)
6729 result.Raise("Could not shutdown instance for full reboot")
6730 _ShutdownInstanceDisks(self, instance)
6731 else:
6732 self.LogInfo("Instance %s was already stopped, starting now",
6733 instance.name)
6734 _StartInstanceDisks(self, instance, ignore_secondaries)
6735 result = self.rpc.call_instance_start(node_current,
6736 (instance, None, None), False)
6737 msg = result.fail_msg
6738 if msg:
6739 _ShutdownInstanceDisks(self, instance)
6740 raise errors.OpExecError("Could not start instance for"
6741 " full reboot: %s" % msg)
6743 self.cfg.MarkInstanceUp(instance.name)
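# Summary of the reboot flow above: soft and hard reboots of a running
# instance are delegated to a single instance_reboot RPC, while a full reboot
# (or a reboot of an instance that turns out not to be running) is emulated
# with an optional shutdown plus disk deactivation, followed by disk
# activation and a fresh instance_start; in all cases the instance ends up
# marked as up in the configuration.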
6746 class LUInstanceShutdown(LogicalUnit):
6747 """Shutdown an instance.
6750 HPATH = "instance-stop"
6751 HTYPE = constants.HTYPE_INSTANCE
6754 def ExpandNames(self):
6755 self._ExpandAndLockInstance()
6757 def BuildHooksEnv(self):
6760 This runs on master, primary and secondary nodes of the instance.
6763 env = _BuildInstanceHookEnvByObject(self, self.instance)
6764 env["TIMEOUT"] = self.op.timeout
6767 def BuildHooksNodes(self):
6768 """Build hooks nodes.
6771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6774 def CheckPrereq(self):
6775 """Check prerequisites.
6777 This checks that the instance is in the cluster.
6780 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6781 assert self.instance is not None, \
6782 "Cannot retrieve locked instance %s" % self.op.instance_name
6784 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6786 self.primary_offline = \
6787 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6789 if self.primary_offline and self.op.ignore_offline_nodes:
6790 self.proc.LogWarning("Ignoring offline primary node")
6791 else:
6792 _CheckNodeOnline(self, self.instance.primary_node)
6794 def Exec(self, feedback_fn):
6795 """Shutdown the instance.
6798 instance = self.instance
6799 node_current = instance.primary_node
6800 timeout = self.op.timeout
6802 if not self.op.no_remember:
6803 self.cfg.MarkInstanceDown(instance.name)
6805 if self.primary_offline:
6806 assert self.op.ignore_offline_nodes
6807 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6808 else:
6809 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6810 msg = result.fail_msg
6811 if msg:
6812 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6814 _ShutdownInstanceDisks(self, instance)
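# Note that the instance is marked down in the configuration before the
# shutdown RPC is issued (unless no_remember is set), so even a failed
# shutdown leaves the recorded admin state as stopped; with an offline
# primary node only the configuration is updated.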
6817 class LUInstanceReinstall(LogicalUnit):
6818 """Reinstall an instance.
6821 HPATH = "instance-reinstall"
6822 HTYPE = constants.HTYPE_INSTANCE
6825 def ExpandNames(self):
6826 self._ExpandAndLockInstance()
6828 def BuildHooksEnv(self):
6831 This runs on master, primary and secondary nodes of the instance.
6834 return _BuildInstanceHookEnvByObject(self, self.instance)
6836 def BuildHooksNodes(self):
6837 """Build hooks nodes.
6840 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6843 def CheckPrereq(self):
6844 """Check prerequisites.
6846 This checks that the instance is in the cluster and is not running.
6849 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6850 assert instance is not None, \
6851 "Cannot retrieve locked instance %s" % self.op.instance_name
6852 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6853 " offline, cannot reinstall")
6854 for node in instance.secondary_nodes:
6855 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6856 " cannot reinstall")
6858 if instance.disk_template == constants.DT_DISKLESS:
6859 raise errors.OpPrereqError("Instance '%s' has no disks" %
6860 self.op.instance_name,
6862 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6864 if self.op.os_type is not None:
6866 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6867 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6868 instance_os = self.op.os_type
6869 else:
6870 instance_os = instance.os
6872 nodelist = list(instance.all_nodes)
6874 if self.op.osparams:
6875 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6876 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6877 self.os_inst = i_osdict # the new dict (without defaults)
6881 self.instance = instance
6883 def Exec(self, feedback_fn):
6884 """Reinstall the instance.
6887 inst = self.instance
6889 if self.op.os_type is not None:
6890 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6891 inst.os = self.op.os_type
6892 # Write to configuration
6893 self.cfg.Update(inst, feedback_fn)
6895 _StartInstanceDisks(self, inst, None)
6897 feedback_fn("Running the instance OS create scripts...")
6898 # FIXME: pass debug option from opcode to backend
6899 result = self.rpc.call_instance_os_add(inst.primary_node,
6900 (inst, self.os_inst), True,
6901 self.op.debug_level)
6902 result.Raise("Could not install OS for instance %s on node %s" %
6903 (inst.name, inst.primary_node))
6905 _ShutdownInstanceDisks(self, inst)
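# Reinstallation reuses the existing disks: they are activated, the OS create
# scripts are re-run on the primary node via instance_os_add (including any
# updated OS parameters), and the disks are deactivated again; the instance
# itself is never started here.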
6908 class LUInstanceRecreateDisks(LogicalUnit):
6909 """Recreate an instance's missing disks.
6912 HPATH = "instance-recreate-disks"
6913 HTYPE = constants.HTYPE_INSTANCE
6916 _MODIFYABLE = frozenset([
6917 constants.IDISK_SIZE,
6918 constants.IDISK_MODE,
6921 # New or changed disk parameters may have different semantics
6922 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6923 constants.IDISK_ADOPT,
6925 # TODO: Implement support for changing the VG while recreating
6927 constants.IDISK_METAVG,
6930 def CheckArguments(self):
6931 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6932 # Normalize and convert deprecated list of disk indices
6933 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
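# For example, a legacy request of disks=[2, 0] is normalized above to
# [(0, {}), (2, {})], i.e. each index paired with an empty parameter
# override dictionary.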
6935 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6937 raise errors.OpPrereqError("Some disks have been specified more than"
6938 " once: %s" % utils.CommaJoin(duplicates),
6941 for (idx, params) in self.op.disks:
6942 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6943 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6945 raise errors.OpPrereqError("Parameters for disk %s try to change"
6946 " unmodifiable parameter(s): %s" %
6947 (idx, utils.CommaJoin(unsupported)),
6950 def ExpandNames(self):
6951 self._ExpandAndLockInstance()
6952 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6953 if self.op.nodes:
6954 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6955 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6956 else:
6957 self.needed_locks[locking.LEVEL_NODE] = []
6958 self.needed_locks[locking.LEVEL_NODE_RES] = []
6960 def DeclareLocks(self, level):
6961 if level == locking.LEVEL_NODE:
6962 # if we replace the nodes, we only need to lock the old primary,
6963 # otherwise we need to lock all nodes for disk re-creation
6964 primary_only = bool(self.op.nodes)
6965 self._LockInstancesNodes(primary_only=primary_only)
6966 elif level == locking.LEVEL_NODE_RES:
6968 self.needed_locks[locking.LEVEL_NODE_RES] = \
6969 self.needed_locks[locking.LEVEL_NODE][:]
6971 def BuildHooksEnv(self):
6974 This runs on master, primary and secondary nodes of the instance.
6977 return _BuildInstanceHookEnvByObject(self, self.instance)
6979 def BuildHooksNodes(self):
6980 """Build hooks nodes.
6983 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6986 def CheckPrereq(self):
6987 """Check prerequisites.
6989 This checks that the instance is in the cluster and is not running.
6992 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6993 assert instance is not None, \
6994 "Cannot retrieve locked instance %s" % self.op.instance_name
6995 if self.op.nodes:
6996 if len(self.op.nodes) != len(instance.all_nodes):
6997 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6998 " %d replacement nodes were specified" %
6999 (instance.name, len(instance.all_nodes),
7000 len(self.op.nodes)),
7002 assert instance.disk_template != constants.DT_DRBD8 or \
7003 len(self.op.nodes) == 2
7004 assert instance.disk_template != constants.DT_PLAIN or \
7005 len(self.op.nodes) == 1
7006 primary_node = self.op.nodes[0]
7007 else:
7008 primary_node = instance.primary_node
7009 _CheckNodeOnline(self, primary_node)
7011 if instance.disk_template == constants.DT_DISKLESS:
7012 raise errors.OpPrereqError("Instance '%s' has no disks" %
7013 self.op.instance_name, errors.ECODE_INVAL)
7015 # if we replace nodes *and* the old primary is offline, we don't
7017 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7018 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7019 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7020 if not (self.op.nodes and old_pnode.offline):
7021 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7022 msg="cannot recreate disks")
7024 if self.op.disks:
7025 self.disks = dict(self.op.disks)
7026 else:
7027 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7029 maxidx = max(self.disks.keys())
7030 if maxidx >= len(instance.disks):
7031 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7034 if (self.op.nodes and
7035 sorted(self.disks.keys()) != range(len(instance.disks))):
7036 raise errors.OpPrereqError("Can't recreate disks partially and"
7037 " change the nodes at the same time",
7040 self.instance = instance
7042 def Exec(self, feedback_fn):
7043 """Recreate the disks.
7046 instance = self.instance
7048 assert (self.owned_locks(locking.LEVEL_NODE) ==
7049 self.owned_locks(locking.LEVEL_NODE_RES))
7052 mods = [] # keeps track of needed changes
7054 for idx, disk in enumerate(instance.disks):
7056 changes = self.disks[idx]
7058 # Disk should not be recreated
7062 # update secondaries for disks, if needed
7063 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7064 # need to update the nodes and minors
7065 assert len(self.op.nodes) == 2
7066 assert len(disk.logical_id) == 6 # otherwise disk internals
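# A DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a, minor_b,
# secret); below, the old port and secret are kept while the new nodes and
# the freshly allocated minors are swapped in.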
7068 (_, _, old_port, _, _, old_secret) = disk.logical_id
7069 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7070 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7071 new_minors[0], new_minors[1], old_secret)
7072 assert len(disk.logical_id) == len(new_id)
7076 mods.append((idx, new_id, changes))
7078 # now that we have passed all asserts above, we can apply the mods
7079 # in a single run (to avoid partial changes)
7080 for idx, new_id, changes in mods:
7081 disk = instance.disks[idx]
7082 if new_id is not None:
7083 assert disk.dev_type == constants.LD_DRBD8
7084 disk.logical_id = new_id
7086 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7087 mode=changes.get(constants.IDISK_MODE, None))
7089 # change primary node, if needed
7090 if self.op.nodes:
7091 instance.primary_node = self.op.nodes[0]
7092 self.LogWarning("Changing the instance's nodes, you will have to"
7093 " remove any disks left on the older nodes manually")
7096 self.cfg.Update(instance, feedback_fn)
7098 _CreateDisks(self, instance, to_skip=to_skip)
7101 class LUInstanceRename(LogicalUnit):
7102 """Rename an instance.
7105 HPATH = "instance-rename"
7106 HTYPE = constants.HTYPE_INSTANCE
7108 def CheckArguments(self):
7112 if self.op.ip_check and not self.op.name_check:
7113 # TODO: make the ip check more flexible and not depend on the name check
7114 raise errors.OpPrereqError("IP address check requires a name check",
7117 def BuildHooksEnv(self):
7120 This runs on master, primary and secondary nodes of the instance.
7123 env = _BuildInstanceHookEnvByObject(self, self.instance)
7124 env["INSTANCE_NEW_NAME"] = self.op.new_name
7127 def BuildHooksNodes(self):
7128 """Build hooks nodes.
7131 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7134 def CheckPrereq(self):
7135 """Check prerequisites.
7137 This checks that the instance is in the cluster and is not running.
7140 self.op.instance_name = _ExpandInstanceName(self.cfg,
7141 self.op.instance_name)
7142 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7143 assert instance is not None
7144 _CheckNodeOnline(self, instance.primary_node)
7145 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7146 msg="cannot rename")
7147 self.instance = instance
7149 new_name = self.op.new_name
7150 if self.op.name_check:
7151 hostname = netutils.GetHostname(name=new_name)
7152 if hostname.name != new_name:
7153 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7155 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7156 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7157 " same as given hostname '%s'") %
7158 (hostname.name, self.op.new_name),
7160 new_name = self.op.new_name = hostname.name
7161 if (self.op.ip_check and
7162 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7163 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7164 (hostname.ip, new_name),
7165 errors.ECODE_NOTUNIQUE)
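# In other words, with name_check the new name is first resolved (e.g. a
# short name such as "inst1" may come back as "inst1.example.com"; the domain
# here is purely illustrative) and, if ip_check is also set, the resolved IP
# must not answer on the node daemon port before the rename is allowed.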
7167 instance_list = self.cfg.GetInstanceList()
7168 if new_name in instance_list and new_name != instance.name:
7169 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7170 new_name, errors.ECODE_EXISTS)
7172 def Exec(self, feedback_fn):
7173 """Rename the instance.
7176 inst = self.instance
7177 old_name = inst.name
7179 rename_file_storage = False
7180 if (inst.disk_template in constants.DTS_FILEBASED and
7181 self.op.new_name != inst.name):
7182 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7183 rename_file_storage = True
7185 self.cfg.RenameInstance(inst.name, self.op.new_name)
7186 # Change the instance lock. This is definitely safe while we hold the BGL.
7187 # Otherwise the new lock would have to be added in acquired mode.
7189 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7190 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7192 # re-read the instance from the configuration after rename
7193 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7195 if rename_file_storage:
7196 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7197 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7198 old_file_storage_dir,
7199 new_file_storage_dir)
7200 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7201 " (but the instance has been renamed in Ganeti)" %
7202 (inst.primary_node, old_file_storage_dir,
7203 new_file_storage_dir))
7205 _StartInstanceDisks(self, inst, None)
7207 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7208 old_name, self.op.debug_level)
7209 msg = result.fail_msg
7210 if msg:
7211 msg = ("Could not run OS rename script for instance %s on node %s"
7212 " (but the instance has been renamed in Ganeti): %s" %
7213 (inst.name, inst.primary_node, msg))
7214 self.proc.LogWarning(msg)
7216 _ShutdownInstanceDisks(self, inst)
7221 class LUInstanceRemove(LogicalUnit):
7222 """Remove an instance.
7225 HPATH = "instance-remove"
7226 HTYPE = constants.HTYPE_INSTANCE
7229 def ExpandNames(self):
7230 self._ExpandAndLockInstance()
7231 self.needed_locks[locking.LEVEL_NODE] = []
7232 self.needed_locks[locking.LEVEL_NODE_RES] = []
7233 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7235 def DeclareLocks(self, level):
7236 if level == locking.LEVEL_NODE:
7237 self._LockInstancesNodes()
7238 elif level == locking.LEVEL_NODE_RES:
7240 self.needed_locks[locking.LEVEL_NODE_RES] = \
7241 self.needed_locks[locking.LEVEL_NODE][:]
7243 def BuildHooksEnv(self):
7246 This runs on master, primary and secondary nodes of the instance.
7249 env = _BuildInstanceHookEnvByObject(self, self.instance)
7250 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7253 def BuildHooksNodes(self):
7254 """Build hooks nodes.
7257 nl = [self.cfg.GetMasterNode()]
7258 nl_post = list(self.instance.all_nodes) + nl
7259 return (nl, nl_post)
7261 def CheckPrereq(self):
7262 """Check prerequisites.
7264 This checks that the instance is in the cluster.
7267 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7268 assert self.instance is not None, \
7269 "Cannot retrieve locked instance %s" % self.op.instance_name
7271 def Exec(self, feedback_fn):
7272 """Remove the instance.
7275 instance = self.instance
7276 logging.info("Shutting down instance %s on node %s",
7277 instance.name, instance.primary_node)
7279 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7280 self.op.shutdown_timeout)
7281 msg = result.fail_msg
7282 if msg:
7283 if self.op.ignore_failures:
7284 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7285 else:
7286 raise errors.OpExecError("Could not shutdown instance %s on"
7288 (instance.name, instance.primary_node, msg))
7290 assert (self.owned_locks(locking.LEVEL_NODE) ==
7291 self.owned_locks(locking.LEVEL_NODE_RES))
7292 assert not (set(instance.all_nodes) -
7293 self.owned_locks(locking.LEVEL_NODE)), \
7294 "Not owning correct locks"
7296 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7299 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7300 """Utility function to remove an instance.
7303 logging.info("Removing block devices for instance %s", instance.name)
7305 if not _RemoveDisks(lu, instance):
7306 if not ignore_failures:
7307 raise errors.OpExecError("Can't remove instance's disks")
7308 feedback_fn("Warning: can't remove instance's disks")
7310 logging.info("Removing instance %s out of cluster config", instance.name)
7312 lu.cfg.RemoveInstance(instance.name)
7314 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7315 "Instance lock removal conflict"
7317 # Remove lock for the instance
7318 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
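# Nothing is released here: recording the name in lu.remove_locks asks the
# surrounding processor to drop (and delete) the instance's lock once the LU
# has finished, mirroring the instance's removal from the configuration.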
7321 class LUInstanceQuery(NoHooksLU):
7322 """Logical unit for querying instances.
7325 # pylint: disable=W0142
7328 def CheckArguments(self):
7329 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7330 self.op.output_fields, self.op.use_locking)
7332 def ExpandNames(self):
7333 self.iq.ExpandNames(self)
7335 def DeclareLocks(self, level):
7336 self.iq.DeclareLocks(self, level)
7338 def Exec(self, feedback_fn):
7339 return self.iq.OldStyleQuery(self)
7342 class LUInstanceFailover(LogicalUnit):
7343 """Failover an instance.
7346 HPATH = "instance-failover"
7347 HTYPE = constants.HTYPE_INSTANCE
7350 def CheckArguments(self):
7351 """Check the arguments.
7354 self.iallocator = getattr(self.op, "iallocator", None)
7355 self.target_node = getattr(self.op, "target_node", None)
7357 def ExpandNames(self):
7358 self._ExpandAndLockInstance()
7360 if self.op.target_node is not None:
7361 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7363 self.needed_locks[locking.LEVEL_NODE] = []
7364 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7369 ignore_consistency = self.op.ignore_consistency
7370 shutdown_timeout = self.op.shutdown_timeout
7371 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7374 ignore_consistency=ignore_consistency,
7375 shutdown_timeout=shutdown_timeout,
7376 ignore_ipolicy=self.op.ignore_ipolicy)
7377 self.tasklets = [self._migrater]
7379 def DeclareLocks(self, level):
7380 if level == locking.LEVEL_NODE:
7381 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7382 if instance.disk_template in constants.DTS_EXT_MIRROR:
7383 if self.op.target_node is None:
7384 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7386 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7387 self.op.target_node]
7388 del self.recalculate_locks[locking.LEVEL_NODE]
7390 self._LockInstancesNodes()
7391 elif level == locking.LEVEL_NODE_RES:
7393 self.needed_locks[locking.LEVEL_NODE_RES] = \
7394 self.needed_locks[locking.LEVEL_NODE][:]
7396 def BuildHooksEnv(self):
7399 This runs on master, primary and secondary nodes of the instance.
7402 instance = self._migrater.instance
7403 source_node = instance.primary_node
7404 target_node = self.op.target_node
7406 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7407 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7408 "OLD_PRIMARY": source_node,
7409 "NEW_PRIMARY": target_node,
7412 if instance.disk_template in constants.DTS_INT_MIRROR:
7413 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7414 env["NEW_SECONDARY"] = source_node
7416 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7418 env.update(_BuildInstanceHookEnvByObject(self, instance))
7422 def BuildHooksNodes(self):
7423 """Build hooks nodes.
7426 instance = self._migrater.instance
7427 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7428 return (nl, nl + [instance.primary_node])
7431 class LUInstanceMigrate(LogicalUnit):
7432 """Migrate an instance.
7434 This is migration without shutting the instance down, as opposed to
7435 failover, which is done after shutting it down.
7438 HPATH = "instance-migrate"
7439 HTYPE = constants.HTYPE_INSTANCE
7442 def ExpandNames(self):
7443 self._ExpandAndLockInstance()
7445 if self.op.target_node is not None:
7446 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7448 self.needed_locks[locking.LEVEL_NODE] = []
7449 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7451 self.needed_locks[locking.LEVEL_NODE_RES] = []
7452 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7455 TLMigrateInstance(self, self.op.instance_name,
7456 cleanup=self.op.cleanup,
7458 fallback=self.op.allow_failover,
7459 allow_runtime_changes=self.op.allow_runtime_changes,
7460 ignore_ipolicy=self.op.ignore_ipolicy)
7461 self.tasklets = [self._migrater]
7463 def DeclareLocks(self, level):
7464 if level == locking.LEVEL_NODE:
7465 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7466 if instance.disk_template in constants.DTS_EXT_MIRROR:
7467 if self.op.target_node is None:
7468 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7470 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7471 self.op.target_node]
7472 del self.recalculate_locks[locking.LEVEL_NODE]
7474 self._LockInstancesNodes()
7475 elif level == locking.LEVEL_NODE_RES:
7477 self.needed_locks[locking.LEVEL_NODE_RES] = \
7478 self.needed_locks[locking.LEVEL_NODE][:]
7480 def BuildHooksEnv(self):
7483 This runs on master, primary and secondary nodes of the instance.
7486 instance = self._migrater.instance
7487 source_node = instance.primary_node
7488 target_node = self.op.target_node
7489 env = _BuildInstanceHookEnvByObject(self, instance)
7491 "MIGRATE_LIVE": self._migrater.live,
7492 "MIGRATE_CLEANUP": self.op.cleanup,
7493 "OLD_PRIMARY": source_node,
7494 "NEW_PRIMARY": target_node,
7495 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7498 if instance.disk_template in constants.DTS_INT_MIRROR:
7499 env["OLD_SECONDARY"] = target_node
7500 env["NEW_SECONDARY"] = source_node
7502 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7506 def BuildHooksNodes(self):
7507 """Build hooks nodes.
7510 instance = self._migrater.instance
7511 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7512 return (nl, nl + [instance.primary_node])
7515 class LUInstanceMove(LogicalUnit):
7516 """Move an instance by data-copying.
7519 HPATH = "instance-move"
7520 HTYPE = constants.HTYPE_INSTANCE
7523 def ExpandNames(self):
7524 self._ExpandAndLockInstance()
7525 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7526 self.op.target_node = target_node
7527 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7528 self.needed_locks[locking.LEVEL_NODE_RES] = []
7529 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7531 def DeclareLocks(self, level):
7532 if level == locking.LEVEL_NODE:
7533 self._LockInstancesNodes(primary_only=True)
7534 elif level == locking.LEVEL_NODE_RES:
7536 self.needed_locks[locking.LEVEL_NODE_RES] = \
7537 self.needed_locks[locking.LEVEL_NODE][:]
7539 def BuildHooksEnv(self):
7542 This runs on master, primary and secondary nodes of the instance.
7546 "TARGET_NODE": self.op.target_node,
7547 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7549 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7552 def BuildHooksNodes(self):
7553 """Build hooks nodes.
7557 self.cfg.GetMasterNode(),
7558 self.instance.primary_node,
7559 self.op.target_node,
7563 def CheckPrereq(self):
7564 """Check prerequisites.
7566 This checks that the instance is in the cluster.
7569 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7570 assert self.instance is not None, \
7571 "Cannot retrieve locked instance %s" % self.op.instance_name
7573 node = self.cfg.GetNodeInfo(self.op.target_node)
7574 assert node is not None, \
7575 "Cannot retrieve locked node %s" % self.op.target_node
7577 self.target_node = target_node = node.name
7579 if target_node == instance.primary_node:
7580 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7581 (instance.name, target_node),
7584 bep = self.cfg.GetClusterInfo().FillBE(instance)
7586 for idx, dsk in enumerate(instance.disks):
7587 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7588 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7589 " cannot copy" % idx, errors.ECODE_STATE)
7591 _CheckNodeOnline(self, target_node)
7592 _CheckNodeNotDrained(self, target_node)
7593 _CheckNodeVmCapable(self, target_node)
7594 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7595 self.cfg.GetNodeGroup(node.group))
7596 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7597 ignore=self.op.ignore_ipolicy)
7599 if instance.admin_state == constants.ADMINST_UP:
7600 # check memory requirements on the target node
7601 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7602 instance.name, bep[constants.BE_MAXMEM],
7603 instance.hypervisor)
7605 self.LogInfo("Not checking memory on the secondary node as"
7606 " instance will not be started")
7608 # check bridge existence
7609 _CheckInstanceBridgesExist(self, instance, node=target_node)
7611 def Exec(self, feedback_fn):
7612 """Move an instance.
7614 The move is done by shutting it down on its present node, copying
7615 the data over (slow) and starting it on the new node.
7618 instance = self.instance
7620 source_node = instance.primary_node
7621 target_node = self.target_node
7623 self.LogInfo("Shutting down instance %s on source node %s",
7624 instance.name, source_node)
7626 assert (self.owned_locks(locking.LEVEL_NODE) ==
7627 self.owned_locks(locking.LEVEL_NODE_RES))
7629 result = self.rpc.call_instance_shutdown(source_node, instance,
7630 self.op.shutdown_timeout)
7631 msg = result.fail_msg
7633 if self.op.ignore_consistency:
7634 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7635 " Proceeding anyway. Please make sure node"
7636 " %s is down. Error details: %s",
7637 instance.name, source_node, source_node, msg)
7639 raise errors.OpExecError("Could not shutdown instance %s on"
7641 (instance.name, source_node, msg))
7643 # create the target disks
7645 _CreateDisks(self, instance, target_node=target_node)
7646 except errors.OpExecError:
7647 self.LogWarning("Device creation failed, reverting...")
7649 _RemoveDisks(self, instance, target_node=target_node)
7651 self.cfg.ReleaseDRBDMinors(instance.name)
7654 cluster_name = self.cfg.GetClusterInfo().cluster_name
7657 # activate, get path, copy the data over
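# For each disk this assembles the newly created device on the target node
# (to obtain a device path) and then asks the source node to export the
# disk's contents to that path; any per-disk failure is collected in errs.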
7658 for idx, disk in enumerate(instance.disks):
7659 self.LogInfo("Copying data for disk %d", idx)
7660 result = self.rpc.call_blockdev_assemble(target_node, disk,
7661 instance.name, True, idx)
7663 self.LogWarning("Can't assemble newly created disk %d: %s",
7664 idx, result.fail_msg)
7665 errs.append(result.fail_msg)
7667 dev_path = result.payload
7668 result = self.rpc.call_blockdev_export(source_node, disk,
7669 target_node, dev_path,
7672 self.LogWarning("Can't copy data over for disk %d: %s",
7673 idx, result.fail_msg)
7674 errs.append(result.fail_msg)
7678 self.LogWarning("Some disks failed to copy, aborting")
7680 _RemoveDisks(self, instance, target_node=target_node)
7682 self.cfg.ReleaseDRBDMinors(instance.name)
7683 raise errors.OpExecError("Errors during disk copy: %s" %
7686 instance.primary_node = target_node
7687 self.cfg.Update(instance, feedback_fn)
7689 self.LogInfo("Removing the disks on the original node")
7690 _RemoveDisks(self, instance, target_node=source_node)
7692 # Only start the instance if it's marked as up
7693 if instance.admin_state == constants.ADMINST_UP:
7694 self.LogInfo("Starting instance %s on node %s",
7695 instance.name, target_node)
7697 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7698 ignore_secondaries=True)
7700 _ShutdownInstanceDisks(self, instance)
7701 raise errors.OpExecError("Can't activate the instance's disks")
7703 result = self.rpc.call_instance_start(target_node,
7704 (instance, None, None), False)
7705 msg = result.fail_msg
7707 _ShutdownInstanceDisks(self, instance)
7708 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7709 (instance.name, target_node, msg))
7712 class LUNodeMigrate(LogicalUnit):
7713 """Migrate all instances from a node.
7716 HPATH = "node-migrate"
7717 HTYPE = constants.HTYPE_NODE
7720 def CheckArguments(self):
7723 def ExpandNames(self):
7724 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7726 self.share_locks = _ShareAll()
7727 self.needed_locks = {
7728 locking.LEVEL_NODE: [self.op.node_name],
7731 def BuildHooksEnv(self):
7734 This runs on the master, the primary and all the secondaries.
7738 "NODE_NAME": self.op.node_name,
7739 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7742 def BuildHooksNodes(self):
7743 """Build hooks nodes.
7746 nl = [self.cfg.GetMasterNode()]
7749 def CheckPrereq(self):
7752 def Exec(self, feedback_fn):
7753 # Prepare jobs for migration instances
7754 allow_runtime_changes = self.op.allow_runtime_changes
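# One OpInstanceMigrate job is submitted per instance whose primary node is
# the evacuated node; the job IDs are handed back to the caller through
# ResultWithJobs below.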
7756 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7759 iallocator=self.op.iallocator,
7760 target_node=self.op.target_node,
7761 allow_runtime_changes=allow_runtime_changes,
7762 ignore_ipolicy=self.op.ignore_ipolicy)]
7763 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7766 # TODO: Run iallocator in this opcode and pass correct placement options to
7767 # OpInstanceMigrate. Since other jobs can modify the cluster between
7768 # running the iallocator and the actual migration, a good consistency model
7769 # will have to be found.
7771 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7772 frozenset([self.op.node_name]))
7774 return ResultWithJobs(jobs)
7777 class TLMigrateInstance(Tasklet):
7778 """Tasklet class for instance migration.
7781 @ivar live: whether the migration will be done live or non-live;
7782 this variable is initialized only after CheckPrereq has run
7783 @type cleanup: boolean
7784 @ivar cleanup: Whether we clean up from a failed migration
7785 @type iallocator: string
7786 @ivar iallocator: The iallocator used to determine target_node
7787 @type target_node: string
7788 @ivar target_node: If given, the target_node to reallocate the instance to
7789 @type failover: boolean
7790 @ivar failover: Whether operation results in failover or migration
7791 @type fallback: boolean
7792 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7794 @type ignore_consistency: boolean
7795 @ivar ignore_consistency: Whether we should ignore consistency between source and target nodes
7797 @type shutdown_timeout: int
7798 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
7799 @type ignore_ipolicy: bool
7800 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7805 _MIGRATION_POLL_INTERVAL = 1 # seconds
7806 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7808 def __init__(self, lu, instance_name, cleanup=False,
7809 failover=False, fallback=False,
7810 ignore_consistency=False,
7811 allow_runtime_changes=True,
7812 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7813 ignore_ipolicy=False):
7814 """Initializes this class.
7817 Tasklet.__init__(self, lu)
7820 self.instance_name = instance_name
7821 self.cleanup = cleanup
7822 self.live = False # will be overridden later
7823 self.failover = failover
7824 self.fallback = fallback
7825 self.ignore_consistency = ignore_consistency
7826 self.shutdown_timeout = shutdown_timeout
7827 self.ignore_ipolicy = ignore_ipolicy
7828 self.allow_runtime_changes = allow_runtime_changes
7830 def CheckPrereq(self):
7831 """Check prerequisites.
7833 This checks that the instance is in the cluster.
7836 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7837 instance = self.cfg.GetInstanceInfo(instance_name)
7838 assert instance is not None
7839 self.instance = instance
7840 cluster = self.cfg.GetClusterInfo()
7842 if (not self.cleanup and
7843 not instance.admin_state == constants.ADMINST_UP and
7844 not self.failover and self.fallback):
7845 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7846 " switching to failover")
7847 self.failover = True
7849 if instance.disk_template not in constants.DTS_MIRRORED:
7854 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7855 " %s" % (instance.disk_template, text),
7858 if instance.disk_template in constants.DTS_EXT_MIRROR:
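# Externally mirrored instances can be placed on any node, so either an
# iallocator or an explicit target node is required here; internally
# mirrored (DRBD) instances handled below can only go to their secondary.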
7859 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7861 if self.lu.op.iallocator:
7862 self._RunAllocator()
7864 # We set self.target_node as it is required by
7866 self.target_node = self.lu.op.target_node
7868 # Check that the target node is correct in terms of instance policy
7869 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7870 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7871 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7872 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7873 ignore=self.ignore_ipolicy)
7875 # self.target_node is already populated, either directly or by the iallocator run
7877 target_node = self.target_node
7878 if self.target_node == instance.primary_node:
7879 raise errors.OpPrereqError("Cannot migrate instance %s"
7880 " to its primary (%s)" %
7881 (instance.name, instance.primary_node))
7883 if len(self.lu.tasklets) == 1:
7884 # It is safe to release locks only when we're the only tasklet
7886 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7887 keep=[instance.primary_node, self.target_node])
7890 secondary_nodes = instance.secondary_nodes
7891 if not secondary_nodes:
7892 raise errors.ConfigurationError("No secondary node but using"
7893 " %s disk template" %
7894 instance.disk_template)
7895 target_node = secondary_nodes[0]
7896 if self.lu.op.iallocator or (self.lu.op.target_node and
7897 self.lu.op.target_node != target_node):
7899 text = "failed over"
7902 raise errors.OpPrereqError("Instances with disk template %s cannot"
7903 " be %s to arbitrary nodes"
7904 " (neither an iallocator nor a target"
7905 " node can be passed)" %
7906 (instance.disk_template, text),
7908 nodeinfo = self.cfg.GetNodeInfo(target_node)
7909 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7910 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7911 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7912 ignore=self.ignore_ipolicy)
7914 i_be = cluster.FillBE(instance)
7916 # check memory requirements on the target node
7917 if (not self.cleanup and
7918 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7919 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7920 "migrating instance %s" %
7922 i_be[constants.BE_MINMEM],
7923 instance.hypervisor)
7925 self.lu.LogInfo("Not checking memory on the secondary node as"
7926 " instance will not be started")
7928 # check if failover must be forced instead of migration
7929 if (not self.cleanup and not self.failover and
7930 i_be[constants.BE_ALWAYS_FAILOVER]):
7932 self.lu.LogInfo("Instance configured to always failover; fallback"
7934 self.failover = True
7936 raise errors.OpPrereqError("This instance has been configured to"
7937 " always failover, please allow failover",
7940 # check bridge existence
7941 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7943 if not self.cleanup:
7944 _CheckNodeNotDrained(self.lu, target_node)
7945 if not self.failover:
7946 result = self.rpc.call_instance_migratable(instance.primary_node,
7948 if result.fail_msg and self.fallback:
7949 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7951 self.failover = True
7953 result.Raise("Can't migrate, please use failover",
7954 prereq=True, ecode=errors.ECODE_STATE)
7956 assert not (self.failover and self.cleanup)
7958 if not self.failover:
7959 if self.lu.op.live is not None and self.lu.op.mode is not None:
7960 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7961 " parameters are accepted",
7963 if self.lu.op.live is not None:
7965 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7967 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7968 # reset the 'live' parameter to None so that repeated
7969 # invocations of CheckPrereq do not raise an exception
7970 self.lu.op.live = None
7971 elif self.lu.op.mode is None:
7972 # read the default value from the hypervisor
7973 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7974 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7976 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7978 # Failover is never live
7981 if not (self.failover or self.cleanup):
7982 remote_info = self.rpc.call_instance_info(instance.primary_node,
7984 instance.hypervisor)
7985 remote_info.Raise("Error checking instance on node %s" %
7986 instance.primary_node)
7987 instance_running = bool(remote_info.payload)
7988 if instance_running:
7989 self.current_mem = int(remote_info.payload["memory"])
7991 def _RunAllocator(self):
7992 """Run the allocator based on input opcode.
7995 # FIXME: add a self.ignore_ipolicy option
7996 ial = IAllocator(self.cfg, self.rpc,
7997 mode=constants.IALLOCATOR_MODE_RELOC,
7998 name=self.instance_name,
7999 # TODO See why hail breaks with a single node below
8000 relocate_from=[self.instance.primary_node,
8001 self.instance.primary_node],
8004 ial.Run(self.lu.op.iallocator)
8007 raise errors.OpPrereqError("Can't compute nodes using"
8008 " iallocator '%s': %s" %
8009 (self.lu.op.iallocator, ial.info),
8011 if len(ial.result) != ial.required_nodes:
8012 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8013 " of nodes (%s), required %s" %
8014 (self.lu.op.iallocator, len(ial.result),
8015 ial.required_nodes), errors.ECODE_FAULT)
8016 self.target_node = ial.result[0]
8017 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8018 self.instance_name, self.lu.op.iallocator,
8019 utils.CommaJoin(ial.result))
8021 def _WaitUntilSync(self):
8022 """Poll with custom rpc for disk sync.
8024 This uses our own step-based rpc call.
8027 self.feedback_fn("* wait until resync is done")
8031 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8033 self.instance.disks)
8035 for node, nres in result.items():
8036 nres.Raise("Cannot resync disks on node %s" % node)
8037 node_done, node_percent = nres.payload
8038 all_done = all_done and node_done
8039 if node_percent is not None:
8040 min_percent = min(min_percent, node_percent)
8042 if min_percent < 100:
8043 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8046 def _EnsureSecondary(self, node):
8047 """Demote a node to secondary.
8050 self.feedback_fn("* switching node %s to secondary mode" % node)
8052 for dev in self.instance.disks:
8053 self.cfg.SetDiskID(dev, node)
8055 result = self.rpc.call_blockdev_close(node, self.instance.name,
8056 self.instance.disks)
8057 result.Raise("Cannot change disk to secondary on node %s" % node)
8059 def _GoStandalone(self):
8060 """Disconnect from the network.
8063 self.feedback_fn("* changing into standalone mode")
8064 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8065 self.instance.disks)
8066 for node, nres in result.items():
8067 nres.Raise("Cannot disconnect disks node %s" % node)
8069 def _GoReconnect(self, multimaster):
8070 """Reconnect to the network.
8076 msg = "single-master"
8077 self.feedback_fn("* changing disks into %s mode" % msg)
8078 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8079 self.instance.disks,
8080 self.instance.name, multimaster)
8081 for node, nres in result.items():
8082 nres.Raise("Cannot change disks config on node %s" % node)
8084 def _ExecCleanup(self):
8085 """Try to cleanup after a failed migration.
8087 The cleanup is done by:
8088 - check that the instance is running only on one node
8089 (and update the config if needed)
8090 - change disks on its secondary node to secondary
8091 - wait until disks are fully synchronized
8092 - disconnect from the network
8093 - change disks into single-master mode
8094 - wait again until disks are fully synchronized
8097 instance = self.instance
8098 target_node = self.target_node
8099 source_node = self.source_node
8101 # check running on only one node
8102 self.feedback_fn("* checking where the instance actually runs"
8103 " (if this hangs, the hypervisor might be in"
8105 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8106 for node, result in ins_l.items():
8107 result.Raise("Can't contact node %s" % node)
8109 runningon_source = instance.name in ins_l[source_node].payload
8110 runningon_target = instance.name in ins_l[target_node].payload
8112 if runningon_source and runningon_target:
8113 raise errors.OpExecError("Instance seems to be running on two nodes,"
8114 " or the hypervisor is confused; you will have"
8115 " to ensure manually that it runs only on one"
8116 " and restart this operation")
8118 if not (runningon_source or runningon_target):
8119 raise errors.OpExecError("Instance does not seem to be running at all;"
8120 " in this case it's safer to repair by"
8121 " running 'gnt-instance stop' to ensure disk"
8122 " shutdown, and then restarting it")
8124 if runningon_target:
8125 # the migration has actually succeeded, we need to update the config
8126 self.feedback_fn("* instance running on secondary node (%s),"
8127 " updating config" % target_node)
8128 instance.primary_node = target_node
8129 self.cfg.Update(instance, self.feedback_fn)
8130 demoted_node = source_node
8132 self.feedback_fn("* instance confirmed to be running on its"
8133 " primary node (%s)" % source_node)
8134 demoted_node = target_node
8136 if instance.disk_template in constants.DTS_INT_MIRROR:
8137 self._EnsureSecondary(demoted_node)
8139 self._WaitUntilSync()
8140 except errors.OpExecError:
8141 # we ignore errors here, since if the device is standalone, it
8142 # won't be able to sync
8144 self._GoStandalone()
8145 self._GoReconnect(False)
8146 self._WaitUntilSync()
8148 self.feedback_fn("* done")
8150 def _RevertDiskStatus(self):
8151 """Try to revert the disk status after a failed migration.
8154 target_node = self.target_node
8155 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8159 self._EnsureSecondary(target_node)
8160 self._GoStandalone()
8161 self._GoReconnect(False)
8162 self._WaitUntilSync()
8163 except errors.OpExecError, err:
8164 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8165 " please try to recover the instance manually;"
8166 " error '%s'" % str(err))
8168 def _AbortMigration(self):
8169 """Call the hypervisor code to abort a started migration.
8172 instance = self.instance
8173 target_node = self.target_node
8174 source_node = self.source_node
8175 migration_info = self.migration_info
8177 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8181 abort_msg = abort_result.fail_msg
8183 logging.error("Aborting migration failed on target node %s: %s",
8184 target_node, abort_msg)
8185 # Don't raise an exception here, as we still have to try to revert the
8186 # disk status, even if this step failed.
8188 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8189 instance, False, self.live)
8190 abort_msg = abort_result.fail_msg
8192 logging.error("Aborting migration failed on source node %s: %s",
8193 source_node, abort_msg)
8195 def _ExecMigration(self):
8196 """Migrate an instance.
8198 The migrate is done by:
8199 - change the disks into dual-master mode
8200 - wait until disks are fully synchronized again
8201 - migrate the instance
8202 - change disks on the new secondary node (the old primary) to secondary
8203 - wait until disks are fully synchronized
8204 - change disks into single-master mode
8207 instance = self.instance
8208 target_node = self.target_node
8209 source_node = self.source_node
8211 # Check for hypervisor version mismatch and warn the user.
8212 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8213 None, [self.instance.hypervisor])
8214 for ninfo in nodeinfo.values():
8215 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8217 (_, _, (src_info, )) = nodeinfo[source_node].payload
8218 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8220 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8221 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8222 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8223 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8224 if src_version != dst_version:
8225 self.feedback_fn("* warning: hypervisor version mismatch between"
8226 " source (%s) and target (%s) node" %
8227 (src_version, dst_version))
8229 self.feedback_fn("* checking disk consistency between source and target")
8230 for dev in instance.disks:
8231 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8232 raise errors.OpExecError("Disk %s is degraded or not fully"
8233 " synchronized on target node,"
8234 " aborting migration" % dev.iv_name)
8236 if self.current_mem > self.tgt_free_mem:
8237 if not self.allow_runtime_changes:
8238 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8239 " free memory to fit instance %s on target"
8240 " node %s (have %dMB, need %dMB)" %
8241 (instance.name, target_node,
8242 self.tgt_free_mem, self.current_mem))
8243 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8244 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8247 rpcres.Raise("Cannot modify instance runtime memory")
8249 # First get the migration information from the remote node
8250 result = self.rpc.call_migration_info(source_node, instance)
8251 msg = result.fail_msg
8253 log_err = ("Failed fetching source migration information from %s: %s" %
8255 logging.error(log_err)
8256 raise errors.OpExecError(log_err)
8258 self.migration_info = migration_info = result.payload
8260 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8261 # Then switch the disks to master/master mode
8262 self._EnsureSecondary(target_node)
8263 self._GoStandalone()
8264 self._GoReconnect(True)
8265 self._WaitUntilSync()
8267 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8268 result = self.rpc.call_accept_instance(target_node,
8271 self.nodes_ip[target_node])
8273 msg = result.fail_msg
8275 logging.error("Instance pre-migration failed, trying to revert"
8276 " disk status: %s", msg)
8277 self.feedback_fn("Pre-migration failed, aborting")
8278 self._AbortMigration()
8279 self._RevertDiskStatus()
8280 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8281 (instance.name, msg))
8283 self.feedback_fn("* migrating instance to %s" % target_node)
8284 result = self.rpc.call_instance_migrate(source_node, instance,
8285 self.nodes_ip[target_node],
8287 msg = result.fail_msg
8289 logging.error("Instance migration failed, trying to revert"
8290 " disk status: %s", msg)
8291 self.feedback_fn("Migration failed, aborting")
8292 self._AbortMigration()
8293 self._RevertDiskStatus()
8294 raise errors.OpExecError("Could not migrate instance %s: %s" %
8295 (instance.name, msg))
8297 self.feedback_fn("* starting memory transfer")
8298 last_feedback = time.time()
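# Poll the hypervisor-reported migration status every
# _MIGRATION_POLL_INTERVAL seconds, emitting a progress line roughly every
# _MIGRATION_FEEDBACK_INTERVAL seconds, until the status leaves the
# 'active' state.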
8300 result = self.rpc.call_instance_get_migration_status(source_node,
8302 msg = result.fail_msg
8303 ms = result.payload # MigrationStatus instance
8304 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8305 logging.error("Instance migration failed, trying to revert"
8306 " disk status: %s", msg)
8307 self.feedback_fn("Migration failed, aborting")
8308 self._AbortMigration()
8309 self._RevertDiskStatus()
8310 raise errors.OpExecError("Could not migrate instance %s: %s" %
8311 (instance.name, msg))
8313 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8314 self.feedback_fn("* memory transfer complete")
8317 if (utils.TimeoutExpired(last_feedback,
8318 self._MIGRATION_FEEDBACK_INTERVAL) and
8319 ms.transferred_ram is not None):
8320 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8321 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8322 last_feedback = time.time()
8324 time.sleep(self._MIGRATION_POLL_INTERVAL)
8326 result = self.rpc.call_instance_finalize_migration_src(source_node,
8330 msg = result.fail_msg
8332 logging.error("Instance migration succeeded, but finalization failed"
8333 " on the source node: %s", msg)
8334 raise errors.OpExecError("Could not finalize instance migration: %s" %
8337 instance.primary_node = target_node
8339 # distribute new instance config to the other nodes
8340 self.cfg.Update(instance, self.feedback_fn)
8342 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8346 msg = result.fail_msg
8348 logging.error("Instance migration succeeded, but finalization failed"
8349 " on the target node: %s", msg)
8350 raise errors.OpExecError("Could not finalize instance migration: %s" %
8353 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8354 self._EnsureSecondary(source_node)
8355 self._WaitUntilSync()
8356 self._GoStandalone()
8357 self._GoReconnect(False)
8358 self._WaitUntilSync()
8360 # If the instance's disk template is `rbd' and there was a successful
8361 # migration, unmap the device from the source node.
8362 if self.instance.disk_template == constants.DT_RBD:
8363 disks = _ExpandCheckDisks(instance, instance.disks)
8364 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8366 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8367 msg = result.fail_msg
8369 logging.error("Migration was successful, but couldn't unmap the"
8370 " block device %s on source node %s: %s",
8371 disk.iv_name, source_node, msg)
8372 logging.error("You need to unmap the device %s manually on %s",
8373 disk.iv_name, source_node)
8375 self.feedback_fn("* done")
8377 def _ExecFailover(self):
8378 """Failover an instance.
8380 The failover is done by shutting it down on its present node and
8381 starting it on the secondary.
8384 instance = self.instance
8385 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8387 source_node = instance.primary_node
8388 target_node = self.target_node
8390 if instance.admin_state == constants.ADMINST_UP:
8391 self.feedback_fn("* checking disk consistency between source and target")
8392 for dev in instance.disks:
8393 # for drbd, these are drbd over lvm
8394 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8395 if primary_node.offline:
8396 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8398 (primary_node.name, dev.iv_name, target_node))
8399 elif not self.ignore_consistency:
8400 raise errors.OpExecError("Disk %s is degraded on target node,"
8401 " aborting failover" % dev.iv_name)
8403 self.feedback_fn("* not checking disk consistency as instance is not"
8406 self.feedback_fn("* shutting down instance on source node")
8407 logging.info("Shutting down instance %s on node %s",
8408 instance.name, source_node)
8410 result = self.rpc.call_instance_shutdown(source_node, instance,
8411 self.shutdown_timeout)
8412 msg = result.fail_msg
8414 if self.ignore_consistency or primary_node.offline:
8415 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8416 " proceeding anyway; please make sure node"
8417 " %s is down; error details: %s",
8418 instance.name, source_node, source_node, msg)
8420 raise errors.OpExecError("Could not shutdown instance %s on"
8422 (instance.name, source_node, msg))
8424 self.feedback_fn("* deactivating the instance's disks on source node")
8425 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8426 raise errors.OpExecError("Can't shut down the instance's disks")
8428 instance.primary_node = target_node
8429 # distribute new instance config to the other nodes
8430 self.cfg.Update(instance, self.feedback_fn)
8432 # Only start the instance if it's marked as up
8433 if instance.admin_state == constants.ADMINST_UP:
8434 self.feedback_fn("* activating the instance's disks on target node %s" %
8436 logging.info("Starting instance %s on node %s",
8437 instance.name, target_node)
8439 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8440 ignore_secondaries=True)
8442 _ShutdownInstanceDisks(self.lu, instance)
8443 raise errors.OpExecError("Can't activate the instance's disks")
8445 self.feedback_fn("* starting the instance on the target node %s" %
8447 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8449 msg = result.fail_msg
8451 _ShutdownInstanceDisks(self.lu, instance)
8452 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8453 (instance.name, target_node, msg))
8455 def Exec(self, feedback_fn):
8456 """Perform the migration.
8459 self.feedback_fn = feedback_fn
8460 self.source_node = self.instance.primary_node
8462 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8463 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8464 self.target_node = self.instance.secondary_nodes[0]
8465 # Otherwise self.target_node has been populated either
8466 # directly, or through an iallocator.
8468 self.all_nodes = [self.source_node, self.target_node]
8469 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8470 in self.cfg.GetMultiNodeInfo(self.all_nodes))
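# The replication and migration RPCs address the nodes by their secondary
# IPs, hence the name -> secondary_ip map built above.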
8473 feedback_fn("Failover instance %s" % self.instance.name)
8474 self._ExecFailover()
8476 feedback_fn("Migrating instance %s" % self.instance.name)
8479 return self._ExecCleanup()
8481 return self._ExecMigration()
8484 def _CreateBlockDev(lu, node, instance, device, force_create,
8486 """Create a tree of block devices on a given node.
8488 If this device type has to be created on secondaries, create it and all its children.
8491 If not, just recurse to children keeping the same 'force' value.
8493 @param lu: the lu on whose behalf we execute
8494 @param node: the node on which to create the device
8495 @type instance: L{objects.Instance}
8496 @param instance: the instance which owns the device
8497 @type device: L{objects.Disk}
8498 @param device: the device to create
8499 @type force_create: boolean
8500 @param force_create: whether to force creation of this device; this
8501 will be changed to True whenever we find a device which has the
8502 CreateOnSecondary() attribute
8503 @param info: the extra 'metadata' we should attach to the device
8504 (this will be represented as a LVM tag)
8505 @type force_open: boolean
8506 @param force_open: this parameter will be passed to the
8507 L{backend.BlockdevCreate} function where it specifies
8508 whether we run on the primary node or not, and it affects both
8509 the child assembly and the device's own Open() execution
8512 if device.CreateOnSecondary():
8516 for child in device.children:
8517 _CreateBlockDev(lu, node, instance, child, force_create,
8520 if not force_create:
8523 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8526 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8527 """Create a single block device on a given node.
8529 This will not recurse over children of the device, so they must be created in advance
8532 @param lu: the lu on whose behalf we execute
8533 @param node: the node on which to create the device
8534 @type instance: L{objects.Instance}
8535 @param instance: the instance which owns the device
8536 @type device: L{objects.Disk}
8537 @param device: the device to create
8538 @param info: the extra 'metadata' we should attach to the device
8539 (this will be represented as a LVM tag)
8540 @type force_open: boolean
8541 @param force_open: this parameter will be passed to the
8542 L{backend.BlockdevCreate} function where it specifies
8543 whether we run on the primary node or not, and it affects both
8544 the child assembly and the device's own Open() execution
8547 lu.cfg.SetDiskID(device, node)
8548 result = lu.rpc.call_blockdev_create(node, device, device.size,
8549 instance.name, force_open, info)
8550 result.Raise("Can't create block device %s on"
8551 " node %s for instance %s" % (device, node, instance.name))
8552 if device.physical_id is None:
8553 device.physical_id = result.payload
8556 def _GenerateUniqueNames(lu, exts):
8557 """Generate a suitable LV name.
8559 This will generate a logical volume name for the given instance.
8564 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8565 results.append("%s%s" % (new_id, val))
8569 def _ComputeLDParams(disk_template, disk_params):
8570 """Computes Logical Disk parameters from Disk Template parameters.
8572 @type disk_template: string
8573 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8574 @type disk_params: dict
8575 @param disk_params: disk template parameters; dict(template_name -> parameters)
8577 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8578 contains the LD parameters of the node. The tree is flattened in-order.
8581 if disk_template not in constants.DISK_TEMPLATES:
8582 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8585 dt_params = disk_params[disk_template]
8586 if disk_template == constants.DT_DRBD8:
8588 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8589 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8590 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8591 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8592 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8593 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8594 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8595 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8596 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8597 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8598 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8599 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8603 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8606 result.append(drbd_params)
8610 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8613 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8615 result.append(data_params)
8619 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8622 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8624 result.append(meta_params)
8626 elif (disk_template == constants.DT_FILE or
8627 disk_template == constants.DT_SHARED_FILE):
8628 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8630 elif disk_template == constants.DT_PLAIN:
8632 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8635 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8637 result.append(params)
8639 elif disk_template == constants.DT_BLOCK:
8640 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8642 elif disk_template == constants.DT_RBD:
8644 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8647 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8649 result.append(params)
8654 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8655 iv_name, p_minor, s_minor, drbd_params, data_params,
8657 """Generate a drbd8 device complete with its children.
8660 assert len(vgnames) == len(names) == 2
8661 port = lu.cfg.AllocatePort()
8662 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
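# The resulting device is a small tree: a data LV and a DRBD_META_SIZE
# metadata LV as children of a DRBD8 device whose logical_id ties together
# both nodes, the allocated port, the two minors and the shared secret.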
8664 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8665 logical_id=(vgnames[0], names[0]),
8667 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8668 logical_id=(vgnames[1], names[1]),
8670 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8671 logical_id=(primary, secondary, port,
8674 children=[dev_data, dev_meta],
8675 iv_name=iv_name, params=drbd_params)
8679 def _GenerateDiskTemplate(lu, template_name,
8680 instance_name, primary_node,
8681 secondary_nodes, disk_info,
8682 file_storage_dir, file_driver,
8683 base_index, feedback_fn, disk_params):
8684 """Generate the entire disk layout for a given template type.
8687 #TODO: compute space requirements
8689 vgname = lu.cfg.GetVGName()
8690 disk_count = len(disk_info)
8692 ld_params = _ComputeLDParams(template_name, disk_params)
8693 if template_name == constants.DT_DISKLESS:
8695 elif template_name == constants.DT_PLAIN:
8697 raise errors.ProgrammerError("Wrong template configuration")
8699 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8700 for i in range(disk_count)])
8701 for idx, disk in enumerate(disk_info):
8702 disk_index = idx + base_index
8703 vg = disk.get(constants.IDISK_VG, vgname)
8704 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8705 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8706 size=disk[constants.IDISK_SIZE],
8707 logical_id=(vg, names[idx]),
8708 iv_name="disk/%d" % disk_index,
8709 mode=disk[constants.IDISK_MODE],
8710 params=ld_params[0])
8711 disks.append(disk_dev)
8712 elif template_name == constants.DT_DRBD8:
8713 drbd_params, data_params, meta_params = ld_params
8714 if len(secondary_nodes) != 1:
8715 raise errors.ProgrammerError("Wrong template configuration")
8716 remote_node = secondary_nodes[0]
8717 minors = lu.cfg.AllocateDRBDMinor(
8718 [primary_node, remote_node] * len(disk_info), instance_name)
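# Minors are allocated pairwise ([primary, secondary] repeated per disk),
# so minors[2 * idx] / minors[2 * idx + 1] below are the primary and
# secondary minors of disk number idx.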
8721 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8722 for i in range(disk_count)]):
8723 names.append(lv_prefix + "_data")
8724 names.append(lv_prefix + "_meta")
8725 for idx, disk in enumerate(disk_info):
8726 disk_index = idx + base_index
8727 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8728 data_vg = disk.get(constants.IDISK_VG, vgname)
8729 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8730 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8731 disk[constants.IDISK_SIZE],
8733 names[idx * 2:idx * 2 + 2],
8734 "disk/%d" % disk_index,
8735 minors[idx * 2], minors[idx * 2 + 1],
8736 drbd_params, data_params, meta_params)
8737 disk_dev.mode = disk[constants.IDISK_MODE]
8738 disks.append(disk_dev)
8739 elif template_name == constants.DT_FILE:
8741 raise errors.ProgrammerError("Wrong template configuration")
8743 opcodes.RequireFileStorage()
8745 for idx, disk in enumerate(disk_info):
8746 disk_index = idx + base_index
8747 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8748 size=disk[constants.IDISK_SIZE],
8749 iv_name="disk/%d" % disk_index,
8750 logical_id=(file_driver,
8751 "%s/disk%d" % (file_storage_dir,
8753 mode=disk[constants.IDISK_MODE],
8754 params=ld_params[0])
8755 disks.append(disk_dev)
8756 elif template_name == constants.DT_SHARED_FILE:
8758 raise errors.ProgrammerError("Wrong template configuration")
8760 opcodes.RequireSharedFileStorage()
8762 for idx, disk in enumerate(disk_info):
8763 disk_index = idx + base_index
8764 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8765 size=disk[constants.IDISK_SIZE],
8766 iv_name="disk/%d" % disk_index,
8767 logical_id=(file_driver,
8768 "%s/disk%d" % (file_storage_dir,
8770 mode=disk[constants.IDISK_MODE],
8771 params=ld_params[0])
8772 disks.append(disk_dev)
8773 elif template_name == constants.DT_BLOCK:
8775 raise errors.ProgrammerError("Wrong template configuration")
8777 for idx, disk in enumerate(disk_info):
8778 disk_index = idx + base_index
8779 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8780 size=disk[constants.IDISK_SIZE],
8781 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8782 disk[constants.IDISK_ADOPT]),
8783 iv_name="disk/%d" % disk_index,
8784 mode=disk[constants.IDISK_MODE],
8785 params=ld_params[0])
8786 disks.append(disk_dev)
8787 elif template_name == constants.DT_RBD:
8789 raise errors.ProgrammerError("Wrong template configuration")
8791 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8792 for i in range(disk_count)])
8794 for idx, disk in enumerate(disk_info):
8795 disk_index = idx + base_index
8796 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8797 size=disk[constants.IDISK_SIZE],
8798 logical_id=("rbd", names[idx]),
8799 iv_name="disk/%d" % disk_index,
8800 mode=disk[constants.IDISK_MODE],
8801 params=ld_params[0])
8802 disks.append(disk_dev)
8805 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8809 def _GetInstanceInfoText(instance):
8810 """Compute that text that should be added to the disk's metadata.
8813 return "originstname+%s" % instance.name
8816 def _CalcEta(time_taken, written, total_size):
8817 """Calculates the ETA based on size written and total size.
8819 @param time_taken: The time taken so far
8820 @param written: amount written so far
8821 @param total_size: The total size of data to be written
8822 @return: The remaining time in seconds
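For example, if 512 MiB out of 1024 MiB were written in 30 seconds, the
average is 30/512 s per MiB and roughly another 30 seconds remain.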
8825 avg_time = time_taken / float(written)
8826 return (total_size - written) * avg_time
8829 def _WipeDisks(lu, instance):
8830 """Wipes instance disks.
8832 @type lu: L{LogicalUnit}
8833 @param lu: the logical unit on whose behalf we execute
8834 @type instance: L{objects.Instance}
8835 @param instance: the instance whose disks we should wipe
8836 @return: the success of the wipe
8839 node = instance.primary_node
8841 for device in instance.disks:
8842 lu.cfg.SetDiskID(device, node)
8844 logging.info("Pause sync of instance %s disks", instance.name)
8845 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8847 for idx, success in enumerate(result.payload):
8849 logging.warn("pause-sync of instance %s for disks %d failed",
8853 for idx, device in enumerate(instance.disks):
8854 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8855 # but at most MAX_WIPE_CHUNK
8856 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8857 constants.MIN_WIPE_CHUNK_PERCENT)
8858 # we _must_ make this an int, otherwise rounding errors will occur
8860 wipe_chunk_size = int(wipe_chunk_size)
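# Illustrative example (assuming a 10% minimum chunk and a 1024 MiB
# MAX_WIPE_CHUNK, which are only assumed values here): a 20480 MiB disk
# would be wiped in 1024 MiB chunks, a 5000 MiB disk in 500 MiB chunks.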
8862 lu.LogInfo("* Wiping disk %d", idx)
8863 logging.info("Wiping disk %d for instance %s, node %s using"
8864 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8869 start_time = time.time()
8871 while offset < size:
8872 wipe_size = min(wipe_chunk_size, size - offset)
8873 logging.debug("Wiping disk %d, offset %s, chunk %s",
8874 idx, offset, wipe_size)
8875 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8876 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8877 (idx, offset, wipe_size))
8880 if now - last_output >= 60:
8881 eta = _CalcEta(now - start_time, offset, size)
8882 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8883 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8886 logging.info("Resume sync of instance %s disks", instance.name)
8888 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8890 for idx, success in enumerate(result.payload):
8892 lu.LogWarning("Resume sync of disk %d failed, please have a"
8893 " look at the status and troubleshoot the issue", idx)
8894 logging.warn("resume-sync of instance %s for disks %d failed",
8898 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8899 """Create all disks for an instance.
8901 This abstracts away some work from AddInstance.
8903 @type lu: L{LogicalUnit}
8904 @param lu: the logical unit on whose behalf we execute
8905 @type instance: L{objects.Instance}
8906 @param instance: the instance whose disks we should create
8908 @param to_skip: list of indices to skip
8909 @type target_node: string
8910 @param target_node: if passed, overrides the target node for creation
8912 @return: the success of the creation
8915 info = _GetInstanceInfoText(instance)
8916 if target_node is None:
8917 pnode = instance.primary_node
8918 all_nodes = instance.all_nodes
8923 if instance.disk_template in constants.DTS_FILEBASED:
8924 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8925 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8927 result.Raise("Failed to create directory '%s' on"
8928 " node %s" % (file_storage_dir, pnode))
8930 # Note: this needs to be kept in sync with adding of disks in
8931 # LUInstanceSetParams
8932 for idx, device in enumerate(instance.disks):
8933 if to_skip and idx in to_skip:
8935 logging.info("Creating volume %s for instance %s",
8936 device.iv_name, instance.name)
8938 for node in all_nodes:
8939 f_create = node == pnode
8940 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8943 def _RemoveDisks(lu, instance, target_node=None):
8944 """Remove all disks for an instance.
8946 This abstracts away some work from `AddInstance()` and
8947 `RemoveInstance()`. Note that in case some of the devices couldn't
8948 be removed, the removal will continue with the other ones (compare
8949 with `_CreateDisks()`).
8951 @type lu: L{LogicalUnit}
8952 @param lu: the logical unit on whose behalf we execute
8953 @type instance: L{objects.Instance}
8954 @param instance: the instance whose disks we should remove
8955 @type target_node: string
8956 @param target_node: used to override the node on which to remove the disks
8958 @return: the success of the removal
8961 logging.info("Removing block devices for instance %s", instance.name)
8964 for device in instance.disks:
8966 edata = [(target_node, device)]
8968 edata = device.ComputeNodeTree(instance.primary_node)
8969 for node, disk in edata:
8970 lu.cfg.SetDiskID(disk, node)
8971 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8973 lu.LogWarning("Could not remove block device %s on node %s,"
8974 " continuing anyway: %s", device.iv_name, node, msg)
8977 # if this is a DRBD disk, return its port to the pool
8978 if device.dev_type in constants.LDS_DRBD:
8979 tcp_port = device.logical_id[2]
8980 lu.cfg.AddTcpUdpPort(tcp_port)
8982 if instance.disk_template == constants.DT_FILE:
8983 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8987 tgt = instance.primary_node
8988 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8990 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8991 file_storage_dir, instance.primary_node, result.fail_msg)
8997 def _ComputeDiskSizePerVG(disk_template, disks):
8998 """Compute disk size requirements in the volume group
9001 def _compute(disks, payload):
9002 """Universal algorithm.
9007 vgs[disk[constants.IDISK_VG]] = \
9008 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9012 # Required free disk space as a function of disk and swap space
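# For example, two 1024 MiB DRBD8 disks in volume group "xenvg" would
# require 2 * (1024 + 128) = 2304 MiB there, i.e. a result of
# {"xenvg": 2304}.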
9014 constants.DT_DISKLESS: {},
9015 constants.DT_PLAIN: _compute(disks, 0),
9016 # 128 MB are added for drbd metadata for each disk
9017 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9018 constants.DT_FILE: {},
9019 constants.DT_SHARED_FILE: {},
9022 if disk_template not in req_size_dict:
9023 raise errors.ProgrammerError("Disk template '%s' size requirement"
9024 " is unknown" % disk_template)
9026 return req_size_dict[disk_template]
9029 def _ComputeDiskSize(disk_template, disks):
9030 """Compute disk size requirements in the volume group
9033 # Required free disk space as a function of disk and swap space
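# For example, DT_PLAIN disks of 512 and 1024 MiB need 1536 MiB in total,
# while DT_DRBD8 adds 128 MiB of metadata per disk for a total of 1792 MiB.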
9035 constants.DT_DISKLESS: None,
9036 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9037 # 128 MB are added for drbd metadata for each disk
9039 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9040 constants.DT_FILE: None,
9041 constants.DT_SHARED_FILE: 0,
9042 constants.DT_BLOCK: 0,
9043 constants.DT_RBD: 0,
9046 if disk_template not in req_size_dict:
9047 raise errors.ProgrammerError("Disk template '%s' size requirement"
9048 " is unknown" % disk_template)
9050 return req_size_dict[disk_template]
9053 def _FilterVmNodes(lu, nodenames):
9054 """Filters out non-vm_capable nodes from a list.
9056 @type lu: L{LogicalUnit}
9057 @param lu: the logical unit for which we check
9058 @type nodenames: list
9059 @param nodenames: the list of nodes on which we should check
9061 @return: the list of vm-capable nodes
9064 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9065 return [name for name in nodenames if name not in vm_nodes]
9068 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9069 """Hypervisor parameter validation.
9071 This function abstracts the hypervisor parameter validation to be
9072 used in both instance create and instance modify.
9074 @type lu: L{LogicalUnit}
9075 @param lu: the logical unit for which we check
9076 @type nodenames: list
9077 @param nodenames: the list of nodes on which we should check
9078 @type hvname: string
9079 @param hvname: the name of the hypervisor we should use
9080 @type hvparams: dict
9081 @param hvparams: the parameters which we need to check
9082 @raise errors.OpPrereqError: if the parameters are not valid
9085 nodenames = _FilterVmNodes(lu, nodenames)
9087 cluster = lu.cfg.GetClusterInfo()
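# Merge the cluster-wide defaults for this hypervisor with the parameters
# being checked, so every node validates the full effective parameter set
# rather than just the delta.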
9088 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9090 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9091 for node in nodenames:
9095 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9098 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9099 """OS parameters validation.
9101 @type lu: L{LogicalUnit}
9102 @param lu: the logical unit for which we check
9103 @type required: boolean
9104 @param required: whether the validation should fail if the OS is not found
9106 @type nodenames: list
9107 @param nodenames: the list of nodes on which we should check
9108 @type osname: string
9109 @param osname: the name of the OS we should use
9110 @type osparams: dict
9111 @param osparams: the parameters which we need to check
9112 @raise errors.OpPrereqError: if the parameters are not valid
9115 nodenames = _FilterVmNodes(lu, nodenames)
9116 result = lu.rpc.call_os_validate(nodenames, required, osname,
9117 [constants.OS_VALIDATE_PARAMETERS],
9119 for node, nres in result.items():
9120 # we don't check for offline cases since this should be run only
9121 # against the master node and/or an instance's nodes
9122 nres.Raise("OS Parameters validation failed on node %s" % node)
9123 if not nres.payload:
9124 lu.LogInfo("OS %s not found on node %s, validation skipped",
9128 class LUInstanceCreate(LogicalUnit):
9129 """Create an instance.
9132 HPATH = "instance-add"
9133 HTYPE = constants.HTYPE_INSTANCE
9136 def CheckArguments(self):
9140 # do not require name_check to ease forward/backward compatibility
9142 if self.op.no_install and self.op.start:
9143 self.LogInfo("No-installation mode selected, disabling startup")
9144 self.op.start = False
9145 # validate/normalize the instance name
9146 self.op.instance_name = \
9147 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9149 if self.op.ip_check and not self.op.name_check:
9150 # TODO: make the ip check more flexible and not depend on the name check
9151 raise errors.OpPrereqError("Cannot do IP address check without a name"
9152 " check", errors.ECODE_INVAL)
9154 # check nics' parameter names
9155 for nic in self.op.nics:
9156 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9158 # check disks. parameter names and consistent adopt/no-adopt strategy
9159 has_adopt = has_no_adopt = False
9160 for disk in self.op.disks:
9161 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9162 if constants.IDISK_ADOPT in disk:
9166 if has_adopt and has_no_adopt:
9167 raise errors.OpPrereqError("Either all disks are adopted or none is",
9170 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9171 raise errors.OpPrereqError("Disk adoption is not supported for the"
9172 " '%s' disk template" %
9173 self.op.disk_template,
9175 if self.op.iallocator is not None:
9176 raise errors.OpPrereqError("Disk adoption not allowed with an"
9177 " iallocator script", errors.ECODE_INVAL)
9178 if self.op.mode == constants.INSTANCE_IMPORT:
9179 raise errors.OpPrereqError("Disk adoption not allowed for"
9180 " instance import", errors.ECODE_INVAL)
9182 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9183 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9184 " but no 'adopt' parameter given" %
9185 self.op.disk_template,
9188 self.adopt_disks = has_adopt
9190 # instance name verification
9191 if self.op.name_check:
9192 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9193 self.op.instance_name = self.hostname1.name
9194 # used in CheckPrereq for ip ping check
9195 self.check_ip = self.hostname1.ip
9197 self.check_ip = None
9199 # file storage checks
9200 if (self.op.file_driver and
9201 not self.op.file_driver in constants.FILE_DRIVER):
9202 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9203 self.op.file_driver, errors.ECODE_INVAL)
9205 if self.op.disk_template == constants.DT_FILE:
9206 opcodes.RequireFileStorage()
9207 elif self.op.disk_template == constants.DT_SHARED_FILE:
9208 opcodes.RequireSharedFileStorage()
9210 ### Node/iallocator related checks
9211 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9213 if self.op.pnode is not None:
9214 if self.op.disk_template in constants.DTS_INT_MIRROR:
9215 if self.op.snode is None:
9216 raise errors.OpPrereqError("The networked disk templates need"
9217 " a mirror node", errors.ECODE_INVAL)
9219 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9221 self.op.snode = None
9223 self._cds = _GetClusterDomainSecret()
9225 if self.op.mode == constants.INSTANCE_IMPORT:
9226 # On import force_variant must be True, because if we forced it at
9227 # initial install, our only chance when importing it back is that it
9229 self.op.force_variant = True
9231 if self.op.no_install:
9232 self.LogInfo("No-installation mode has no effect during import")
9234 elif self.op.mode == constants.INSTANCE_CREATE:
9235 if self.op.os_type is None:
9236 raise errors.OpPrereqError("No guest OS specified",
9238 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9239 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9240 " installation" % self.op.os_type,
9242 if self.op.disk_template is None:
9243 raise errors.OpPrereqError("No disk template specified",
9246 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9247 # Check handshake to ensure both clusters have the same domain secret
9248 src_handshake = self.op.source_handshake
9249 if not src_handshake:
9250 raise errors.OpPrereqError("Missing source handshake",
9253 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9256 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9259 # Load and check source CA
9260 self.source_x509_ca_pem = self.op.source_x509_ca
9261 if not self.source_x509_ca_pem:
9262 raise errors.OpPrereqError("Missing source X509 CA",
9266 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9268 except OpenSSL.crypto.Error, err:
9269 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9270 (err, ), errors.ECODE_INVAL)
9272 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9273 if errcode is not None:
9274 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9277 self.source_x509_ca = cert
9279 src_instance_name = self.op.source_instance_name
9280 if not src_instance_name:
9281 raise errors.OpPrereqError("Missing source instance name",
9284 self.source_instance_name = \
9285 netutils.GetHostname(name=src_instance_name).name
9288 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9289 self.op.mode, errors.ECODE_INVAL)
9291 def ExpandNames(self):
9292 """ExpandNames for CreateInstance.
9294 Figure out the right locks for instance creation.
9297 self.needed_locks = {}
9299 instance_name = self.op.instance_name
9300 # this is just a preventive check, but someone might still add this
9301 # instance in the meantime, and creation will fail at lock-add time
9302 if instance_name in self.cfg.GetInstanceList():
9303 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9304 instance_name, errors.ECODE_EXISTS)
9306 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9308 if self.op.iallocator:
9309 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9310 # specifying a group on instance creation and then selecting nodes from
9312 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9313 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9315 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9316 nodelist = [self.op.pnode]
9317 if self.op.snode is not None:
9318 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9319 nodelist.append(self.op.snode)
9320 self.needed_locks[locking.LEVEL_NODE] = nodelist
9321 # Lock resources of instance's primary and secondary nodes (copy to
9322 # prevent accidental modification)
9323 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9325 # in case of import lock the source node too
9326 if self.op.mode == constants.INSTANCE_IMPORT:
9327 src_node = self.op.src_node
9328 src_path = self.op.src_path
9330 if src_path is None:
9331 self.op.src_path = src_path = self.op.instance_name
9333 if src_node is None:
9334 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9335 self.op.src_node = None
9336 if os.path.isabs(src_path):
9337 raise errors.OpPrereqError("Importing an instance from a path"
9338 " requires a source node option",
9341 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9342 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9343 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9344 if not os.path.isabs(src_path):
9345 self.op.src_path = src_path = \
9346 utils.PathJoin(constants.EXPORT_DIR, src_path)
9348 def _RunAllocator(self):
9349 """Run the allocator based on input opcode.
9352 nics = [n.ToDict() for n in self.nics]
9353 ial = IAllocator(self.cfg, self.rpc,
9354 mode=constants.IALLOCATOR_MODE_ALLOC,
9355 name=self.op.instance_name,
9356 disk_template=self.op.disk_template,
9359 vcpus=self.be_full[constants.BE_VCPUS],
9360 memory=self.be_full[constants.BE_MAXMEM],
9363 hypervisor=self.op.hypervisor,
9366 ial.Run(self.op.iallocator)
9369 raise errors.OpPrereqError("Can't compute nodes using"
9370 " iallocator '%s': %s" %
9371 (self.op.iallocator, ial.info),
9373 if len(ial.result) != ial.required_nodes:
9374 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9375 " of nodes (%s), required %s" %
9376 (self.op.iallocator, len(ial.result),
9377 ial.required_nodes), errors.ECODE_FAULT)
9378 self.op.pnode = ial.result[0]
9379 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9380 self.op.instance_name, self.op.iallocator,
9381 utils.CommaJoin(ial.result))
9382 if ial.required_nodes == 2:
9383 self.op.snode = ial.result[1]
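# The allocator contract enforced above, condensed into a stand-alone check
# for illustration; the names below are made up, the real interface is the
# IAllocator class used in _RunAllocator:
def _ExampleCheckAllocatorResult(success, info, result, required_nodes):
  """Returns (pnode, snode or None) or raises OpPrereqError.

  """
  if not success:
    raise errors.OpPrereqError("iallocator failed: %s" % info,
                               errors.ECODE_NORES)
  if len(result) != required_nodes:
    raise errors.OpPrereqError("iallocator returned %d node(s),"
                               " expected %d" % (len(result), required_nodes),
                               errors.ECODE_FAULT)
  if required_nodes == 2:
    return (result[0], result[1])
  return (result[0], None)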
9385 def BuildHooksEnv(self):
9388 This runs on the master, primary and secondary nodes of the instance.
9392 "ADD_MODE": self.op.mode,
9394 if self.op.mode == constants.INSTANCE_IMPORT:
9395 env["SRC_NODE"] = self.op.src_node
9396 env["SRC_PATH"] = self.op.src_path
9397 env["SRC_IMAGES"] = self.src_images
9399 env.update(_BuildInstanceHookEnv(
9400 name=self.op.instance_name,
9401 primary_node=self.op.pnode,
9402 secondary_nodes=self.secondaries,
9403 status=self.op.start,
9404 os_type=self.op.os_type,
9405 minmem=self.be_full[constants.BE_MINMEM],
9406 maxmem=self.be_full[constants.BE_MAXMEM],
9407 vcpus=self.be_full[constants.BE_VCPUS],
9408 nics=_NICListToTuple(self, self.nics),
9409 disk_template=self.op.disk_template,
9410 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9411 for d in self.disks],
9414 hypervisor_name=self.op.hypervisor,
9420 def BuildHooksNodes(self):
9421 """Build hooks nodes.
9424 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9427 def _ReadExportInfo(self):
9428 """Reads the export information from disk.
9430 It will override the opcode source node and path with the actual
9431 information, if these two were not specified before.
9433 @return: the export information
9436 assert self.op.mode == constants.INSTANCE_IMPORT
9438 src_node = self.op.src_node
9439 src_path = self.op.src_path
9441 if src_node is None:
9442 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9443 exp_list = self.rpc.call_export_list(locked_nodes)
9445 for node in exp_list:
9446 if exp_list[node].fail_msg:
9448 if src_path in exp_list[node].payload:
9450 self.op.src_node = src_node = node
9451 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9455 raise errors.OpPrereqError("No export found for relative path %s" %
9456 src_path, errors.ECODE_INVAL)
9458 _CheckNodeOnline(self, src_node)
9459 result = self.rpc.call_export_info(src_node, src_path)
9460 result.Raise("No export or invalid export found in dir %s" % src_path)
9462 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9463 if not export_info.has_section(constants.INISECT_EXP):
9464 raise errors.ProgrammerError("Corrupted export config",
9465 errors.ECODE_ENVIRON)
9467 ei_version = export_info.get(constants.INISECT_EXP, "version")
9468 if (int(ei_version) != constants.EXPORT_VERSION):
9469 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9470 (ei_version, constants.EXPORT_VERSION),
9471 errors.ECODE_ENVIRON)
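# The export information loaded above is an INI-style file (parsed through
# objects.SerializableConfigParser).  A rough, stdlib-only sketch of the
# version check; the literal section/option names below are assumptions,
# the code above uses the constants.INISECT_* values instead:
def _ExampleParseExportVersion(text):
  import ConfigParser
  import StringIO
  cfg = ConfigParser.SafeConfigParser()
  cfg.readfp(StringIO.StringIO(text))
  if not cfg.has_section("export"):
    raise ValueError("corrupted export config")
  return cfg.getint("export", "version")

# e.g. _ExampleParseExportVersion("[export]\nversion = 0\n") returns 0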
9474 def _ReadExportParams(self, einfo):
9475 """Use export parameters as defaults.
9477 If the opcode doesn't specify (i.e. override) some instance
9478 parameters, try to use them from the export information, if
9482 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9484 if self.op.disk_template is None:
9485 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9486 self.op.disk_template = einfo.get(constants.INISECT_INS,
9488 if self.op.disk_template not in constants.DISK_TEMPLATES:
9489 raise errors.OpPrereqError("Disk template specified in configuration"
9490 " file is not one of the allowed values:"
9491 " %s" % " ".join(constants.DISK_TEMPLATES))
9493 raise errors.OpPrereqError("No disk template specified and the export"
9494 " is missing the disk_template information",
9497 if not self.op.disks:
9499 # TODO: import the disk iv_name too
9500 for idx in range(constants.MAX_DISKS):
9501 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9502 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9503 disks.append({constants.IDISK_SIZE: disk_sz})
9504 self.op.disks = disks
9505 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9506 raise errors.OpPrereqError("No disk info specified and the export"
9507 " is missing the disk information",
9510 if not self.op.nics:
9512 for idx in range(constants.MAX_NICS):
9513 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9515 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9516 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9523 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9524 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9526 if (self.op.hypervisor is None and
9527 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9528 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9530 if einfo.has_section(constants.INISECT_HYP):
9531 # use the export parameters but do not override the ones
9532 # specified by the user
9533 for name, value in einfo.items(constants.INISECT_HYP):
9534 if name not in self.op.hvparams:
9535 self.op.hvparams[name] = value
9537 if einfo.has_section(constants.INISECT_BEP):
9538 # use the parameters, without overriding
9539 for name, value in einfo.items(constants.INISECT_BEP):
9540 if name not in self.op.beparams:
9541 self.op.beparams[name] = value
9542 # Compatibility for the old "memory" be param
9543 if name == constants.BE_MEMORY:
9544 if constants.BE_MAXMEM not in self.op.beparams:
9545 self.op.beparams[constants.BE_MAXMEM] = value
9546 if constants.BE_MINMEM not in self.op.beparams:
9547 self.op.beparams[constants.BE_MINMEM] = value
9549 # try to read the parameters old style, from the main section
9550 for name in constants.BES_PARAMETERS:
9551 if (name not in self.op.beparams and
9552 einfo.has_option(constants.INISECT_INS, name)):
9553 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9555 if einfo.has_section(constants.INISECT_OSP):
9556 # use the parameters, without overriding
9557 for name, value in einfo.items(constants.INISECT_OSP):
9558 if name not in self.op.osparams:
9559 self.op.osparams[name] = value
9561 def _RevertToDefaults(self, cluster):
9562 """Revert the instance parameters to the default values.
9566 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9567 for name in self.op.hvparams.keys():
9568 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9569 del self.op.hvparams[name]
9571 be_defs = cluster.SimpleFillBE({})
9572 for name in self.op.beparams.keys():
9573 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9574 del self.op.beparams[name]
9576 nic_defs = cluster.SimpleFillNIC({})
9577 for nic in self.op.nics:
9578 for name in constants.NICS_PARAMETERS:
9579 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9582 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9583 for name in self.op.osparams.keys():
9584 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9585 del self.op.osparams[name]
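# The pruning above implements the identify_defaults handling: any value
# identical to the cluster default is dropped again, so the new instance
# keeps tracking future changes of that default instead of freezing it.
# The same idea on plain dicts, for illustration:
def _ExamplePruneDefaults(params, defaults):
  """Returns a copy of params without entries equal to their default.

  """
  return dict((name, value) for (name, value) in params.items()
              if name not in defaults or defaults[name] != value)

# _ExamplePruneDefaults({"maxmem": 128, "vcpus": 4}, {"maxmem": 128})
# returns {"vcpus": 4}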
9587 def _CalculateFileStorageDir(self):
9588 """Calculate final instance file storage dir.
9591 # file storage dir calculation/check
9592 self.instance_file_storage_dir = None
9593 if self.op.disk_template in constants.DTS_FILEBASED:
9594 # build the full file storage dir path
9597 if self.op.disk_template == constants.DT_SHARED_FILE:
9598 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9600 get_fsd_fn = self.cfg.GetFileStorageDir
9602 cfg_storagedir = get_fsd_fn()
9603 if not cfg_storagedir:
9604 raise errors.OpPrereqError("Cluster file storage dir not defined")
9605 joinargs.append(cfg_storagedir)
9607 if self.op.file_storage_dir is not None:
9608 joinargs.append(self.op.file_storage_dir)
9610 joinargs.append(self.op.instance_name)
9612 # pylint: disable=W0142
9613 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
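# The resulting layout is, roughly:
#   <cluster file storage dir>[/<per-instance override>]/<instance name>
# A plain os.path sketch of the join above (utils.PathJoin behaves like
# os.path.join but additionally sanity-checks the components); paths below
# are examples only:
def _ExampleFileStorageDir(cluster_dir, override_subdir, instance_name):
  import os.path
  parts = [cluster_dir]
  if override_subdir is not None:
    parts.append(override_subdir)
  parts.append(instance_name)
  return os.path.join(*parts)

# _ExampleFileStorageDir("/srv/ganeti/file-storage", None, "inst1.example.com")
# returns "/srv/ganeti/file-storage/inst1.example.com"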
9615 def CheckPrereq(self): # pylint: disable=R0914
9616 """Check prerequisites.
9619 self._CalculateFileStorageDir()
9621 if self.op.mode == constants.INSTANCE_IMPORT:
9622 export_info = self._ReadExportInfo()
9623 self._ReadExportParams(export_info)
9625 if (not self.cfg.GetVGName() and
9626 self.op.disk_template not in constants.DTS_NOT_LVM):
9627 raise errors.OpPrereqError("Cluster does not support lvm-based"
9628 " instances", errors.ECODE_STATE)
9630 if (self.op.hypervisor is None or
9631 self.op.hypervisor == constants.VALUE_AUTO):
9632 self.op.hypervisor = self.cfg.GetHypervisorType()
9634 cluster = self.cfg.GetClusterInfo()
9635 enabled_hvs = cluster.enabled_hypervisors
9636 if self.op.hypervisor not in enabled_hvs:
9637 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9638 " cluster (%s)" % (self.op.hypervisor,
9639 ",".join(enabled_hvs)),
9642 # Check tag validity
9643 for tag in self.op.tags:
9644 objects.TaggableObject.ValidateTag(tag)
9646 # check hypervisor parameter syntax (locally)
9647 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9648 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9650 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9651 hv_type.CheckParameterSyntax(filled_hvp)
9652 self.hv_full = filled_hvp
9653 # check that we don't specify global parameters on an instance
9654 _CheckGlobalHvParams(self.op.hvparams)
9656 # fill and remember the beparams dict
9657 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9658 for param, value in self.op.beparams.iteritems():
9659 if value == constants.VALUE_AUTO:
9660 self.op.beparams[param] = default_beparams[param]
9661 objects.UpgradeBeParams(self.op.beparams)
9662 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9663 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9665 # build os parameters
9666 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9668 # now that hvp/bep are in final format, let's reset to defaults,
9670 if self.op.identify_defaults:
9671 self._RevertToDefaults(cluster)
9675 for idx, nic in enumerate(self.op.nics):
9676 nic_mode_req = nic.get(constants.INIC_MODE, None)
9677 nic_mode = nic_mode_req
9678 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9679 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9681 # in routed mode, for the first nic, the default ip is 'auto'
9682 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9683 default_ip_mode = constants.VALUE_AUTO
9685 default_ip_mode = constants.VALUE_NONE
9687 # ip validity checks
9688 ip = nic.get(constants.INIC_IP, default_ip_mode)
9689 if ip is None or ip.lower() == constants.VALUE_NONE:
9691 elif ip.lower() == constants.VALUE_AUTO:
9692 if not self.op.name_check:
9693 raise errors.OpPrereqError("IP address set to auto but name checks"
9694 " have been skipped",
9696 nic_ip = self.hostname1.ip
9698 if not netutils.IPAddress.IsValid(ip):
9699 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9703 # TODO: check the ip address for uniqueness
9704 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9705 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9708 # MAC address verification
9709 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9710 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9711 mac = utils.NormalizeAndValidateMac(mac)
9714 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9715 except errors.ReservationError:
9716 raise errors.OpPrereqError("MAC address %s already in use"
9717 " in cluster" % mac,
9718 errors.ECODE_NOTUNIQUE)
9720 # Build nic parameters
9721 link = nic.get(constants.INIC_LINK, None)
9722 if link == constants.VALUE_AUTO:
9723 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9726 nicparams[constants.NIC_MODE] = nic_mode
9728 nicparams[constants.NIC_LINK] = link
9730 check_params = cluster.SimpleFillNIC(nicparams)
9731 objects.NIC.CheckParameterSyntax(check_params)
9732 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9734 # disk checks/pre-build
9735 default_vg = self.cfg.GetVGName()
9737 for disk in self.op.disks:
9738 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9739 if mode not in constants.DISK_ACCESS_SET:
9740 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9741 mode, errors.ECODE_INVAL)
9742 size = disk.get(constants.IDISK_SIZE, None)
9744 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9747 except (TypeError, ValueError):
9748 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9751 data_vg = disk.get(constants.IDISK_VG, default_vg)
9753 constants.IDISK_SIZE: size,
9754 constants.IDISK_MODE: mode,
9755 constants.IDISK_VG: data_vg,
9757 if constants.IDISK_METAVG in disk:
9758 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9759 if constants.IDISK_ADOPT in disk:
9760 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9761 self.disks.append(new_disk)
9763 if self.op.mode == constants.INSTANCE_IMPORT:
9765 for idx in range(len(self.disks)):
9766 option = "disk%d_dump" % idx
9767 if export_info.has_option(constants.INISECT_INS, option):
9768 # FIXME: are the old OSes, disk sizes, etc. useful?
9769 export_name = export_info.get(constants.INISECT_INS, option)
9770 image = utils.PathJoin(self.op.src_path, export_name)
9771 disk_images.append(image)
9773 disk_images.append(False)
9775 self.src_images = disk_images
9777 old_name = export_info.get(constants.INISECT_INS, "name")
9778 if self.op.instance_name == old_name:
9779 for idx, nic in enumerate(self.nics):
9780 if nic.mac == constants.VALUE_AUTO:
9781 nic_mac_ini = "nic%d_mac" % idx
9782 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9784 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9786 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9787 if self.op.ip_check:
9788 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9789 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9790 (self.check_ip, self.op.instance_name),
9791 errors.ECODE_NOTUNIQUE)
9793 #### mac address generation
9794 # By generating the MAC address here, both the allocator and the hooks get
9795 # the real, final MAC address rather than the 'auto' or 'generate' value.
9796 # There is a race condition between the generation and the instance object
9797 # creation, which means that we know the mac is valid now, but we're not
9798 # sure it will be when we actually add the instance. If things go bad
9799 # adding the instance will abort because of a duplicate mac, and the
9800 # creation job will fail.
9801 for nic in self.nics:
9802 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9803 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9807 if self.op.iallocator is not None:
9808 self._RunAllocator()
9810 # Release all unneeded node locks
9811 _ReleaseLocks(self, locking.LEVEL_NODE,
9812 keep=filter(None, [self.op.pnode, self.op.snode,
9814 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9815 keep=filter(None, [self.op.pnode, self.op.snode,
9818 #### node related checks
9820 # check primary node
9821 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9822 assert self.pnode is not None, \
9823 "Cannot retrieve locked node %s" % self.op.pnode
9825 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9826 pnode.name, errors.ECODE_STATE)
9828 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9829 pnode.name, errors.ECODE_STATE)
9830 if not pnode.vm_capable:
9831 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9832 " '%s'" % pnode.name, errors.ECODE_STATE)
9834 self.secondaries = []
9836 # mirror node verification
9837 if self.op.disk_template in constants.DTS_INT_MIRROR:
9838 if self.op.snode == pnode.name:
9839 raise errors.OpPrereqError("The secondary node cannot be the"
9840 " primary node", errors.ECODE_INVAL)
9841 _CheckNodeOnline(self, self.op.snode)
9842 _CheckNodeNotDrained(self, self.op.snode)
9843 _CheckNodeVmCapable(self, self.op.snode)
9844 self.secondaries.append(self.op.snode)
9846 snode = self.cfg.GetNodeInfo(self.op.snode)
9847 if pnode.group != snode.group:
9848 self.LogWarning("The primary and secondary nodes are in two"
9849 " different node groups; the disk parameters"
9850 " from the first disk's node group will be"
9853 nodenames = [pnode.name] + self.secondaries
9855 # Verify instance specs
9857 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9858 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9859 constants.ISPEC_DISK_COUNT: len(self.disks),
9860 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9861 constants.ISPEC_NIC_COUNT: len(self.nics),
9864 group_info = self.cfg.GetNodeGroup(pnode.group)
9865 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9866 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9867 if not self.op.ignore_ipolicy and res:
9868 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9869 " policy: %s") % (pnode.group,
9870 utils.CommaJoin(res)),
9873 # disk parameters (not customizable at instance or node level)
9874 # just use the primary node parameters, ignoring the secondary.
9875 self.diskparams = group_info.diskparams
9877 if not self.adopt_disks:
9878 if self.op.disk_template == constants.DT_RBD:
9879 # _CheckRADOSFreeSpace() is just a placeholder.
9880 # Any function that checks prerequisites can be placed here.
9881 # Check if there is enough space on the RADOS cluster.
9882 _CheckRADOSFreeSpace()
9884 # Check lv size requirements, if not adopting
9885 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9886 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9888 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9889 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9890 disk[constants.IDISK_ADOPT])
9891 for disk in self.disks])
9892 if len(all_lvs) != len(self.disks):
9893 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9895 for lv_name in all_lvs:
9897 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
9898 # to ReserveLV use the same syntax
9899 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9900 except errors.ReservationError:
9901 raise errors.OpPrereqError("LV named %s used by another instance" %
9902 lv_name, errors.ECODE_NOTUNIQUE)
9904 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9905 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9907 node_lvs = self.rpc.call_lv_list([pnode.name],
9908 vg_names.payload.keys())[pnode.name]
9909 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9910 node_lvs = node_lvs.payload
9912 delta = all_lvs.difference(node_lvs.keys())
9914 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9915 utils.CommaJoin(delta),
9917 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9919 raise errors.OpPrereqError("Online logical volumes found, cannot"
9920 " adopt: %s" % utils.CommaJoin(online_lvs),
9922 # update the size of disk based on what is found
9923 for dsk in self.disks:
9924 dsk[constants.IDISK_SIZE] = \
9925 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9926 dsk[constants.IDISK_ADOPT])][0]))
9928 elif self.op.disk_template == constants.DT_BLOCK:
9929 # Normalize and de-duplicate device paths
9930 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9931 for disk in self.disks])
9932 if len(all_disks) != len(self.disks):
9933 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9935 baddisks = [d for d in all_disks
9936 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9938 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9939 " cannot be adopted" %
9940 (", ".join(baddisks),
9941 constants.ADOPTABLE_BLOCKDEV_ROOT),
9944 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9945 list(all_disks))[pnode.name]
9946 node_disks.Raise("Cannot get block device information from node %s" %
9948 node_disks = node_disks.payload
9949 delta = all_disks.difference(node_disks.keys())
9951 raise errors.OpPrereqError("Missing block device(s): %s" %
9952 utils.CommaJoin(delta),
9954 for dsk in self.disks:
9955 dsk[constants.IDISK_SIZE] = \
9956 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9958 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9960 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9961 # check OS parameters (remotely)
9962 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9964 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9966 # memory check on primary node
9967 #TODO(dynmem): use MINMEM for checking
9969 _CheckNodeFreeMemory(self, self.pnode.name,
9970 "creating instance %s" % self.op.instance_name,
9971 self.be_full[constants.BE_MAXMEM],
9974 self.dry_run_result = list(nodenames)
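# The ipolicy check above compares the instance spec (ispec) against the
# node group's bounds and collects human-readable violations.  A simplified
# stand-in showing the shape of that computation; the real helper is
# _ComputeIPolicyInstanceSpecViolation and the keys/bounds below are
# illustrative only:
def _ExampleSpecViolations(ispec, bounds):
  """bounds: dict of key -> (min, max); returns a list of violation strings.

  """
  violations = []
  for key, (min_val, max_val) in bounds.items():
    value = ispec.get(key)
    if value is None:
      continue
    if value < min_val or value > max_val:
      violations.append("%s value %s is out of range [%s, %s]" %
                        (key, value, min_val, max_val))
  return violations

# _ExampleSpecViolations({"memory": 64}, {"memory": (128, 32768)})
# returns ["memory value 64 is out of range [128, 32768]"]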
9976 def Exec(self, feedback_fn):
9977 """Create and add the instance to the cluster.
9980 instance = self.op.instance_name
9981 pnode_name = self.pnode.name
9983 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9984 self.owned_locks(locking.LEVEL_NODE)), \
9985 "Node locks differ from node resource locks"
9987 ht_kind = self.op.hypervisor
9988 if ht_kind in constants.HTS_REQ_PORT:
9989 network_port = self.cfg.AllocatePort()
9993 disks = _GenerateDiskTemplate(self,
9994 self.op.disk_template,
9995 instance, pnode_name,
9998 self.instance_file_storage_dir,
9999 self.op.file_driver,
10004 iobj = objects.Instance(name=instance, os=self.op.os_type,
10005 primary_node=pnode_name,
10006 nics=self.nics, disks=disks,
10007 disk_template=self.op.disk_template,
10008 admin_state=constants.ADMINST_DOWN,
10009 network_port=network_port,
10010 beparams=self.op.beparams,
10011 hvparams=self.op.hvparams,
10012 hypervisor=self.op.hypervisor,
10013 osparams=self.op.osparams,
10017 for tag in self.op.tags:
10020 if self.adopt_disks:
10021 if self.op.disk_template == constants.DT_PLAIN:
10022 # rename LVs to the newly-generated names; we need to construct
10023 # 'fake' LV disks with the old data, plus the new unique_id
10024 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10026 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10027 rename_to.append(t_dsk.logical_id)
10028 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10029 self.cfg.SetDiskID(t_dsk, pnode_name)
10030 result = self.rpc.call_blockdev_rename(pnode_name,
10031 zip(tmp_disks, rename_to))
10032 result.Raise("Failed to rename adopted LVs")
10034 feedback_fn("* creating instance disks...")
10036 _CreateDisks(self, iobj)
10037 except errors.OpExecError:
10038 self.LogWarning("Device creation failed, reverting...")
10040 _RemoveDisks(self, iobj)
10042 self.cfg.ReleaseDRBDMinors(instance)
10045 feedback_fn("adding instance %s to cluster config" % instance)
10047 self.cfg.AddInstance(iobj, self.proc.GetECId())
10049 # Declare that we don't want to remove the instance lock anymore, as we've
10050 # added the instance to the config
10051 del self.remove_locks[locking.LEVEL_INSTANCE]
10053 if self.op.mode == constants.INSTANCE_IMPORT:
10054 # Release unused nodes
10055 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10057 # Release all nodes
10058 _ReleaseLocks(self, locking.LEVEL_NODE)
10061 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10062 feedback_fn("* wiping instance disks...")
10064 _WipeDisks(self, iobj)
10065 except errors.OpExecError, err:
10066 logging.exception("Wiping disks failed")
10067 self.LogWarning("Wiping instance disks failed (%s)", err)
10071 # Something is already wrong with the disks, don't do anything else
10073 elif self.op.wait_for_sync:
10074 disk_abort = not _WaitForSync(self, iobj)
10075 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10076 # make sure the disks are not degraded (still sync-ing is ok)
10077 feedback_fn("* checking mirrors status")
10078 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10083 _RemoveDisks(self, iobj)
10084 self.cfg.RemoveInstance(iobj.name)
10085 # Make sure the instance lock gets removed
10086 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10087 raise errors.OpExecError("There are some degraded disks for"
10090 # Release all node resource locks
10091 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10093 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10094 if self.op.mode == constants.INSTANCE_CREATE:
10095 if not self.op.no_install:
10096 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10097 not self.op.wait_for_sync)
10099 feedback_fn("* pausing disk sync to install instance OS")
10100 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10102 for idx, success in enumerate(result.payload):
10104 logging.warn("pause-sync of instance %s for disk %d failed",
10107 feedback_fn("* running the instance OS create scripts...")
10108 # FIXME: pass debug option from opcode to backend
10110 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10111 self.op.debug_level)
10113 feedback_fn("* resuming disk sync")
10114 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10116 for idx, success in enumerate(result.payload):
10118 logging.warn("resume-sync of instance %s for disk %d failed",
10121 os_add_result.Raise("Could not add os for instance %s"
10122 " on node %s" % (instance, pnode_name))
10124 elif self.op.mode == constants.INSTANCE_IMPORT:
10125 feedback_fn("* running the instance OS import scripts...")
10129 for idx, image in enumerate(self.src_images):
10133 # FIXME: pass debug option from opcode to backend
10134 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10135 constants.IEIO_FILE, (image, ),
10136 constants.IEIO_SCRIPT,
10137 (iobj.disks[idx], idx),
10139 transfers.append(dt)
10142 masterd.instance.TransferInstanceData(self, feedback_fn,
10143 self.op.src_node, pnode_name,
10144 self.pnode.secondary_ip,
10146 if not compat.all(import_result):
10147 self.LogWarning("Some disks for instance %s on node %s were not"
10148 " imported successfully" % (instance, pnode_name))
10150 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10151 feedback_fn("* preparing remote import...")
10152 # The source cluster will stop the instance before attempting to make a
10153 # connection. In some cases stopping an instance can take a long time,
10154 # hence the shutdown timeout is added to the connection timeout.
10155 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10156 self.op.source_shutdown_timeout)
10157 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10159 assert iobj.primary_node == self.pnode.name
10161 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10162 self.source_x509_ca,
10163 self._cds, timeouts)
10164 if not compat.all(disk_results):
10165 # TODO: Should the instance still be started, even if some disks
10166 # failed to import (valid for local imports, too)?
10167 self.LogWarning("Some disks for instance %s on node %s were not"
10168 " imported successfully" % (instance, pnode_name))
10170 # Run rename script on newly imported instance
10171 assert iobj.name == instance
10172 feedback_fn("Running rename script for %s" % instance)
10173 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10174 self.source_instance_name,
10175 self.op.debug_level)
10176 if result.fail_msg:
10177 self.LogWarning("Failed to run rename script for %s on node"
10178 " %s: %s" % (instance, pnode_name, result.fail_msg))
10181 # also checked in the prereq part
10182 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10185 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10188 iobj.admin_state = constants.ADMINST_UP
10189 self.cfg.Update(iobj, feedback_fn)
10190 logging.info("Starting instance %s on node %s", instance, pnode_name)
10191 feedback_fn("* starting instance...")
10192 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10194 result.Raise("Could not start instance")
10196 return list(iobj.all_nodes)
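# For orientation, a rough sketch of the opcode that drives this LU.  The
# keyword names mirror the self.op attributes read above; the exact opcode
# class and signature should be taken from ganeti.opcodes, so treat this as
# an assumption-laden example, with example values throughout:
def _ExampleBuildCreateOpCode():
  return opcodes.OpInstanceCreate(instance_name="inst1.example.com",
                                  mode=constants.INSTANCE_CREATE,
                                  disk_template=constants.DT_PLAIN,
                                  disks=[{constants.IDISK_SIZE: 10240}],
                                  nics=[{}],
                                  os_type="debootstrap+default",
                                  pnode="node1.example.com",
                                  start=True,
                                  wait_for_sync=True)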
10199 def _CheckRADOSFreeSpace():
10200 """Compute disk size requirements inside the RADOS cluster.
10203 # For the RADOS cluster we assume there is always enough space.
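# If a real check were ever wanted here, one hypothetical approach would be
# to query the cluster (e.g. "ceph df --format json") and compare the free
# space against the requested disk sizes.  Everything below is illustrative:
# the command, the JSON keys and the helper name are assumptions, not a
# tested implementation.
def _ExampleRadosFreeBytes():
  import json
  result = utils.RunCmd(["ceph", "df", "--format", "json"])
  if result.failed:
    raise errors.OpPrereqError("Cannot query the RADOS cluster: %s" %
                               result.output, errors.ECODE_ENVIRON)
  data = json.loads(result.stdout)
  return data["stats"]["total_avail_bytes"]  # key name is an assumption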
10207 class LUInstanceConsole(NoHooksLU):
10208 """Connect to an instance's console.
10210 This is somewhat special in that it returns the command line that
10211 you need to run on the master node in order to connect to the
10217 def ExpandNames(self):
10218 self.share_locks = _ShareAll()
10219 self._ExpandAndLockInstance()
10221 def CheckPrereq(self):
10222 """Check prerequisites.
10224 This checks that the instance is in the cluster.
10227 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10228 assert self.instance is not None, \
10229 "Cannot retrieve locked instance %s" % self.op.instance_name
10230 _CheckNodeOnline(self, self.instance.primary_node)
10232 def Exec(self, feedback_fn):
10233 """Connect to the console of an instance
10236 instance = self.instance
10237 node = instance.primary_node
10239 node_insts = self.rpc.call_instance_list([node],
10240 [instance.hypervisor])[node]
10241 node_insts.Raise("Can't get node information from %s" % node)
10243 if instance.name not in node_insts.payload:
10244 if instance.admin_state == constants.ADMINST_UP:
10245 state = constants.INSTST_ERRORDOWN
10246 elif instance.admin_state == constants.ADMINST_DOWN:
10247 state = constants.INSTST_ADMINDOWN
10249 state = constants.INSTST_ADMINOFFLINE
10250 raise errors.OpExecError("Instance %s is not running (state %s)" %
10251 (instance.name, state))
10253 logging.debug("Connecting to console of %s on %s", instance.name, node)
10255 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10258 def _GetInstanceConsole(cluster, instance):
10259 """Returns console information for an instance.
10261 @type cluster: L{objects.Cluster}
10262 @type instance: L{objects.Instance}
10266 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10267 # beparams and hvparams are passed separately, to avoid editing the
10268 # instance and then saving the defaults in the instance itself.
10269 hvparams = cluster.FillHV(instance)
10270 beparams = cluster.FillBE(instance)
10271 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10273 assert console.instance == instance.name
10274 assert console.Validate()
10276 return console.ToDict()
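# FillHV/FillBE above merge cluster-level defaults with the per-instance
# overrides without writing the merged result back to the instance.  The
# same idea on plain dicts, for illustration (parameter names and values
# below are examples only):
def _ExampleFillParams(defaults, overrides):
  filled = defaults.copy()
  filled.update(overrides)
  return filled

# _ExampleFillParams({"kernel_path": "/boot/vmlinuz", "serial_console": True},
#                    {"serial_console": False})
# returns {"kernel_path": "/boot/vmlinuz", "serial_console": False}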
10279 class LUInstanceReplaceDisks(LogicalUnit):
10280 """Replace the disks of an instance.
10283 HPATH = "mirrors-replace"
10284 HTYPE = constants.HTYPE_INSTANCE
10287 def CheckArguments(self):
10288 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10289 self.op.iallocator)
10291 def ExpandNames(self):
10292 self._ExpandAndLockInstance()
10294 assert locking.LEVEL_NODE not in self.needed_locks
10295 assert locking.LEVEL_NODE_RES not in self.needed_locks
10296 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10298 assert self.op.iallocator is None or self.op.remote_node is None, \
10299 "Conflicting options"
10301 if self.op.remote_node is not None:
10302 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10304 # Warning: do not remove the locking of the new secondary here
10305 # unless DRBD8.AddChildren is changed to work in parallel;
10306 # currently it doesn't since parallel invocations of
10307 # FindUnusedMinor will conflict
10308 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10309 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10311 self.needed_locks[locking.LEVEL_NODE] = []
10312 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10314 if self.op.iallocator is not None:
10315 # iallocator will select a new node in the same group
10316 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10318 self.needed_locks[locking.LEVEL_NODE_RES] = []
10320 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10321 self.op.iallocator, self.op.remote_node,
10322 self.op.disks, False, self.op.early_release,
10323 self.op.ignore_ipolicy)
10325 self.tasklets = [self.replacer]
10327 def DeclareLocks(self, level):
10328 if level == locking.LEVEL_NODEGROUP:
10329 assert self.op.remote_node is None
10330 assert self.op.iallocator is not None
10331 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10333 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10334 # Lock all groups used by instance optimistically; this requires going
10335 # via the node before it's locked, requiring verification later on
10336 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10337 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10339 elif level == locking.LEVEL_NODE:
10340 if self.op.iallocator is not None:
10341 assert self.op.remote_node is None
10342 assert not self.needed_locks[locking.LEVEL_NODE]
10344 # Lock member nodes of all locked groups
10345 self.needed_locks[locking.LEVEL_NODE] = [node_name
10346 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10347 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10349 self._LockInstancesNodes()
10350 elif level == locking.LEVEL_NODE_RES:
10352 self.needed_locks[locking.LEVEL_NODE_RES] = \
10353 self.needed_locks[locking.LEVEL_NODE]
10355 def BuildHooksEnv(self):
10356 """Build hooks env.
10358 This runs on the master, the primary and all the secondaries.
10361 instance = self.replacer.instance
10363 "MODE": self.op.mode,
10364 "NEW_SECONDARY": self.op.remote_node,
10365 "OLD_SECONDARY": instance.secondary_nodes[0],
10367 env.update(_BuildInstanceHookEnvByObject(self, instance))
10370 def BuildHooksNodes(self):
10371 """Build hooks nodes.
10374 instance = self.replacer.instance
10376 self.cfg.GetMasterNode(),
10377 instance.primary_node,
10379 if self.op.remote_node is not None:
10380 nl.append(self.op.remote_node)
10383 def CheckPrereq(self):
10384 """Check prerequisites.
10387 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10388 self.op.iallocator is None)
10390 # Verify if node group locks are still correct
10391 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10393 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10395 return LogicalUnit.CheckPrereq(self)
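# For orientation, the opcode combinations accepted here (see
# TLReplaceDisks.CheckArguments below):
#   - replace disks on the primary:   mode=constants.REPLACE_DISK_PRI
#   - replace disks on the secondary: mode=constants.REPLACE_DISK_SEC
#   - change the secondary node:      mode=constants.REPLACE_DISK_CHG plus
#     either remote_node=... or iallocator=..., but not both
# A rough sketch of the last case; the exact opcode class and signature are
# assumptions mirroring the self.op attributes used above:
def _ExampleBuildReplaceOpCode():
  return opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                        mode=constants.REPLACE_DISK_CHG,
                                        iallocator="hail",
                                        early_release=False)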
10398 class TLReplaceDisks(Tasklet):
10399 """Replaces disks for an instance.
10401 Note: Locking is not within the scope of this class.
10404 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10405 disks, delay_iallocator, early_release, ignore_ipolicy):
10406 """Initializes this class.
10409 Tasklet.__init__(self, lu)
10412 self.instance_name = instance_name
10414 self.iallocator_name = iallocator_name
10415 self.remote_node = remote_node
10417 self.delay_iallocator = delay_iallocator
10418 self.early_release = early_release
10419 self.ignore_ipolicy = ignore_ipolicy
10422 self.instance = None
10423 self.new_node = None
10424 self.target_node = None
10425 self.other_node = None
10426 self.remote_node_info = None
10427 self.node_secondary_ip = None
10430 def CheckArguments(mode, remote_node, iallocator):
10431 """Helper function for users of this class.
10434 # check for valid parameter combination
10435 if mode == constants.REPLACE_DISK_CHG:
10436 if remote_node is None and iallocator is None:
10437 raise errors.OpPrereqError("When changing the secondary either an"
10438 " iallocator script must be used or the"
10439 " new node given", errors.ECODE_INVAL)
10441 if remote_node is not None and iallocator is not None:
10442 raise errors.OpPrereqError("Give either the iallocator or the new"
10443 " secondary, not both", errors.ECODE_INVAL)
10445 elif remote_node is not None or iallocator is not None:
10446 # Not replacing the secondary
10447 raise errors.OpPrereqError("The iallocator and new node options can"
10448 " only be used when changing the"
10449 " secondary node", errors.ECODE_INVAL)
10452 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10453 """Compute a new secondary node using an IAllocator.
10456 ial = IAllocator(lu.cfg, lu.rpc,
10457 mode=constants.IALLOCATOR_MODE_RELOC,
10458 name=instance_name,
10459 relocate_from=list(relocate_from))
10461 ial.Run(iallocator_name)
10463 if not ial.success:
10464 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10465 " %s" % (iallocator_name, ial.info),
10466 errors.ECODE_NORES)
10468 if len(ial.result) != ial.required_nodes:
10469 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10470 " of nodes (%s), required %s" %
10472 len(ial.result), ial.required_nodes),
10473 errors.ECODE_FAULT)
10475 remote_node_name = ial.result[0]
10477 lu.LogInfo("Selected new secondary for instance '%s': %s",
10478 instance_name, remote_node_name)
10480 return remote_node_name
10482 def _FindFaultyDisks(self, node_name):
10483 """Wrapper for L{_FindFaultyInstanceDisks}.
10486 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10489 def _CheckDisksActivated(self, instance):
10490 """Checks if the instance disks are activated.
10492 @param instance: The instance to check disks
10493 @return: True if they are activated, False otherwise
10496 nodes = instance.all_nodes
10498 for idx, dev in enumerate(instance.disks):
10500 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10501 self.cfg.SetDiskID(dev, node)
10503 result = self.rpc.call_blockdev_find(node, dev)
10507 elif result.fail_msg or not result.payload:
10512 def CheckPrereq(self):
10513 """Check prerequisites.
10515 This checks that the instance is in the cluster.
10518 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10519 assert instance is not None, \
10520 "Cannot retrieve locked instance %s" % self.instance_name
10522 if instance.disk_template != constants.DT_DRBD8:
10523 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10524 " instances", errors.ECODE_INVAL)
10526 if len(instance.secondary_nodes) != 1:
10527 raise errors.OpPrereqError("The instance has a strange layout,"
10528 " expected one secondary but found %d" %
10529 len(instance.secondary_nodes),
10530 errors.ECODE_FAULT)
10532 if not self.delay_iallocator:
10533 self._CheckPrereq2()
10535 def _CheckPrereq2(self):
10536 """Check prerequisites, second part.
10538 This function should always be part of CheckPrereq. It was separated and is
10539 now called from Exec because during node evacuation iallocator was only
10540 called with an unmodified cluster model, not taking planned changes into
10544 instance = self.instance
10545 secondary_node = instance.secondary_nodes[0]
10547 if self.iallocator_name is None:
10548 remote_node = self.remote_node
10550 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10551 instance.name, instance.secondary_nodes)
10553 if remote_node is None:
10554 self.remote_node_info = None
10556 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10557 "Remote node '%s' is not locked" % remote_node
10559 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10560 assert self.remote_node_info is not None, \
10561 "Cannot retrieve locked node %s" % remote_node
10563 if remote_node == self.instance.primary_node:
10564 raise errors.OpPrereqError("The specified node is the primary node of"
10565 " the instance", errors.ECODE_INVAL)
10567 if remote_node == secondary_node:
10568 raise errors.OpPrereqError("The specified node is already the"
10569 " secondary node of the instance",
10570 errors.ECODE_INVAL)
10572 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10573 constants.REPLACE_DISK_CHG):
10574 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10575 errors.ECODE_INVAL)
10577 if self.mode == constants.REPLACE_DISK_AUTO:
10578 if not self._CheckDisksActivated(instance):
10579 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10580 " first" % self.instance_name,
10581 errors.ECODE_STATE)
10582 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10583 faulty_secondary = self._FindFaultyDisks(secondary_node)
10585 if faulty_primary and faulty_secondary:
10586 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10587 " one node and can not be repaired"
10588 " automatically" % self.instance_name,
10589 errors.ECODE_STATE)
10592 self.disks = faulty_primary
10593 self.target_node = instance.primary_node
10594 self.other_node = secondary_node
10595 check_nodes = [self.target_node, self.other_node]
10596 elif faulty_secondary:
10597 self.disks = faulty_secondary
10598 self.target_node = secondary_node
10599 self.other_node = instance.primary_node
10600 check_nodes = [self.target_node, self.other_node]
10606 # Non-automatic modes
10607 if self.mode == constants.REPLACE_DISK_PRI:
10608 self.target_node = instance.primary_node
10609 self.other_node = secondary_node
10610 check_nodes = [self.target_node, self.other_node]
10612 elif self.mode == constants.REPLACE_DISK_SEC:
10613 self.target_node = secondary_node
10614 self.other_node = instance.primary_node
10615 check_nodes = [self.target_node, self.other_node]
10617 elif self.mode == constants.REPLACE_DISK_CHG:
10618 self.new_node = remote_node
10619 self.other_node = instance.primary_node
10620 self.target_node = secondary_node
10621 check_nodes = [self.new_node, self.other_node]
10623 _CheckNodeNotDrained(self.lu, remote_node)
10624 _CheckNodeVmCapable(self.lu, remote_node)
10626 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10627 assert old_node_info is not None
10628 if old_node_info.offline and not self.early_release:
10629 # doesn't make sense to delay the release
10630 self.early_release = True
10631 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10632 " early-release mode", secondary_node)
10635 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10638 # If not specified all disks should be replaced
10640 self.disks = range(len(self.instance.disks))
10642 # TODO: This is ugly, but right now we can't distinguish between an
10643 # internally submitted opcode and an external one. We should fix that.
10644 if self.remote_node_info:
10645 # We change the node, lets verify it still meets instance policy
10646 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10647 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10649 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10650 ignore=self.ignore_ipolicy)
10652 # TODO: compute disk parameters
10653 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10654 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10655 if primary_node_info.group != secondary_node_info.group:
10656 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10657 " different node groups; the disk parameters of the"
10658 " primary node's group will be applied.")
10660 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10662 for node in check_nodes:
10663 _CheckNodeOnline(self.lu, node)
10665 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10668 if node_name is not None)
10670 # Release unneeded node and node resource locks
10671 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10672 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10674 # Release any owned node group
10675 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10676 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10678 # Check whether disks are valid
10679 for disk_idx in self.disks:
10680 instance.FindDisk(disk_idx)
10682 # Get secondary node IP addresses
10683 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10684 in self.cfg.GetMultiNodeInfo(touched_nodes))
10686 def Exec(self, feedback_fn):
10687 """Execute disk replacement.
10689 This dispatches the disk replacement to the appropriate handler.
10692 if self.delay_iallocator:
10693 self._CheckPrereq2()
10696 # Verify owned locks before starting operation
10697 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10698 assert set(owned_nodes) == set(self.node_secondary_ip), \
10699 ("Incorrect node locks, owning %s, expected %s" %
10700 (owned_nodes, self.node_secondary_ip.keys()))
10701 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10702 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10704 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10705 assert list(owned_instances) == [self.instance_name], \
10706 "Instance '%s' not locked" % self.instance_name
10708 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10709 "Should not own any node group lock at this point"
10712 feedback_fn("No disks need replacement")
10715 feedback_fn("Replacing disk(s) %s for %s" %
10716 (utils.CommaJoin(self.disks), self.instance.name))
10718 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10720 # Activate the instance disks if we're replacing them on a down instance
10722 _StartInstanceDisks(self.lu, self.instance, True)
10725 # Should we replace the secondary node?
10726 if self.new_node is not None:
10727 fn = self._ExecDrbd8Secondary
10729 fn = self._ExecDrbd8DiskOnly
10731 result = fn(feedback_fn)
10733 # Deactivate the instance disks if we're replacing them on a
10736 _SafeShutdownInstanceDisks(self.lu, self.instance)
10738 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10741 # Verify owned locks
10742 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10743 nodes = frozenset(self.node_secondary_ip)
10744 assert ((self.early_release and not owned_nodes) or
10745 (not self.early_release and not (set(owned_nodes) - nodes))), \
10746 ("Not owning the correct locks, early_release=%s, owned=%r,"
10747 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10751 def _CheckVolumeGroup(self, nodes):
10752 self.lu.LogInfo("Checking volume groups")
10754 vgname = self.cfg.GetVGName()
10756 # Make sure volume group exists on all involved nodes
10757 results = self.rpc.call_vg_list(nodes)
10759 raise errors.OpExecError("Can't list volume groups on the nodes")
10762 res = results[node]
10763 res.Raise("Error checking node %s" % node)
10764 if vgname not in res.payload:
10765 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10768 def _CheckDisksExistence(self, nodes):
10769 # Check disk existence
10770 for idx, dev in enumerate(self.instance.disks):
10771 if idx not in self.disks:
10775 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10776 self.cfg.SetDiskID(dev, node)
10778 result = self.rpc.call_blockdev_find(node, dev)
10780 msg = result.fail_msg
10781 if msg or not result.payload:
10783 msg = "disk not found"
10784 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10787 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10788 for idx, dev in enumerate(self.instance.disks):
10789 if idx not in self.disks:
10792 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10795 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10797 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10798 " replace disks for instance %s" %
10799 (node_name, self.instance.name))
10801 def _CreateNewStorage(self, node_name):
10802 """Create new storage on the primary or secondary node.
10804 This is only used for same-node replaces, not for changing the
10805 secondary node, hence we don't want to modify the existing disk.
10810 for idx, dev in enumerate(self.instance.disks):
10811 if idx not in self.disks:
10814 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10816 self.cfg.SetDiskID(dev, node_name)
10818 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10819 names = _GenerateUniqueNames(self.lu, lv_names)
10821 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10823 vg_data = dev.children[0].logical_id[0]
10824 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10825 logical_id=(vg_data, names[0]), params=data_p)
10826 vg_meta = dev.children[1].logical_id[0]
10827 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10828 logical_id=(vg_meta, names[1]), params=meta_p)
10830 new_lvs = [lv_data, lv_meta]
10831 old_lvs = [child.Copy() for child in dev.children]
10832 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10834 # we pass force_create=True to force the LVM creation
10835 for new_lv in new_lvs:
10836 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10837 _GetInstanceInfoText(self.instance), False)
10841 def _CheckDevices(self, node_name, iv_names):
10842 for name, (dev, _, _) in iv_names.iteritems():
10843 self.cfg.SetDiskID(dev, node_name)
10845 result = self.rpc.call_blockdev_find(node_name, dev)
10847 msg = result.fail_msg
10848 if msg or not result.payload:
10850 msg = "disk not found"
10851 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10854 if result.payload.is_degraded:
10855 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10857 def _RemoveOldStorage(self, node_name, iv_names):
10858 for name, (_, old_lvs, _) in iv_names.iteritems():
10859 self.lu.LogInfo("Removing logical volumes for %s" % name)
10862 self.cfg.SetDiskID(lv, node_name)
10864 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10866 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10867 hint="remove unused LVs manually")
10869 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10870 """Replace a disk on the primary or secondary for DRBD 8.
10872 The algorithm for replace is quite complicated:
10874 1. for each disk to be replaced:
10876 1. create new LVs on the target node with unique names
10877 1. detach old LVs from the drbd device
10878 1. rename old LVs to name_replaced.<time_t>
10879 1. rename new LVs to old LVs
10880 1. attach the new LVs (with the old names now) to the drbd device
10882 1. wait for sync across all devices
10884 1. for each modified disk:
10886 1. remove old LVs (which have the name name_replaced.<time_t>)
10888 Failures are not very well handled.
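# A condensed sketch of the per-disk rename dance performed below: the old
# LVs are first moved out of the way under a unique suffix, then the new LVs
# take over the old names, so the drbd device keeps its expected backing
# names.  The helper and the flat "vg/lv" strings are simplified stand-ins,
# not the real objects.Disk API:
def _ExampleRenamePlan(old_names, new_names, suffix):
  """Returns two lists of (current_name, new_name) rename pairs.

  """
  moved_aside = ["%s_replaced-%s" % (name, suffix) for name in old_names]
  rename_old_aside = zip(old_names, moved_aside)
  rename_new_to_old = zip(new_names, old_names)
  return (rename_old_aside, rename_new_to_old)

# _ExampleRenamePlan(["xenvg/disk0_data"], ["xenvg/new.data"], 1234567890)
# returns ([("xenvg/disk0_data", "xenvg/disk0_data_replaced-1234567890")],
#          [("xenvg/new.data", "xenvg/disk0_data")])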
10893 # Step: check device activation
10894 self.lu.LogStep(1, steps_total, "Check device existence")
10895 self._CheckDisksExistence([self.other_node, self.target_node])
10896 self._CheckVolumeGroup([self.target_node, self.other_node])
10898 # Step: check other node consistency
10899 self.lu.LogStep(2, steps_total, "Check peer consistency")
10900 self._CheckDisksConsistency(self.other_node,
10901 self.other_node == self.instance.primary_node,
10904 # Step: create new storage
10905 self.lu.LogStep(3, steps_total, "Allocate new storage")
10906 iv_names = self._CreateNewStorage(self.target_node)
10908 # Step: for each lv, detach+rename*2+attach
10909 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10910 for dev, old_lvs, new_lvs in iv_names.itervalues():
10911 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10913 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10915 result.Raise("Can't detach drbd from local storage on node"
10916 " %s for device %s" % (self.target_node, dev.iv_name))
10918 #cfg.Update(instance)
10920 # ok, we created the new LVs, so now we know we have the needed
10921 # storage; as such, we proceed on the target node to rename
10922 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10923 # using the assumption that logical_id == physical_id (which in
10924 # turn is the unique_id on that node)
10926 # FIXME(iustin): use a better name for the replaced LVs
10927 temp_suffix = int(time.time())
10928 ren_fn = lambda d, suff: (d.physical_id[0],
10929 d.physical_id[1] + "_replaced-%s" % suff)
10931 # Build the rename list based on what LVs exist on the node
10932 rename_old_to_new = []
10933 for to_ren in old_lvs:
10934 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10935 if not result.fail_msg and result.payload:
10937 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10939 self.lu.LogInfo("Renaming the old LVs on the target node")
10940 result = self.rpc.call_blockdev_rename(self.target_node,
10942 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10944 # Now we rename the new LVs to the old LVs
10945 self.lu.LogInfo("Renaming the new LVs on the target node")
10946 rename_new_to_old = [(new, old.physical_id)
10947 for old, new in zip(old_lvs, new_lvs)]
10948 result = self.rpc.call_blockdev_rename(self.target_node,
10950 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10952 # Intermediate steps of in memory modifications
10953 for old, new in zip(old_lvs, new_lvs):
10954 new.logical_id = old.logical_id
10955 self.cfg.SetDiskID(new, self.target_node)
10957 # We need to modify old_lvs so that removal later removes the
10958 # right LVs, not the newly added ones; note that old_lvs is a
10960 for disk in old_lvs:
10961 disk.logical_id = ren_fn(disk, temp_suffix)
10962 self.cfg.SetDiskID(disk, self.target_node)
10964 # Now that the new lvs have the old name, we can add them to the device
10965 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10966 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10968 msg = result.fail_msg
10970 for new_lv in new_lvs:
10971 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10974 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10975 hint=("manually clean up the unused logical"
10977 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10979 cstep = itertools.count(5)
10981 if self.early_release:
10982 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10983 self._RemoveOldStorage(self.target_node, iv_names)
10984 # TODO: Check if releasing locks early still makes sense
10985 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10987 # Release all resource locks except those used by the instance
10988 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10989 keep=self.node_secondary_ip.keys())
10991 # Release all node locks while waiting for sync
10992 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10994 # TODO: Can the instance lock be downgraded here? Take the optional disk
10995 # shutdown in the caller into consideration.
10998 # This can fail as the old devices are degraded and _WaitForSync
10999 # does a combined result over all disks, so we don't check its return value
11000 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11001 _WaitForSync(self.lu, self.instance)
11003 # Check all devices manually
11004 self._CheckDevices(self.instance.primary_node, iv_names)
11006 # Step: remove old storage
11007 if not self.early_release:
11008 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11009 self._RemoveOldStorage(self.target_node, iv_names)
11011 def _ExecDrbd8Secondary(self, feedback_fn):
11012 """Replace the secondary node for DRBD 8.
11014 The algorithm for replace is quite complicated:
11015 - for all disks of the instance:
11016 - create new LVs on the new node with same names
11017 - shutdown the drbd device on the old secondary
11018 - disconnect the drbd network on the primary
11019 - create the drbd device on the new secondary
11020 - network attach the drbd on the primary, using an artifice:
11021 the drbd code for Attach() will connect to the network if it
11022 finds a device which is connected to the good local disks but
11023 not network enabled
11024 - wait for sync across all devices
11025 - remove all disks from the old secondary
11027 Failures are not very well handled.
11032 pnode = self.instance.primary_node
11034 # Step: check device activation
11035 self.lu.LogStep(1, steps_total, "Check device existence")
11036 self._CheckDisksExistence([self.instance.primary_node])
11037 self._CheckVolumeGroup([self.instance.primary_node])
11039 # Step: check other node consistency
11040 self.lu.LogStep(2, steps_total, "Check peer consistency")
11041 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11043 # Step: create new storage
11044 self.lu.LogStep(3, steps_total, "Allocate new storage")
11045 for idx, dev in enumerate(self.instance.disks):
11046 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11047 (self.new_node, idx))
11048 # we pass force_create=True to force LVM creation
11049 for new_lv in dev.children:
11050 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11051 _GetInstanceInfoText(self.instance), False)
# Step 4: drbd minors and drbd setup changes
11054 # after this, we must manually remove the drbd minors on both the
11055 # error and the success paths
11056 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11057 minors = self.cfg.AllocateDRBDMinor([self.new_node
11058 for dev in self.instance.disks],
11059 self.instance.name)
11060 logging.debug("Allocated minors %r", minors)
11063 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11064 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11065 (self.new_node, idx))
11066 # create new devices on new_node; note that we create two IDs:
11067 # one without port, so the drbd will be activated without
11068 # networking information on the new node at this stage, and one
11069 # with network, for the latter activation in step 4
11070 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
if self.instance.primary_node == o_node1:
  p_minor = o_minor1
else:
  assert self.instance.primary_node == o_node2, "Three-node instance?"
  p_minor = o_minor2
11077 new_alone_id = (self.instance.primary_node, self.new_node, None,
11078 p_minor, new_minor, o_secret)
11079 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11080 p_minor, new_minor, o_secret)
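# Minimal sketch of the two logical IDs built above, with hypothetical
# values (primary "node1", new secondary "node3", port 11000, primary minor
# 0, new minor 2, shared secret "s3cr3t"):
#   new_alone_id = ("node1", "node3", None,  0, 2, "s3cr3t")
#   new_net_id   = ("node1", "node3", 11000, 0, 2, "s3cr3t")
# The port-less tuple lets the DRBD device be brought up on the new node
# without any networking; the networked tuple is what is stored in the
# configuration and used later for the standalone => connected attach.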
11082 iv_names[idx] = (dev, dev.children, new_net_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
              new_net_id)
11085 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11086 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11087 logical_id=new_alone_id,
11088 children=dev.children,
11090 params=drbd_params)
try:
  _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                        _GetInstanceInfoText(self.instance), False)
except errors.GenericError:
  self.cfg.ReleaseDRBDMinors(self.instance.name)
  raise
11098 # We have new devices, shutdown the drbd on the old secondary
11099 for idx, dev in enumerate(self.instance.disks):
11100 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11101 self.cfg.SetDiskID(dev, self.target_node)
msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
if msg:
  self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                     " node: %s" % (idx, msg),
                     hint=("Please cleanup this device manually as"
                           " soon as possible"))
11109 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11110 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11111 self.instance.disks)[pnode]
msg = result.fail_msg
if msg:
  # detaches didn't succeed (unlikely)
  self.cfg.ReleaseDRBDMinors(self.instance.name)
  raise errors.OpExecError("Can't detach the disks from the network on"
                           " old node: %s" % (msg,))
11120 # if we managed to detach at least one, we update all the disks of
11121 # the instance to point to the new secondary
11122 self.lu.LogInfo("Updating instance configuration")
11123 for dev, _, new_logical_id in iv_names.itervalues():
11124 dev.logical_id = new_logical_id
11125 self.cfg.SetDiskID(dev, self.instance.primary_node)
11127 self.cfg.Update(self.instance, feedback_fn)
11129 # Release all node locks (the configuration has been updated)
11130 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11132 # and now perform the drbd attach
11133 self.lu.LogInfo("Attaching primary drbds to new secondary"
11134 " (standalone => connected)")
11135 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11137 self.node_secondary_ip,
11138 self.instance.disks,
11139 self.instance.name,
11141 for to_node, to_result in result.items():
msg = to_result.fail_msg
if msg:
  self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                     to_node, msg,
                     hint=("please do a gnt-instance info to see the"
                           " status of disks"))
11149 cstep = itertools.count(5)
11151 if self.early_release:
11152 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11153 self._RemoveOldStorage(self.target_node, iv_names)
11154 # TODO: Check if releasing locks early still makes sense
11155 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11157 # Release all resource locks except those used by the instance
11158 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11159 keep=self.node_secondary_ip.keys())
11161 # TODO: Can the instance lock be downgraded here? Take the optional disk
11162 # shutdown in the caller into consideration.
11165 # This can fail as the old devices are degraded and _WaitForSync
11166 # does a combined result over all disks, so we don't check its return value
11167 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11168 _WaitForSync(self.lu, self.instance)
11170 # Check all devices manually
11171 self._CheckDevices(self.instance.primary_node, iv_names)
11173 # Step: remove old storage
11174 if not self.early_release:
11175 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11176 self._RemoveOldStorage(self.target_node, iv_names)
11179 class LURepairNodeStorage(NoHooksLU):
11180 """Repairs the volume group on a node.
11185 def CheckArguments(self):
11186 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11188 storage_type = self.op.storage_type
11190 if (constants.SO_FIX_CONSISTENCY not in
11191 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11192 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11193 " repaired" % storage_type,
11194 errors.ECODE_INVAL)
11196 def ExpandNames(self):
11197 self.needed_locks = {
11198 locking.LEVEL_NODE: [self.op.node_name],
11201 def _CheckFaultyDisks(self, instance, node_name):
11202 """Ensure faulty disks abort the opcode or at least warn."""
try:
  if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                              node_name, True):
    raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                               " node '%s'" % (instance.name, node_name),
                               errors.ECODE_STATE)
except errors.OpPrereqError, err:
  if self.op.ignore_consistency:
    self.proc.LogWarning(str(err.args[0]))
  else:
    raise
11215 def CheckPrereq(self):
11216 """Check prerequisites.
11219 # Check whether any instance on this node has faulty disks
11220 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
if inst.admin_state != constants.ADMINST_UP:
  continue
11223 check_nodes = set(inst.all_nodes)
11224 check_nodes.discard(self.op.node_name)
11225 for inst_node_name in check_nodes:
11226 self._CheckFaultyDisks(inst, inst_node_name)
11228 def Exec(self, feedback_fn):
11229 feedback_fn("Repairing storage unit '%s' on %s ..." %
11230 (self.op.name, self.op.node_name))
11232 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11233 result = self.rpc.call_storage_execute(self.op.node_name,
11234 self.op.storage_type, st_args,
11236 constants.SO_FIX_CONSISTENCY)
11237 result.Raise("Failed to repair storage unit '%s' on %s" %
11238 (self.op.name, self.op.node_name))
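# Illustrative invocation that ends up in this LU (the command line is shown
# as an assumption, not something defined in this module):
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# which asks node1.example.com to run the SO_FIX_CONSISTENCY operation on
# the volume group "xenvg".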
11241 class LUNodeEvacuate(NoHooksLU):
11242 """Evacuates instances off a list of nodes.
11247 _MODE2IALLOCATOR = {
11248 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11249 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11250 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11252 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11253 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11254 constants.IALLOCATOR_NEVAC_MODES)
11256 def CheckArguments(self):
11257 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11259 def ExpandNames(self):
11260 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11262 if self.op.remote_node is not None:
11263 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11264 assert self.op.remote_node
11266 if self.op.remote_node == self.op.node_name:
11267 raise errors.OpPrereqError("Can not use evacuated node as a new"
11268 " secondary node", errors.ECODE_INVAL)
11270 if self.op.mode != constants.NODE_EVAC_SEC:
11271 raise errors.OpPrereqError("Without the use of an iallocator only"
11272 " secondary instances can be evacuated",
11273 errors.ECODE_INVAL)
11276 self.share_locks = _ShareAll()
11277 self.needed_locks = {
11278 locking.LEVEL_INSTANCE: [],
11279 locking.LEVEL_NODEGROUP: [],
11280 locking.LEVEL_NODE: [],
11283 # Determine nodes (via group) optimistically, needs verification once locks
11284 # have been acquired
11285 self.lock_nodes = self._DetermineNodes()
11287 def _DetermineNodes(self):
11288 """Gets the list of nodes to operate on.
11291 if self.op.remote_node is None:
11292 # Iallocator will choose any node(s) in the same group
11293 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
else:
  group_nodes = frozenset([self.op.remote_node])
11297 # Determine nodes to be locked
11298 return set([self.op.node_name]) | group_nodes
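# Example of the resulting lock set (hypothetical node names): evacuating
# "node1" without an explicit remote node in a group also containing "node2"
# and "node3" returns set(["node1", "node2", "node3"]); with
# remote_node="node9" it returns set(["node1", "node9"]).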
11300 def _DetermineInstances(self):
11301 """Builds list of instances to operate on.
11304 assert self.op.mode in constants.NODE_EVAC_MODES
11306 if self.op.mode == constants.NODE_EVAC_PRI:
11307 # Primary instances only
11308 inst_fn = _GetNodePrimaryInstances
11309 assert self.op.remote_node is None, \
11310 "Evacuating primary instances requires iallocator"
11311 elif self.op.mode == constants.NODE_EVAC_SEC:
11312 # Secondary instances only
11313 inst_fn = _GetNodeSecondaryInstances
else:
  assert self.op.mode == constants.NODE_EVAC_ALL
11317 inst_fn = _GetNodeInstances
11318 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11320 raise errors.OpPrereqError("Due to an issue with the iallocator"
11321 " interface it is not possible to evacuate"
11322 " all instances at once; specify explicitly"
11323 " whether to evacuate primary or secondary"
11325 errors.ECODE_INVAL)
11327 return inst_fn(self.cfg, self.op.node_name)
11329 def DeclareLocks(self, level):
11330 if level == locking.LEVEL_INSTANCE:
11331 # Lock instances optimistically, needs verification once node and group
11332 # locks have been acquired
11333 self.needed_locks[locking.LEVEL_INSTANCE] = \
11334 set(i.name for i in self._DetermineInstances())
11336 elif level == locking.LEVEL_NODEGROUP:
11337 # Lock node groups for all potential target nodes optimistically, needs
11338 # verification once nodes have been acquired
11339 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11340 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11342 elif level == locking.LEVEL_NODE:
11343 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11345 def CheckPrereq(self):
11347 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11348 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11349 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11351 need_nodes = self._DetermineNodes()
11353 if not owned_nodes.issuperset(need_nodes):
11354 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11355 " locks were acquired, current nodes are"
11356 " are '%s', used to be '%s'; retry the"
11358 (self.op.node_name,
11359 utils.CommaJoin(need_nodes),
11360 utils.CommaJoin(owned_nodes)),
11361 errors.ECODE_STATE)
11363 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11364 if owned_groups != wanted_groups:
11365 raise errors.OpExecError("Node groups changed since locks were acquired,"
11366 " current groups are '%s', used to be '%s';"
11367 " retry the operation" %
11368 (utils.CommaJoin(wanted_groups),
11369 utils.CommaJoin(owned_groups)))
11371 # Determine affected instances
11372 self.instances = self._DetermineInstances()
11373 self.instance_names = [i.name for i in self.instances]
11375 if set(self.instance_names) != owned_instances:
11376 raise errors.OpExecError("Instances on node '%s' changed since locks"
11377 " were acquired, current instances are '%s',"
11378 " used to be '%s'; retry the operation" %
11379 (self.op.node_name,
11380 utils.CommaJoin(self.instance_names),
11381 utils.CommaJoin(owned_instances)))
11383 if self.instance_names:
self.LogInfo("Evacuating instances from node '%s': %s",
             self.op.node_name,
             utils.CommaJoin(utils.NiceSort(self.instance_names)))
else:
  self.LogInfo("No instances to evacuate from node '%s'",
               self.op.node_name)
11391 if self.op.remote_node is not None:
11392 for i in self.instances:
11393 if i.primary_node == self.op.remote_node:
11394 raise errors.OpPrereqError("Node %s is the primary node of"
11395 " instance %s, cannot use it as"
11397 (self.op.remote_node, i.name),
11398 errors.ECODE_INVAL)
11400 def Exec(self, feedback_fn):
11401 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11403 if not self.instance_names:
# No instances to evacuate
jobs = []
11407 elif self.op.iallocator is not None:
11408 # TODO: Implement relocation to other group
11409 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11410 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11411 instances=list(self.instance_names))
11413 ial.Run(self.op.iallocator)
11415 if not ial.success:
11416 raise errors.OpPrereqError("Can't compute node evacuation using"
11417 " iallocator '%s': %s" %
11418 (self.op.iallocator, ial.info),
11419 errors.ECODE_NORES)
11421 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11423 elif self.op.remote_node is not None:
assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
  [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                  remote_node=self.op.remote_node,
                                  disks=[],
                                  mode=constants.REPLACE_DISK_CHG,
                                  early_release=self.op.early_release)]
  for instance_name in self.instance_names
  ]

else:
  raise errors.ProgrammerError("No iallocator or remote node")
11437 return ResultWithJobs(jobs)
11440 def _SetOpEarlyRelease(early_release, op):
11441 """Sets C{early_release} flag on opcodes if available.
11445 op.early_release = early_release
11446 except AttributeError:
11447 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice."""
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
11462 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11463 """Unpacks the result of change-group and node-evacuate iallocator requests.
11465 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11466 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11468 @type lu: L{LogicalUnit}
11469 @param lu: Logical unit instance
11470 @type alloc_result: tuple/list
11471 @param alloc_result: Result from iallocator
11472 @type early_release: bool
11473 @param early_release: Whether to release locks early if possible
11474 @type use_nodes: bool
11475 @param use_nodes: Whether to display node names instead of groups
11478 (moved, failed, jobs) = alloc_result
if failed:
  failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11482 for (name, reason) in failed)
11483 lu.LogWarning("Unable to evacuate instances %s", failreason)
11484 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
if moved:
  lu.LogInfo("Instances to be moved: %s",
11488 utils.CommaJoin("%s (to %s)" %
11489 (name, _NodeEvacDest(use_nodes, group, nodes))
11490 for (name, group, nodes) in moved))
11492 return [map(compat.partial(_SetOpEarlyRelease, early_release),
map(opcodes.OpCode.LoadOpCode, ops))
        for ops in jobs]
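# Shape of alloc_result, sketched with hypothetical names: the iallocator
# returns a (moved, failed, jobs) triple along the lines of
#   moved  = [("inst1", "group-uuid-1", ["node2", "node3"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}], ...]
# The opcodes in each job arrive as serialized dicts and are re-hydrated via
# opcodes.OpCode.LoadOpCode above before the early_release flag is applied.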
11497 class LUInstanceGrowDisk(LogicalUnit):
11498 """Grow a disk of an instance.
11501 HPATH = "disk-grow"
11502 HTYPE = constants.HTYPE_INSTANCE
11505 def ExpandNames(self):
11506 self._ExpandAndLockInstance()
11507 self.needed_locks[locking.LEVEL_NODE] = []
11508 self.needed_locks[locking.LEVEL_NODE_RES] = []
11509 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11510 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11512 def DeclareLocks(self, level):
11513 if level == locking.LEVEL_NODE:
11514 self._LockInstancesNodes()
11515 elif level == locking.LEVEL_NODE_RES:
11517 self.needed_locks[locking.LEVEL_NODE_RES] = \
11518 self.needed_locks[locking.LEVEL_NODE][:]
11520 def BuildHooksEnv(self):
11521 """Build hooks env.
11523 This runs on the master, the primary and all the secondaries.
11527 "DISK": self.op.disk,
11528 "AMOUNT": self.op.amount,
11530 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11533 def BuildHooksNodes(self):
11534 """Build hooks nodes.
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
11540 def CheckPrereq(self):
11541 """Check prerequisites.
11543 This checks that the instance is in the cluster.
11546 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11547 assert instance is not None, \
11548 "Cannot retrieve locked instance %s" % self.op.instance_name
11549 nodenames = list(instance.all_nodes)
11550 for node in nodenames:
11551 _CheckNodeOnline(self, node)
11553 self.instance = instance
11555 if instance.disk_template not in constants.DTS_GROWABLE:
11556 raise errors.OpPrereqError("Instance's disk layout does not support"
11557 " growing", errors.ECODE_INVAL)
11559 self.disk = instance.FindDisk(self.op.disk)
11561 if instance.disk_template not in (constants.DT_FILE,
11562 constants.DT_SHARED_FILE,
11564 # TODO: check the free disk space for file, when that feature will be
11566 _CheckNodesFreeDiskPerVG(self, nodenames,
11567 self.disk.ComputeGrowth(self.op.amount))
11569 def Exec(self, feedback_fn):
11570 """Execute disk grow.
instance = self.instance
disk = self.disk
11576 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11577 assert (self.owned_locks(locking.LEVEL_NODE) ==
11578 self.owned_locks(locking.LEVEL_NODE_RES))
disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
  raise errors.OpExecError("Cannot activate block device to grow")
11584 feedback_fn("Growing disk %s of instance '%s' by %s" %
11585 (self.op.disk, instance.name,
11586 utils.FormatUnit(self.op.amount, "h")))
11588 # First run all grow ops in dry-run mode
11589 for node in instance.all_nodes:
11590 self.cfg.SetDiskID(disk, node)
11591 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11592 result.Raise("Grow request failed to node %s" % node)
11594 # We know that (as far as we can test) operations across different
11595 # nodes will succeed, time to run it for real
11596 for node in instance.all_nodes:
11597 self.cfg.SetDiskID(disk, node)
11598 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11599 result.Raise("Grow request failed to node %s" % node)
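# Design note: the first loop above runs every grow with the dry-run flag
# (last argument True) and only this second loop passes False; a node that
# would fail the resize can therefore reject the request before any LV has
# actually been changed, keeping the disk size consistent across nodes.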
11601 # TODO: Rewrite code to work properly
11602 # DRBD goes into sync mode for a short amount of time after executing the
11603 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11604 # calling "resize" in sync mode fails. Sleeping for a short amount of
11605 # time is a work-around.
11608 disk.RecordGrow(self.op.amount)
11609 self.cfg.Update(instance, feedback_fn)
11611 # Changes have been recorded, release node lock
11612 _ReleaseLocks(self, locking.LEVEL_NODE)
11614 # Downgrade lock while waiting for sync
11615 self.glm.downgrade(locking.LEVEL_INSTANCE)
11617 if self.op.wait_for_sync:
11618 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11620 self.proc.LogWarning("Disk sync-ing has not returned a good"
11621 " status; please check the instance")
11622 if instance.admin_state != constants.ADMINST_UP:
11623 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11624 elif instance.admin_state != constants.ADMINST_UP:
11625 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11626 " not supposed to be running because no wait for"
11627 " sync mode was requested")
11629 assert self.owned_locks(locking.LEVEL_NODE_RES)
11630 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11633 class LUInstanceQueryData(NoHooksLU):
11634 """Query runtime instance data.
11639 def ExpandNames(self):
11640 self.needed_locks = {}
11642 # Use locking if requested or when non-static information is wanted
11643 if not (self.op.static or self.op.use_locking):
11644 self.LogWarning("Non-static data requested, locks need to be acquired")
11645 self.op.use_locking = True
11647 if self.op.instances or not self.op.use_locking:
11648 # Expand instance names right here
11649 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11651 # Will use acquired locks
11652 self.wanted_names = None
11654 if self.op.use_locking:
11655 self.share_locks = _ShareAll()
11657 if self.wanted_names is None:
11658 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11660 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11662 self.needed_locks[locking.LEVEL_NODE] = []
11663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11665 def DeclareLocks(self, level):
11666 if self.op.use_locking and level == locking.LEVEL_NODE:
11667 self._LockInstancesNodes()
11669 def CheckPrereq(self):
11670 """Check prerequisites.
11672 This only checks the optional instance list against the existing names.
11675 if self.wanted_names is None:
11676 assert self.op.use_locking, "Locking was not used"
11677 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11679 self.wanted_instances = \
11680 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11682 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11683 """Returns the status of a block device
11686 if self.op.static or not node:
11689 self.cfg.SetDiskID(dev, node)
11691 result = self.rpc.call_blockdev_find(node, dev)
11695 result.Raise("Can't compute disk status for %s" % instance_name)
11697 status = result.payload
11701 return (status.dev_path, status.major, status.minor,
11702 status.sync_percent, status.estimated_time,
11703 status.is_degraded, status.ldisk_status)
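# Sketch of the status tuple returned above, with made-up values for a
# DRBD device in the middle of a resync:
#   ("/dev/drbd0", 147, 0, 87.5, 420, True, constants.LDS_OKAY)
# corresponding to (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).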
11705 def _ComputeDiskStatus(self, instance, snode, dev):
11706 """Compute block device status.
11709 if dev.dev_type in constants.LDS_DRBD:
11710 # we change the snode then (otherwise we use the one passed in)
11711 if dev.logical_id[0] == instance.primary_node:
11712 snode = dev.logical_id[1]
else:
  snode = dev.logical_id[0]
11716 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11717 instance.name, dev)
11718 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11721 dev_children = map(compat.partial(self._ComputeDiskStatus,
11728 "iv_name": dev.iv_name,
11729 "dev_type": dev.dev_type,
11730 "logical_id": dev.logical_id,
11731 "physical_id": dev.physical_id,
11732 "pstatus": dev_pstatus,
11733 "sstatus": dev_sstatus,
11734 "children": dev_children,
11739 def Exec(self, feedback_fn):
11740 """Gather and return data"""
result = {}

cluster = self.cfg.GetClusterInfo()
11745 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11746 for i in self.wanted_instances)
11747 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11748 if self.op.static or pnode.offline:
11749 remote_state = None
11751 self.LogWarning("Primary node %s is marked offline, returning static"
11752 " information only for instance %s" %
11753 (pnode.name, instance.name))
11755 remote_info = self.rpc.call_instance_info(instance.primary_node,
11757 instance.hypervisor)
11758 remote_info.Raise("Error checking node %s" % instance.primary_node)
11759 remote_info = remote_info.payload
11760 if remote_info and "state" in remote_info:
11761 remote_state = "up"
11763 if instance.admin_state == constants.ADMINST_UP:
11764 remote_state = "down"
11766 remote_state = instance.admin_state
disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
            instance.disks)
11771 result[instance.name] = {
11772 "name": instance.name,
11773 "config_state": instance.admin_state,
11774 "run_state": remote_state,
11775 "pnode": instance.primary_node,
11776 "snodes": instance.secondary_nodes,
11778 # this happens to be the same format used for hooks
11779 "nics": _NICListToTuple(self, instance.nics),
11780 "disk_template": instance.disk_template,
11782 "hypervisor": instance.hypervisor,
11783 "network_port": instance.network_port,
11784 "hv_instance": instance.hvparams,
11785 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11786 "be_instance": instance.beparams,
11787 "be_actual": cluster.FillBE(instance),
11788 "os_instance": instance.osparams,
11789 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11790 "serial_no": instance.serial_no,
11791 "mtime": instance.mtime,
11792 "ctime": instance.ctime,
11793 "uuid": instance.uuid,
11799 class LUInstanceSetParams(LogicalUnit):
11800 """Modifies an instances's parameters.
11803 HPATH = "instance-modify"
11804 HTYPE = constants.HTYPE_INSTANCE
11807 def CheckArguments(self):
11808 if not (self.op.nics or self.op.disks or self.op.disk_template or
11809 self.op.hvparams or self.op.beparams or self.op.os_name or
11810 self.op.online_inst or self.op.offline_inst or
11811 self.op.runtime_mem):
11812 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11814 if self.op.hvparams:
11815 _CheckGlobalHvParams(self.op.hvparams)
11819 for disk_op, disk_dict in self.op.disks:
11820 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11821 if disk_op == constants.DDM_REMOVE:
11822 disk_addremove += 1
11824 elif disk_op == constants.DDM_ADD:
11825 disk_addremove += 1
11827 if not isinstance(disk_op, int):
11828 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11829 if not isinstance(disk_dict, dict):
11830 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11831 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11833 if disk_op == constants.DDM_ADD:
11834 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11835 if mode not in constants.DISK_ACCESS_SET:
11836 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11837 errors.ECODE_INVAL)
11838 size = disk_dict.get(constants.IDISK_SIZE, None)
11840 raise errors.OpPrereqError("Required disk parameter size missing",
11841 errors.ECODE_INVAL)
11844 except (TypeError, ValueError), err:
11845 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11846 str(err), errors.ECODE_INVAL)
11847 disk_dict[constants.IDISK_SIZE] = size
11849 # modification of disk
11850 if constants.IDISK_SIZE in disk_dict:
11851 raise errors.OpPrereqError("Disk size change not possible, use"
11852 " grow-disk", errors.ECODE_INVAL)
11854 if disk_addremove > 1:
11855 raise errors.OpPrereqError("Only one disk add or remove operation"
11856 " supported at a time", errors.ECODE_INVAL)
11858 if self.op.disks and self.op.disk_template is not None:
11859 raise errors.OpPrereqError("Disk template conversion and other disk"
11860 " changes not supported at the same time",
11861 errors.ECODE_INVAL)
11863 if (self.op.disk_template and
11864 self.op.disk_template in constants.DTS_INT_MIRROR and
11865 self.op.remote_node is None):
11866 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11867 " one requires specifying a secondary node",
11868 errors.ECODE_INVAL)
11872 for nic_op, nic_dict in self.op.nics:
11873 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11874 if nic_op == constants.DDM_REMOVE:
11877 elif nic_op == constants.DDM_ADD:
11880 if not isinstance(nic_op, int):
11881 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11882 if not isinstance(nic_dict, dict):
11883 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11884 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11886 # nic_dict should be a dict
11887 nic_ip = nic_dict.get(constants.INIC_IP, None)
11888 if nic_ip is not None:
11889 if nic_ip.lower() == constants.VALUE_NONE:
11890 nic_dict[constants.INIC_IP] = None
11892 if not netutils.IPAddress.IsValid(nic_ip):
11893 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11894 errors.ECODE_INVAL)
11896 nic_bridge = nic_dict.get("bridge", None)
11897 nic_link = nic_dict.get(constants.INIC_LINK, None)
11898 if nic_bridge and nic_link:
11899 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11900 " at the same time", errors.ECODE_INVAL)
11901 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11902 nic_dict["bridge"] = None
11903 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11904 nic_dict[constants.INIC_LINK] = None
11906 if nic_op == constants.DDM_ADD:
11907 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11908 if nic_mac is None:
11909 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11911 if constants.INIC_MAC in nic_dict:
11912 nic_mac = nic_dict[constants.INIC_MAC]
11913 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11914 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11916 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11917 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11918 " modifying an existing nic",
11919 errors.ECODE_INVAL)
11921 if nic_addremove > 1:
11922 raise errors.OpPrereqError("Only one NIC add or remove operation"
11923 " supported at a time", errors.ECODE_INVAL)
11925 def ExpandNames(self):
11926 self._ExpandAndLockInstance()
11927 # Can't even acquire node locks in shared mode as upcoming changes in
11928 # Ganeti 2.6 will start to modify the node object on disk conversion
11929 self.needed_locks[locking.LEVEL_NODE] = []
11930 self.needed_locks[locking.LEVEL_NODE_RES] = []
11931 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11933 def DeclareLocks(self, level):
11934 if level == locking.LEVEL_NODE:
11935 self._LockInstancesNodes()
11936 if self.op.disk_template and self.op.remote_node:
11937 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11938 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11939 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11941 self.needed_locks[locking.LEVEL_NODE_RES] = \
11942 self.needed_locks[locking.LEVEL_NODE][:]
11944 def BuildHooksEnv(self):
11945 """Build hooks env.
11947 This runs on the master, primary and secondaries.
11951 if constants.BE_MINMEM in self.be_new:
11952 args["minmem"] = self.be_new[constants.BE_MINMEM]
11953 if constants.BE_MAXMEM in self.be_new:
11954 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11955 if constants.BE_VCPUS in self.be_new:
11956 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11957 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11958 # information at all.
11961 nic_override = dict(self.op.nics)
11962 for idx, nic in enumerate(self.instance.nics):
11963 if idx in nic_override:
11964 this_nic_override = nic_override[idx]
11966 this_nic_override = {}
11967 if constants.INIC_IP in this_nic_override:
11968 ip = this_nic_override[constants.INIC_IP]
11971 if constants.INIC_MAC in this_nic_override:
11972 mac = this_nic_override[constants.INIC_MAC]
11975 if idx in self.nic_pnew:
11976 nicparams = self.nic_pnew[idx]
11978 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11979 mode = nicparams[constants.NIC_MODE]
11980 link = nicparams[constants.NIC_LINK]
11981 args["nics"].append((ip, mac, mode, link))
11982 if constants.DDM_ADD in nic_override:
11983 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11984 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11985 nicparams = self.nic_pnew[constants.DDM_ADD]
11986 mode = nicparams[constants.NIC_MODE]
11987 link = nicparams[constants.NIC_LINK]
11988 args["nics"].append((ip, mac, mode, link))
11989 elif constants.DDM_REMOVE in nic_override:
11990 del args["nics"][-1]
11992 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11993 if self.op.disk_template:
11994 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11995 if self.op.runtime_mem:
11996 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12000 def BuildHooksNodes(self):
12001 """Build hooks nodes.
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
12007 def CheckPrereq(self):
12008 """Check prerequisites.
12010 This only checks the instance list against the existing names.
12013 # checking the new params on the primary/secondary nodes
12015 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12016 cluster = self.cluster = self.cfg.GetClusterInfo()
12017 assert self.instance is not None, \
12018 "Cannot retrieve locked instance %s" % self.op.instance_name
12019 pnode = instance.primary_node
12020 nodelist = list(instance.all_nodes)
12021 pnode_info = self.cfg.GetNodeInfo(pnode)
12022 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12025 if self.op.os_name and not self.op.force:
12026 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12027 self.op.force_variant)
12028 instance_os = self.op.os_name
12030 instance_os = instance.os
12032 if self.op.disk_template:
12033 if instance.disk_template == self.op.disk_template:
12034 raise errors.OpPrereqError("Instance already has disk template %s" %
12035 instance.disk_template, errors.ECODE_INVAL)
12037 if (instance.disk_template,
12038 self.op.disk_template) not in self._DISK_CONVERSIONS:
12039 raise errors.OpPrereqError("Unsupported disk template conversion from"
12040 " %s to %s" % (instance.disk_template,
12041 self.op.disk_template),
12042 errors.ECODE_INVAL)
12043 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12044 msg="cannot change disk template")
12045 if self.op.disk_template in constants.DTS_INT_MIRROR:
12046 if self.op.remote_node == pnode:
12047 raise errors.OpPrereqError("Given new secondary node %s is the same"
12048 " as the primary node of the instance" %
12049 self.op.remote_node, errors.ECODE_STATE)
12050 _CheckNodeOnline(self, self.op.remote_node)
12051 _CheckNodeNotDrained(self, self.op.remote_node)
12052 # FIXME: here we assume that the old instance type is DT_PLAIN
12053 assert instance.disk_template == constants.DT_PLAIN
12054 disks = [{constants.IDISK_SIZE: d.size,
12055 constants.IDISK_VG: d.logical_id[0]}
12056 for d in instance.disks]
12057 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12058 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12060 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12061 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12062 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12063 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12064 ignore=self.op.ignore_ipolicy)
12065 if pnode_info.group != snode_info.group:
12066 self.LogWarning("The primary and secondary nodes are in two"
12067 " different node groups; the disk parameters"
12068 " from the first disk's node group will be"
12071 # hvparams processing
12072 if self.op.hvparams:
12073 hv_type = instance.hypervisor
12074 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12075 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12076 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12079 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12080 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12081 self.hv_proposed = self.hv_new = hv_new # the new actual values
12082 self.hv_inst = i_hvdict # the new dict (without defaults)
12084 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12086 self.hv_new = self.hv_inst = {}
12088 # beparams processing
12089 if self.op.beparams:
12090 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12092 objects.UpgradeBeParams(i_bedict)
12093 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12094 be_new = cluster.SimpleFillBE(i_bedict)
12095 self.be_proposed = self.be_new = be_new # the new actual values
12096 self.be_inst = i_bedict # the new dict (without defaults)
12098 self.be_new = self.be_inst = {}
12099 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12100 be_old = cluster.FillBE(instance)
# CPU param validation -- checking every time a parameter is
12103 # changed to cover all cases where either CPU mask or vcpus have
12105 if (constants.BE_VCPUS in self.be_proposed and
12106 constants.HV_CPU_MASK in self.hv_proposed):
12108 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12109 # Verify mask is consistent with number of vCPUs. Can skip this
12110 # test if only 1 entry in the CPU mask, which means same mask
12111 # is applied to all vCPUs.
12112 if (len(cpu_list) > 1 and
12113 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12114 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12116 (self.be_proposed[constants.BE_VCPUS],
12117 self.hv_proposed[constants.HV_CPU_MASK]),
12118 errors.ECODE_INVAL)
12120 # Only perform this test if a new CPU mask is given
12121 if constants.HV_CPU_MASK in self.hv_new:
12122 # Calculate the largest CPU number requested
12123 max_requested_cpu = max(map(max, cpu_list))
12124 # Check that all of the instance's nodes have enough physical CPUs to
12125 # satisfy the requested CPU mask
12126 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12127 max_requested_cpu + 1, instance.hypervisor)
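# Worked example for the check above, using hypothetical parameters: with
# hv:cpu_mask="0-1:2-3:4-5" ParseMultiCpuMask yields three per-vCPU entries,
# so be:vcpus must also be 3; max_requested_cpu is 5, hence every node of
# the instance needs at least 6 physical CPUs. A single-entry mask such as
# "0-3" applies to all vCPUs and skips the length comparison.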
12129 # osparams processing
12130 if self.op.osparams:
12131 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12132 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12133 self.os_inst = i_osdict # the new dict (without defaults)
12139 #TODO(dynmem): do the appropriate check involving MINMEM
12140 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12141 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12142 mem_check_list = [pnode]
12143 if be_new[constants.BE_AUTO_BALANCE]:
12144 # either we changed auto_balance to yes or it was from before
12145 mem_check_list.extend(instance.secondary_nodes)
12146 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12147 instance.hypervisor)
12148 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12149 [instance.hypervisor])
12150 pninfo = nodeinfo[pnode]
12151 msg = pninfo.fail_msg
12153 # Assume the primary node is unreachable and go ahead
12154 self.warn.append("Can't get info from primary node %s: %s" %
12157 (_, _, (pnhvinfo, )) = pninfo.payload
12158 if not isinstance(pnhvinfo.get("memory_free", None), int):
12159 self.warn.append("Node data from primary node %s doesn't contain"
12160 " free memory information" % pnode)
12161 elif instance_info.fail_msg:
12162 self.warn.append("Can't get instance runtime information: %s" %
12163 instance_info.fail_msg)
12165 if instance_info.payload:
12166 current_mem = int(instance_info.payload["memory"])
12168 # Assume instance not running
12169 # (there is a slight race condition here, but it's not very
12170 # probable, and we have no other way to check)
12171 # TODO: Describe race condition
12173 #TODO(dynmem): do the appropriate check involving MINMEM
12174 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12175 pnhvinfo["memory_free"])
12177 raise errors.OpPrereqError("This change will prevent the instance"
12178 " from starting, due to %d MB of memory"
12179 " missing on its primary node" %
12181 errors.ECODE_NORES)
12183 if be_new[constants.BE_AUTO_BALANCE]:
12184 for node, nres in nodeinfo.items():
if node not in instance.secondary_nodes:
  continue
12187 nres.Raise("Can't get info from secondary node %s" % node,
12188 prereq=True, ecode=errors.ECODE_STATE)
12189 (_, _, (nhvinfo, )) = nres.payload
12190 if not isinstance(nhvinfo.get("memory_free", None), int):
12191 raise errors.OpPrereqError("Secondary node %s didn't return free"
12192 " memory information" % node,
12193 errors.ECODE_STATE)
12194 #TODO(dynmem): do the appropriate check involving MINMEM
12195 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12196 raise errors.OpPrereqError("This change will prevent the instance"
12197 " from failover to its secondary node"
12198 " %s, due to not enough memory" % node,
12199 errors.ECODE_STATE)
12201 if self.op.runtime_mem:
12202 remote_info = self.rpc.call_instance_info(instance.primary_node,
12204 instance.hypervisor)
12205 remote_info.Raise("Error checking node %s" % instance.primary_node)
12206 if not remote_info.payload: # not running already
12207 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12208 errors.ECODE_STATE)
12210 current_memory = remote_info.payload["memory"]
12211 if (not self.op.force and
12212 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12213 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12214 raise errors.OpPrereqError("Instance %s must have memory between %d"
12215 " and %d MB of memory unless --force is"
12216 " given" % (instance.name,
12217 self.be_proposed[constants.BE_MINMEM],
12218 self.be_proposed[constants.BE_MAXMEM]),
12219 errors.ECODE_INVAL)
12221 if self.op.runtime_mem > current_memory:
12222 _CheckNodeFreeMemory(self, instance.primary_node,
12223 "ballooning memory for instance %s" %
12225 self.op.memory - current_memory,
12226 instance.hypervisor)
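# Example of the ballooning rules above (hypothetical numbers): an instance
# with be:minmem=512 and be:maxmem=2048 currently running with 1024 MB may,
# without --force, only be ballooned to a value within [512, 2048]; growing
# it to 2048 additionally requires 2048 - 1024 = 1024 MB of free memory on
# the primary node.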
12230 self.nic_pinst = {}
12231 for nic_op, nic_dict in self.op.nics:
12232 if nic_op == constants.DDM_REMOVE:
12233 if not instance.nics:
12234 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12235 errors.ECODE_INVAL)
12237 if nic_op != constants.DDM_ADD:
12239 if not instance.nics:
12240 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12241 " no NICs" % nic_op,
12242 errors.ECODE_INVAL)
12243 if nic_op < 0 or nic_op >= len(instance.nics):
12244 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12246 (nic_op, len(instance.nics) - 1),
12247 errors.ECODE_INVAL)
12248 old_nic_params = instance.nics[nic_op].nicparams
12249 old_nic_ip = instance.nics[nic_op].ip
12251 old_nic_params = {}
12254 update_params_dict = dict([(key, nic_dict[key])
12255 for key in constants.NICS_PARAMETERS
12256 if key in nic_dict])
12258 if "bridge" in nic_dict:
12259 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12261 new_nic_params = _GetUpdatedParams(old_nic_params,
12262 update_params_dict)
12263 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12264 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12265 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12266 self.nic_pinst[nic_op] = new_nic_params
12267 self.nic_pnew[nic_op] = new_filled_nic_params
12268 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12270 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12271 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12272 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12274 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12276 self.warn.append(msg)
12278 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12279 if new_nic_mode == constants.NIC_MODE_ROUTED:
12280 if constants.INIC_IP in nic_dict:
12281 nic_ip = nic_dict[constants.INIC_IP]
12283 nic_ip = old_nic_ip
12285 raise errors.OpPrereqError("Cannot set the nic ip to None"
12286 " on a routed nic", errors.ECODE_INVAL)
12287 if constants.INIC_MAC in nic_dict:
12288 nic_mac = nic_dict[constants.INIC_MAC]
12289 if nic_mac is None:
12290 raise errors.OpPrereqError("Cannot set the nic mac to None",
12291 errors.ECODE_INVAL)
12292 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12293 # otherwise generate the mac
12294 nic_dict[constants.INIC_MAC] = \
12295 self.cfg.GenerateMAC(self.proc.GetECId())
12297 # or validate/reserve the current one
12299 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12300 except errors.ReservationError:
12301 raise errors.OpPrereqError("MAC address %s already in use"
12302 " in cluster" % nic_mac,
12303 errors.ECODE_NOTUNIQUE)
12306 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12307 raise errors.OpPrereqError("Disk operations not supported for"
12308 " diskless instances",
12309 errors.ECODE_INVAL)
12310 for disk_op, _ in self.op.disks:
12311 if disk_op == constants.DDM_REMOVE:
12312 if len(instance.disks) == 1:
12313 raise errors.OpPrereqError("Cannot remove the last disk of"
12314 " an instance", errors.ECODE_INVAL)
12315 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12316 msg="cannot remove disks")
12318 if (disk_op == constants.DDM_ADD and
12319 len(instance.disks) >= constants.MAX_DISKS):
12320 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12321 " add more" % constants.MAX_DISKS,
12322 errors.ECODE_STATE)
12323 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12325 if disk_op < 0 or disk_op >= len(instance.disks):
12326 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12328 (disk_op, len(instance.disks)),
12329 errors.ECODE_INVAL)
12331 # disabling the instance
12332 if self.op.offline_inst:
12333 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12334 msg="cannot change instance state to offline")
12336 # enabling the instance
12337 if self.op.online_inst:
12338 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12339 msg="cannot make instance go online")
12341 def _ConvertPlainToDrbd(self, feedback_fn):
12342 """Converts an instance from plain to drbd.
12345 feedback_fn("Converting template to drbd")
12346 instance = self.instance
12347 pnode = instance.primary_node
12348 snode = self.op.remote_node
12350 assert instance.disk_template == constants.DT_PLAIN
12352 # create a fake disk info for _GenerateDiskTemplate
12353 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12354 constants.IDISK_VG: d.logical_id[0]}
12355 for d in instance.disks]
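# Sketch of the fake disk_info built above, with hypothetical values for a
# single 10 GiB plain disk living in volume group "xenvg":
#   [{constants.IDISK_SIZE: 10240,
#     constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]
# _GenerateDiskTemplate only needs size/mode/vg here; the DRBD-specific
# attributes (minors, port, secret) are allocated fresh for the new disks.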
12356 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12357 instance.name, pnode, [snode],
12358 disk_info, None, None, 0, feedback_fn,
12360 info = _GetInstanceInfoText(instance)
12361 feedback_fn("Creating aditional volumes...")
12362 # first, create the missing data and meta devices
12363 for disk in new_disks:
12364 # unfortunately this is... not too nice
12365 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12367 for child in disk.children:
12368 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12369 # at this stage, all new LVs have been created, we can rename the
12371 feedback_fn("Renaming original volumes...")
12372 rename_list = [(o, n.children[0].logical_id)
12373 for (o, n) in zip(instance.disks, new_disks)]
12374 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12375 result.Raise("Failed to rename original LVs")
12377 feedback_fn("Initializing DRBD devices...")
12378 # all child devices are in place, we can now create the DRBD devices
12379 for disk in new_disks:
12380 for node in [pnode, snode]:
12381 f_create = node == pnode
12382 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12384 # at this point, the instance has been modified
12385 instance.disk_template = constants.DT_DRBD8
12386 instance.disks = new_disks
12387 self.cfg.Update(instance, feedback_fn)
12389 # Release node locks while waiting for sync
12390 _ReleaseLocks(self, locking.LEVEL_NODE)
12392 # disks are created, waiting for sync
12393 disk_abort = not _WaitForSync(self, instance,
12394 oneshot=not self.op.wait_for_sync)
12396 raise errors.OpExecError("There are some degraded disks for"
12397 " this instance, please cleanup manually")
12399 # Node resource locks will be released by caller
12401 def _ConvertDrbdToPlain(self, feedback_fn):
12402 """Converts an instance from drbd to plain.
12405 instance = self.instance
12407 assert len(instance.secondary_nodes) == 1
12408 assert instance.disk_template == constants.DT_DRBD8
12410 pnode = instance.primary_node
12411 snode = instance.secondary_nodes[0]
12412 feedback_fn("Converting template to plain")
12414 old_disks = instance.disks
12415 new_disks = [d.children[0] for d in old_disks]
12417 # copy over size and mode
12418 for parent, child in zip(old_disks, new_disks):
12419 child.size = parent.size
12420 child.mode = parent.mode
12422 # update instance structure
12423 instance.disks = new_disks
12424 instance.disk_template = constants.DT_PLAIN
12425 self.cfg.Update(instance, feedback_fn)
12427 # Release locks in case removing disks takes a while
12428 _ReleaseLocks(self, locking.LEVEL_NODE)
12430 feedback_fn("Removing volumes on the secondary node...")
12431 for disk in old_disks:
12432 self.cfg.SetDiskID(disk, snode)
12433 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12435 self.LogWarning("Could not remove block device %s on node %s,"
12436 " continuing anyway: %s", disk.iv_name, snode, msg)
12438 feedback_fn("Removing unneeded volumes on the primary node...")
12439 for idx, disk in enumerate(old_disks):
12440 meta = disk.children[1]
12441 self.cfg.SetDiskID(meta, pnode)
12442 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12444 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12445 " continuing anyway: %s", idx, pnode, msg)
12447 # this is a DRBD disk, return its port to the pool
12448 for disk in old_disks:
12449 tcp_port = disk.logical_id[2]
12450 self.cfg.AddTcpUdpPort(tcp_port)
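# Example (hypothetical values): a DRBD8 logical_id such as
#   ("node1", "node2", 11001, 0, 1, "s3cr3t")
# carries the network port in slot 2, so disk.logical_id[2] == 11001 is the
# TCP/UDP port returned to the cluster-wide pool in the loop above.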
12452 # Node resource locks will be released by caller
12454 def Exec(self, feedback_fn):
12455 """Modifies an instance.
12457 All parameters take effect only at the next restart of the instance.
12460 # Process here the warnings from CheckPrereq, as we don't have a
12461 # feedback_fn there.
12462 for warn in self.warn:
12463 feedback_fn("WARNING: %s" % warn)
12465 assert ((self.op.disk_template is None) ^
12466 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12467 "Not owning any node resource locks"
instance = self.instance
result = []
12473 if self.op.runtime_mem:
12474 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12476 self.op.runtime_mem)
12477 rpcres.Raise("Cannot modify instance runtime memory")
12478 result.append(("runtime_memory", self.op.runtime_mem))
12481 for disk_op, disk_dict in self.op.disks:
12482 if disk_op == constants.DDM_REMOVE:
12483 # remove the last disk
12484 device = instance.disks.pop()
12485 device_idx = len(instance.disks)
12486 for node, disk in device.ComputeNodeTree(instance.primary_node):
12487 self.cfg.SetDiskID(disk, node)
12488 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12490 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12491 " continuing anyway", device_idx, node, msg)
12492 result.append(("disk/%d" % device_idx, "remove"))
12494 # if this is a DRBD disk, return its port to the pool
12495 if device.dev_type in constants.LDS_DRBD:
12496 tcp_port = device.logical_id[2]
12497 self.cfg.AddTcpUdpPort(tcp_port)
12498 elif disk_op == constants.DDM_ADD:
12500 if instance.disk_template in (constants.DT_FILE,
12501 constants.DT_SHARED_FILE):
12502 file_driver, file_path = instance.disks[0].logical_id
12503 file_path = os.path.dirname(file_path)
12505 file_driver = file_path = None
12506 disk_idx_base = len(instance.disks)
12507 new_disk = _GenerateDiskTemplate(self,
12508 instance.disk_template,
12509 instance.name, instance.primary_node,
12510 instance.secondary_nodes,
12516 self.diskparams)[0]
12517 instance.disks.append(new_disk)
12518 info = _GetInstanceInfoText(instance)
12520 logging.info("Creating volume %s for instance %s",
12521 new_disk.iv_name, instance.name)
12522 # Note: this needs to be kept in sync with _CreateDisks
12524 for node in instance.all_nodes:
12525 f_create = node == instance.primary_node
try:
  _CreateBlockDev(self, node, instance, new_disk,
                  f_create, info, f_create)
12529 except errors.OpExecError, err:
self.LogWarning("Failed to create volume %s (%s) on"
                " node %s: %s",
                new_disk.iv_name, new_disk, node, err)
12533 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12534 (new_disk.size, new_disk.mode)))
12536 # change a given disk
12537 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12538 result.append(("disk.mode/%d" % disk_op,
12539 disk_dict[constants.IDISK_MODE]))
12541 if self.op.disk_template:
12543 check_nodes = set(instance.all_nodes)
12544 if self.op.remote_node:
12545 check_nodes.add(self.op.remote_node)
12546 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12547 owned = self.owned_locks(level)
12548 assert not (check_nodes - owned), \
12549 ("Not owning the correct locks, owning %r, expected at least %r" %
12550 (owned, check_nodes))
12552 r_shut = _ShutdownInstanceDisks(self, instance)
12554 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12555 " proceed with disk template conversion")
12556 mode = (instance.disk_template, self.op.disk_template)
12558 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12560 self.cfg.ReleaseDRBDMinors(instance.name)
12562 result.append(("disk_template", self.op.disk_template))
12564 assert instance.disk_template == self.op.disk_template, \
12565 ("Expected disk template '%s', found '%s'" %
12566 (self.op.disk_template, instance.disk_template))
12568 # Release node and resource locks if there are any (they might already have
12569 # been released during disk conversion)
12570 _ReleaseLocks(self, locking.LEVEL_NODE)
12571 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12574 for nic_op, nic_dict in self.op.nics:
12575 if nic_op == constants.DDM_REMOVE:
12576 # remove the last nic
12577 del instance.nics[-1]
12578 result.append(("nic.%d" % len(instance.nics), "remove"))
12579 elif nic_op == constants.DDM_ADD:
12580 # mac and bridge should be set, by now
12581 mac = nic_dict[constants.INIC_MAC]
12582 ip = nic_dict.get(constants.INIC_IP, None)
12583 nicparams = self.nic_pinst[constants.DDM_ADD]
12584 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12585 instance.nics.append(new_nic)
12586 result.append(("nic.%d" % (len(instance.nics) - 1),
12587 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12588 (new_nic.mac, new_nic.ip,
12589 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12590 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12593 for key in (constants.INIC_MAC, constants.INIC_IP):
12594 if key in nic_dict:
12595 setattr(instance.nics[nic_op], key, nic_dict[key])
12596 if nic_op in self.nic_pinst:
12597 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12598 for key, val in nic_dict.iteritems():
12599 result.append(("nic.%s/%d" % (key, nic_op), val))
12602 if self.op.hvparams:
12603 instance.hvparams = self.hv_inst
12604 for key, val in self.op.hvparams.iteritems():
12605 result.append(("hv/%s" % key, val))
12608 if self.op.beparams:
12609 instance.beparams = self.be_inst
12610 for key, val in self.op.beparams.iteritems():
12611 result.append(("be/%s" % key, val))
12614 if self.op.os_name:
12615 instance.os = self.op.os_name
12618 if self.op.osparams:
12619 instance.osparams = self.os_inst
12620 for key, val in self.op.osparams.iteritems():
12621 result.append(("os/%s" % key, val))
12623 # online/offline instance
12624 if self.op.online_inst:
12625 self.cfg.MarkInstanceDown(instance.name)
12626 result.append(("admin_state", constants.ADMINST_DOWN))
12627 if self.op.offline_inst:
12628 self.cfg.MarkInstanceOffline(instance.name)
12629 result.append(("admin_state", constants.ADMINST_OFFLINE))
12631 self.cfg.Update(instance, feedback_fn)
12633 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12634 self.owned_locks(locking.LEVEL_NODE)), \
12635 "All node locks should have been released by now"
12639 _DISK_CONVERSIONS = {
12640 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12641 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12642 }
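# A minimal sketch of the dispatch pattern behind _DISK_CONVERSIONS above: the
# (old template, new template) pair selects the conversion helper, as in
# "mode = (instance.disk_template, self.op.disk_template)" in Exec. The helper
# below is hypothetical and only illustrates the lookup.
def _ExampleConvertDispatch(old_template, new_template, handlers):
  mode = (old_template, new_template)
  if mode not in handlers:
    raise ValueError("Unsupported disk template conversion %r" % (mode, ))
  # Each handler is a callable such as _ConvertPlainToDrbd(lu, feedback_fn)
  return handlers[mode]

# e.g. _ExampleConvertDispatch(constants.DT_PLAIN, constants.DT_DRBD8,
#                              LUInstanceSetParams._DISK_CONVERSIONS)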
12645 class LUInstanceChangeGroup(LogicalUnit):
12646 HPATH = "instance-change-group"
12647 HTYPE = constants.HTYPE_INSTANCE
12650 def ExpandNames(self):
12651 self.share_locks = _ShareAll()
12652 self.needed_locks = {
12653 locking.LEVEL_NODEGROUP: [],
12654 locking.LEVEL_NODE: [],
12657 self._ExpandAndLockInstance()
12659 if self.op.target_groups:
12660 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12661 self.op.target_groups)
12663 self.req_target_uuids = None
12665 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12667 def DeclareLocks(self, level):
12668 if level == locking.LEVEL_NODEGROUP:
12669 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12671 if self.req_target_uuids:
12672 lock_groups = set(self.req_target_uuids)
12674 # Lock all groups used by instance optimistically; this requires going
12675 # via the node before it's locked, requiring verification later on
12676 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12677 lock_groups.update(instance_groups)
12679 # No target groups, need to lock all of them
12680 lock_groups = locking.ALL_SET
12682 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12684 elif level == locking.LEVEL_NODE:
12685 if self.req_target_uuids:
12686 # Lock all nodes used by instances
12687 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12688 self._LockInstancesNodes()
12690 # Lock all nodes in all potential target groups
12691 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12692 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12693 member_nodes = [node_name
12694 for group in lock_groups
12695 for node_name in self.cfg.GetNodeGroup(group).members]
12696 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12698 # Lock all nodes as all groups are potential targets
12699 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12701 def CheckPrereq(self):
12702 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12703 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12704 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12706 assert (self.req_target_uuids is None or
12707 owned_groups.issuperset(self.req_target_uuids))
12708 assert owned_instances == set([self.op.instance_name])
12710 # Get instance information
12711 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12713 # Check if node groups for locked instance are still correct
12714 assert owned_nodes.issuperset(self.instance.all_nodes), \
12715 ("Instance %s's nodes changed while we kept the lock" %
12716 self.op.instance_name)
12718 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12721 if self.req_target_uuids:
12722 # User requested specific target groups
12723 self.target_uuids = self.req_target_uuids
12725 # All groups except those used by the instance are potential targets
12726 self.target_uuids = owned_groups - inst_groups
12728 conflicting_groups = self.target_uuids & inst_groups
12729 if conflicting_groups:
12730 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12731 " used by the instance '%s'" %
12732 (utils.CommaJoin(conflicting_groups),
12733 self.op.instance_name),
12734 errors.ECODE_INVAL)
12736 if not self.target_uuids:
12737 raise errors.OpPrereqError("There are no possible target groups",
12738 errors.ECODE_INVAL)
12740 def BuildHooksEnv(self):
12741 """Build hooks env.
12744 assert self.target_uuids
12747 "TARGET_GROUPS": " ".join(self.target_uuids),
12750 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12754 def BuildHooksNodes(self):
12755 """Build hooks nodes.
12758 mn = self.cfg.GetMasterNode()
12759 return ([mn], [mn])
12761 def Exec(self, feedback_fn):
12762 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12764 assert instances == [self.op.instance_name], "Instance not locked"
12766 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12767 instances=instances, target_groups=list(self.target_uuids))
12769 ial.Run(self.op.iallocator)
12771 if not ial.success:
12772 raise errors.OpPrereqError("Can't compute solution for changing group of"
12773 " instance '%s' using iallocator '%s': %s" %
12774 (self.op.instance_name, self.op.iallocator,
12776 errors.ECODE_NORES)
12778 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12780 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12781 " instance '%s'", len(jobs), self.op.instance_name)
12783 return ResultWithJobs(jobs)
12786 class LUBackupQuery(NoHooksLU):
12787 """Query the exports list
12792 def ExpandNames(self):
12793 self.needed_locks = {}
12794 self.share_locks[locking.LEVEL_NODE] = 1
12795 if not self.op.nodes:
12796 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12798 self.needed_locks[locking.LEVEL_NODE] = \
12799 _GetWantedNodes(self, self.op.nodes)
12801 def Exec(self, feedback_fn):
12802 """Compute the list of all the exported system images.
12805 @return: a dictionary with the structure node->(export-list)
12806 where export-list is a list of the instances exported on
12810 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12811 rpcresult = self.rpc.call_export_list(self.nodes)
12812 result = {}
12813 for node in rpcresult:
12814 if rpcresult[node].fail_msg:
12815 result[node] = False
12816 else:
12817 result[node] = rpcresult[node].payload
12819 return result
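# A small consumption sketch for the node->(export list | False) mapping built
# above; _ExampleSummarizeExports is a hypothetical helper, not used elsewhere.
def _ExampleSummarizeExports(exports):
  ok = dict((node, payload) for (node, payload) in exports.items()
            if payload is not False)
  failed = [node for (node, payload) in exports.items() if payload is False]
  return (ok, failed)

# _ExampleSummarizeExports({"node1": ["inst1"], "node2": False})
# -> ({"node1": ["inst1"]}, ["node2"])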
12822 class LUBackupPrepare(NoHooksLU):
12823 """Prepares an instance for an export and returns useful information.
12828 def ExpandNames(self):
12829 self._ExpandAndLockInstance()
12831 def CheckPrereq(self):
12832 """Check prerequisites.
12835 instance_name = self.op.instance_name
12837 self.instance = self.cfg.GetInstanceInfo(instance_name)
12838 assert self.instance is not None, \
12839 "Cannot retrieve locked instance %s" % self.op.instance_name
12840 _CheckNodeOnline(self, self.instance.primary_node)
12842 self._cds = _GetClusterDomainSecret()
12844 def Exec(self, feedback_fn):
12845 """Prepares an instance for an export.
12848 instance = self.instance
12850 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12851 salt = utils.GenerateSecret(8)
12853 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12854 result = self.rpc.call_x509_cert_create(instance.primary_node,
12855 constants.RIE_CERT_VALIDITY)
12856 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12858 (name, cert_pem) = result.payload
12860 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12864 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12865 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12867 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12873 class LUBackupExport(LogicalUnit):
12874 """Export an instance to an image in the cluster.
12877 HPATH = "instance-export"
12878 HTYPE = constants.HTYPE_INSTANCE
12881 def CheckArguments(self):
12882 """Check the arguments.
12885 self.x509_key_name = self.op.x509_key_name
12886 self.dest_x509_ca_pem = self.op.destination_x509_ca
12888 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12889 if not self.x509_key_name:
12890 raise errors.OpPrereqError("Missing X509 key name for encryption",
12891 errors.ECODE_INVAL)
12893 if not self.dest_x509_ca_pem:
12894 raise errors.OpPrereqError("Missing destination X509 CA",
12895 errors.ECODE_INVAL)
12897 def ExpandNames(self):
12898 self._ExpandAndLockInstance()
12900 # Lock all nodes for local exports
12901 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12902 # FIXME: lock only instance primary and destination node
12904 # Sad but true, for now we have to lock all nodes, as we don't know where
12905 # the previous export might be, and in this LU we search for it and
12906 # remove it from its current node. In the future we could fix this by:
12907 # - making a tasklet to search (share-lock all), then create the
12908 # new one, then one to remove, after
12909 # - removing the removal operation altogether
12910 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12912 def DeclareLocks(self, level):
12913 """Last minute lock declaration."""
12914 # All nodes are locked anyway, so nothing to do here.
12916 def BuildHooksEnv(self):
12917 """Build hooks env.
12919 This will run on the master, primary node and target node.
12923 "EXPORT_MODE": self.op.mode,
12924 "EXPORT_NODE": self.op.target_node,
12925 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12926 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12927 # TODO: Generic function for boolean env variables
12928 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12931 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12935 def BuildHooksNodes(self):
12936 """Build hooks nodes.
12939 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12941 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12942 nl.append(self.op.target_node)
12946 def CheckPrereq(self):
12947 """Check prerequisites.
12949 This checks that the instance and node names are valid.
12952 instance_name = self.op.instance_name
12954 self.instance = self.cfg.GetInstanceInfo(instance_name)
12955 assert self.instance is not None, \
12956 "Cannot retrieve locked instance %s" % self.op.instance_name
12957 _CheckNodeOnline(self, self.instance.primary_node)
12959 if (self.op.remove_instance and
12960 self.instance.admin_state == constants.ADMINST_UP and
12961 not self.op.shutdown):
12962 raise errors.OpPrereqError("Can not remove instance without shutting it"
12965 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12966 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12967 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12968 assert self.dst_node is not None
12970 _CheckNodeOnline(self, self.dst_node.name)
12971 _CheckNodeNotDrained(self, self.dst_node.name)
12974 self.dest_disk_info = None
12975 self.dest_x509_ca = None
12977 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12978 self.dst_node = None
12980 if len(self.op.target_node) != len(self.instance.disks):
12981 raise errors.OpPrereqError(("Received destination information for %s"
12982 " disks, but instance %s has %s disks") %
12983 (len(self.op.target_node), instance_name,
12984 len(self.instance.disks)),
12985 errors.ECODE_INVAL)
12987 cds = _GetClusterDomainSecret()
12989 # Check X509 key name
12991 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12992 except (TypeError, ValueError), err:
12993 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12995 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12996 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12997 errors.ECODE_INVAL)
12999 # Load and verify CA
13001 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13002 except OpenSSL.crypto.Error, err:
13003 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13004 (err, ), errors.ECODE_INVAL)
13006 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13007 if errcode is not None:
13008 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13009 (msg, ), errors.ECODE_INVAL)
13011 self.dest_x509_ca = cert
13013 # Verify target information
13015 for idx, disk_data in enumerate(self.op.target_node):
13017 (host, port, magic) = \
13018 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13019 except errors.GenericError, err:
13020 raise errors.OpPrereqError("Target info for disk %s: %s" %
13021 (idx, err), errors.ECODE_INVAL)
13023 disk_info.append((host, port, magic))
13025 assert len(disk_info) == len(self.op.target_node)
13026 self.dest_disk_info = disk_info
13029 raise errors.ProgrammerError("Unhandled export mode %r" %
13030 self.op.mode)
13032 # instance disk type verification
13033 # TODO: Implement export support for file-based disks
13034 for disk in self.instance.disks:
13035 if disk.dev_type == constants.LD_FILE:
13036 raise errors.OpPrereqError("Export not supported for instances with"
13037 " file-based disks", errors.ECODE_INVAL)
13039 def _CleanupExports(self, feedback_fn):
13040 """Removes exports of current instance from all other nodes.
13042 If an instance in a cluster with nodes A..D was exported to node C, its
13043 exports will be removed from the nodes A, B and D.
13046 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13048 nodelist = self.cfg.GetNodeList()
13049 nodelist.remove(self.dst_node.name)
13051 # on one-node clusters nodelist will be empty after the removal
13052 # if we proceed, the backup would be removed because OpBackupQuery
13053 # substitutes an empty list with the full cluster node list.
13054 iname = self.instance.name
13055 if nodelist:
13056 feedback_fn("Removing old exports for instance %s" % iname)
13057 exportlist = self.rpc.call_export_list(nodelist)
13058 for node in exportlist:
13059 if exportlist[node].fail_msg:
13061 if iname in exportlist[node].payload:
13062 msg = self.rpc.call_export_remove(node, iname).fail_msg
13064 self.LogWarning("Could not remove older export for instance %s"
13065 " on node %s: %s", iname, node, msg)
13067 def Exec(self, feedback_fn):
13068 """Export an instance to an image in the cluster.
13071 assert self.op.mode in constants.EXPORT_MODES
13073 instance = self.instance
13074 src_node = instance.primary_node
13076 if self.op.shutdown:
13077 # shutdown the instance, but not the disks
13078 feedback_fn("Shutting down instance %s" % instance.name)
13079 result = self.rpc.call_instance_shutdown(src_node, instance,
13080 self.op.shutdown_timeout)
13081 # TODO: Maybe ignore failures if ignore_remove_failures is set
13082 result.Raise("Could not shutdown instance %s on"
13083 " node %s" % (instance.name, src_node))
13085 # set the disks ID correctly since call_instance_start needs the
13086 # correct drbd minor to create the symlinks
13087 for disk in instance.disks:
13088 self.cfg.SetDiskID(disk, src_node)
13090 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13092 if activate_disks:
13093 # Activate the instance disks if we're exporting a stopped instance
13094 feedback_fn("Activating disks for %s" % instance.name)
13095 _StartInstanceDisks(self, instance, None)
13098 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13101 helper.CreateSnapshots()
13103 if (self.op.shutdown and
13104 instance.admin_state == constants.ADMINST_UP and
13105 not self.op.remove_instance):
13106 assert not activate_disks
13107 feedback_fn("Starting instance %s" % instance.name)
13108 result = self.rpc.call_instance_start(src_node,
13109 (instance, None, None), False)
13110 msg = result.fail_msg
13111 if msg:
13112 feedback_fn("Failed to start instance: %s" % msg)
13113 _ShutdownInstanceDisks(self, instance)
13114 raise errors.OpExecError("Could not start instance: %s" % msg)
13116 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13117 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13118 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13119 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13120 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13122 (key_name, _, _) = self.x509_key_name
13125 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13128 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13129 key_name, dest_ca_pem,
13134 # Check for backwards compatibility
13135 assert len(dresults) == len(instance.disks)
13136 assert compat.all(isinstance(i, bool) for i in dresults), \
13137 "Not all results are boolean: %r" % dresults
13141 feedback_fn("Deactivating disks for %s" % instance.name)
13142 _ShutdownInstanceDisks(self, instance)
13144 if not (compat.all(dresults) and fin_resu):
13145 failures = []
13146 if not fin_resu:
13147 failures.append("export finalization")
13148 if not compat.all(dresults):
13149 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13150 if not dsk)
13151 failures.append("disk export: disk(s) %s" % fdsk)
13153 raise errors.OpExecError("Export failed, errors in %s" %
13154 utils.CommaJoin(failures))
13156 # At this point, the export was successful, we can cleanup/finish
13158 # Remove instance if requested
13159 if self.op.remove_instance:
13160 feedback_fn("Removing instance %s" % instance.name)
13161 _RemoveInstance(self, feedback_fn, instance,
13162 self.op.ignore_remove_failures)
13164 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13165 self._CleanupExports(feedback_fn)
13167 return fin_resu, dresults
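# Sketch of how a caller could interpret the (fin_resu, dresults) pair
# returned above, mirroring the per-disk checks in Exec; hypothetical helper.
def _ExampleExportSucceeded(fin_resu, dresults):
  # fin_resu: overall finalization status; dresults: one boolean per disk
  return bool(fin_resu) and compat.all(dresults)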
13170 class LUBackupRemove(NoHooksLU):
13171 """Remove exports related to the named instance.
13176 def ExpandNames(self):
13177 self.needed_locks = {}
13178 # We need all nodes to be locked in order for RemoveExport to work, but we
13179 # don't need to lock the instance itself, as nothing will happen to it (and
13180 # we can remove exports also for a removed instance)
13181 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13183 def Exec(self, feedback_fn):
13184 """Remove any export.
13187 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13188 # If the instance was not found we'll try with the name that was passed in.
13189 # This will only work if it was an FQDN, though.
13190 fqdn_warn = False
13191 if not instance_name:
13192 fqdn_warn = True
13193 instance_name = self.op.instance_name
13195 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13196 exportlist = self.rpc.call_export_list(locked_nodes)
13198 for node in exportlist:
13199 msg = exportlist[node].fail_msg
13201 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13203 if instance_name in exportlist[node].payload:
13205 result = self.rpc.call_export_remove(node, instance_name)
13206 msg = result.fail_msg
13208 logging.error("Could not remove export for instance %s"
13209 " on node %s: %s", instance_name, node, msg)
13211 if fqdn_warn and not found:
13212 feedback_fn("Export not found. If trying to remove an export belonging"
13213 " to a deleted instance please use its Fully Qualified"
13217 class LUGroupAdd(LogicalUnit):
13218 """Logical unit for creating node groups.
13221 HPATH = "group-add"
13222 HTYPE = constants.HTYPE_GROUP
13225 def ExpandNames(self):
13226 # We need the new group's UUID here so that we can create and acquire the
13227 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13228 # that it should not check whether the UUID exists in the configuration.
13229 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13230 self.needed_locks = {}
13231 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13233 def CheckPrereq(self):
13234 """Check prerequisites.
13236 This checks that the given group name is not an existing node group
13241 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13242 except errors.OpPrereqError:
13245 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13246 " node group (UUID: %s)" %
13247 (self.op.group_name, existing_uuid),
13248 errors.ECODE_EXISTS)
13250 if self.op.ndparams:
13251 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13253 if self.op.hv_state:
13254 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13256 self.new_hv_state = None
13258 if self.op.disk_state:
13259 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13261 self.new_disk_state = None
13263 if self.op.diskparams:
13264 for templ in constants.DISK_TEMPLATES:
13265 if templ not in self.op.diskparams:
13266 self.op.diskparams[templ] = {}
13267 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13269 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13271 if self.op.ipolicy:
13272 cluster = self.cfg.GetClusterInfo()
13273 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13275 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13276 except errors.ConfigurationError, err:
13277 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13278 errors.ECODE_INVAL)
13280 def BuildHooksEnv(self):
13281 """Build hooks env.
13285 "GROUP_NAME": self.op.group_name,
13288 def BuildHooksNodes(self):
13289 """Build hooks nodes.
13292 mn = self.cfg.GetMasterNode()
13293 return ([mn], [mn])
13295 def Exec(self, feedback_fn):
13296 """Add the node group to the cluster.
13299 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13300 uuid=self.group_uuid,
13301 alloc_policy=self.op.alloc_policy,
13302 ndparams=self.op.ndparams,
13303 diskparams=self.op.diskparams,
13304 ipolicy=self.op.ipolicy,
13305 hv_state_static=self.new_hv_state,
13306 disk_state_static=self.new_disk_state)
13308 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13309 del self.remove_locks[locking.LEVEL_NODEGROUP]
13312 class LUGroupAssignNodes(NoHooksLU):
13313 """Logical unit for assigning nodes to groups.
13318 def ExpandNames(self):
13319 # These raise errors.OpPrereqError on their own:
13320 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13321 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13323 # We want to lock all the affected nodes and groups. We have readily
13324 # available the list of nodes, and the *destination* group. To gather the
13325 # list of "source" groups, we need to fetch node information later on.
13326 self.needed_locks = {
13327 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13328 locking.LEVEL_NODE: self.op.nodes,
13331 def DeclareLocks(self, level):
13332 if level == locking.LEVEL_NODEGROUP:
13333 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13335 # Try to get all affected nodes' groups without having the group or node
13336 # lock yet. Needs verification later in the code flow.
13337 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13339 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13341 def CheckPrereq(self):
13342 """Check prerequisites.
13345 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13346 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13347 frozenset(self.op.nodes))
13349 expected_locks = (set([self.group_uuid]) |
13350 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13351 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13352 if actual_locks != expected_locks:
13353 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13354 " current groups are '%s', used to be '%s'" %
13355 (utils.CommaJoin(expected_locks),
13356 utils.CommaJoin(actual_locks)))
13358 self.node_data = self.cfg.GetAllNodesInfo()
13359 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13360 instance_data = self.cfg.GetAllInstancesInfo()
13362 if self.group is None:
13363 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13364 (self.op.group_name, self.group_uuid))
13366 (new_splits, previous_splits) = \
13367 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13368 for node in self.op.nodes],
13369 self.node_data, instance_data)
13372 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13374 if not self.op.force:
13375 raise errors.OpExecError("The following instances get split by this"
13376 " change and --force was not given: %s" %
13379 self.LogWarning("This operation will split the following instances: %s",
13382 if previous_splits:
13383 self.LogWarning("In addition, these already-split instances continue"
13384 " to be split across groups: %s",
13385 utils.CommaJoin(utils.NiceSort(previous_splits)))
13387 def Exec(self, feedback_fn):
13388 """Assign nodes to a new group.
13391 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13393 self.cfg.AssignGroupNodes(mods)
13396 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13397 """Check for split instances after a node assignment.
13399 This method considers a series of node assignments as an atomic operation,
13400 and returns information about split instances after applying the set of
13403 In particular, it returns information about newly split instances, and
13404 instances that were already split and remain so after the change.
13406 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13407 considered.
13409 @type changes: list of (node_name, new_group_uuid) pairs.
13410 @param changes: list of node assignments to consider.
13411 @param node_data: a dict with data for all nodes
13412 @param instance_data: a dict with all instances to consider
13413 @rtype: a two-tuple
13414 @return: a list of instances that were previously okay and end up split as a
13415 consequence of this change, and a list of instances that were previously
13416 split and this change does not fix.
13419 changed_nodes = dict((node, group) for node, group in changes
13420 if node_data[node].group != group)
13422 all_split_instances = set()
13423 previously_split_instances = set()
13425 def InstanceNodes(instance):
13426 return [instance.primary_node] + list(instance.secondary_nodes)
13428 for inst in instance_data.values():
13429 if inst.disk_template not in constants.DTS_INT_MIRROR:
13432 instance_nodes = InstanceNodes(inst)
13434 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13435 previously_split_instances.add(inst.name)
13437 if len(set(changed_nodes.get(node, node_data[node].group)
13438 for node in instance_nodes)) > 1:
13439 all_split_instances.add(inst.name)
13441 return (list(all_split_instances - previously_split_instances),
13442 list(previously_split_instances & all_split_instances))
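# A self-contained illustration of the "split" test used above: an instance is
# split when its nodes end up in more than one node group. The helper and the
# sample data below are hypothetical.
def _ExampleIsSplit(instance_nodes, node_to_group):
  return len(set(node_to_group[node] for node in instance_nodes)) > 1

# Moving only the secondary of a two-node mirrored instance to another group
# splits it:
# _ExampleIsSplit(["node1", "node2"], {"node1": "g1", "node2": "g2"}) -> True
# _ExampleIsSplit(["node1", "node2"], {"node1": "g1", "node2": "g1"}) -> False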
13445 class _GroupQuery(_QueryBase):
13446 FIELDS = query.GROUP_FIELDS
13448 def ExpandNames(self, lu):
13449 lu.needed_locks = {}
13451 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13452 self._cluster = lu.cfg.GetClusterInfo()
13453 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13456 self.wanted = [name_to_uuid[name]
13457 for name in utils.NiceSort(name_to_uuid.keys())]
13459 # Accept names to be either names or UUIDs.
13462 all_uuid = frozenset(self._all_groups.keys())
13464 for name in self.names:
13465 if name in all_uuid:
13466 self.wanted.append(name)
13467 elif name in name_to_uuid:
13468 self.wanted.append(name_to_uuid[name])
13470 missing.append(name)
13473 raise errors.OpPrereqError("Some groups do not exist: %s" %
13474 utils.CommaJoin(missing),
13475 errors.ECODE_NOENT)
13477 def DeclareLocks(self, lu, level):
13480 def _GetQueryData(self, lu):
13481 """Computes the list of node groups and their attributes.
13484 do_nodes = query.GQ_NODE in self.requested_data
13485 do_instances = query.GQ_INST in self.requested_data
13487 group_to_nodes = None
13488 group_to_instances = None
13490 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13491 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13492 # latter GetAllInstancesInfo() is not enough, for we have to go through
13493 # instance->node. Hence, we will need to process nodes even if we only need
13494 # instance information.
13495 if do_nodes or do_instances:
13496 all_nodes = lu.cfg.GetAllNodesInfo()
13497 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13500 for node in all_nodes.values():
13501 if node.group in group_to_nodes:
13502 group_to_nodes[node.group].append(node.name)
13503 node_to_group[node.name] = node.group
13506 all_instances = lu.cfg.GetAllInstancesInfo()
13507 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13509 for instance in all_instances.values():
13510 node = instance.primary_node
13511 if node in node_to_group:
13512 group_to_instances[node_to_group[node]].append(instance.name)
13515 # Do not pass on node information if it was not requested.
13516 group_to_nodes = None
13518 return query.GroupQueryData(self._cluster,
13519 [self._all_groups[uuid]
13520 for uuid in self.wanted],
13521 group_to_nodes, group_to_instances)
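# Simplified sketch of the group_to_nodes/node_to_group maps built in
# _GetQueryData above; node_groups stands in for the node objects and maps
# node name -> group UUID (hypothetical helper).
def _ExampleGroupMaps(node_groups, wanted_groups):
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  node_to_group = {}
  for name, group in node_groups.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(name)
      node_to_group[name] = group
  return (group_to_nodes, node_to_group)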
13524 class LUGroupQuery(NoHooksLU):
13525 """Logical unit for querying node groups.
13530 def CheckArguments(self):
13531 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13532 self.op.output_fields, False)
13534 def ExpandNames(self):
13535 self.gq.ExpandNames(self)
13537 def DeclareLocks(self, level):
13538 self.gq.DeclareLocks(self, level)
13540 def Exec(self, feedback_fn):
13541 return self.gq.OldStyleQuery(self)
13544 class LUGroupSetParams(LogicalUnit):
13545 """Modifies the parameters of a node group.
13548 HPATH = "group-modify"
13549 HTYPE = constants.HTYPE_GROUP
13552 def CheckArguments(self):
13555 self.op.diskparams,
13556 self.op.alloc_policy,
13558 self.op.disk_state,
13562 if all_changes.count(None) == len(all_changes):
13563 raise errors.OpPrereqError("Please pass at least one modification",
13564 errors.ECODE_INVAL)
13566 def ExpandNames(self):
13567 # This raises errors.OpPrereqError on its own:
13568 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13570 self.needed_locks = {
13571 locking.LEVEL_INSTANCE: [],
13572 locking.LEVEL_NODEGROUP: [self.group_uuid],
13575 self.share_locks[locking.LEVEL_INSTANCE] = 1
13577 def DeclareLocks(self, level):
13578 if level == locking.LEVEL_INSTANCE:
13579 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13581 # Lock instances optimistically, needs verification once group lock has
13582 # been acquired
13583 self.needed_locks[locking.LEVEL_INSTANCE] = \
13584 self.cfg.GetNodeGroupInstances(self.group_uuid)
13586 def CheckPrereq(self):
13587 """Check prerequisites.
13590 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13592 # Check if locked instances are still correct
13593 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13595 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13596 cluster = self.cfg.GetClusterInfo()
13598 if self.group is None:
13599 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13600 (self.op.group_name, self.group_uuid))
13602 if self.op.ndparams:
13603 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13604 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13605 self.new_ndparams = new_ndparams
13607 if self.op.diskparams:
13608 self.new_diskparams = dict()
13609 for templ in constants.DISK_TEMPLATES:
13610 if templ not in self.op.diskparams:
13611 self.op.diskparams[templ] = {}
13612 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13613 self.op.diskparams[templ])
13614 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13615 self.new_diskparams[templ] = new_templ_params
13617 if self.op.hv_state:
13618 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13619 self.group.hv_state_static)
13621 if self.op.disk_state:
13622 self.new_disk_state = \
13623 _MergeAndVerifyDiskState(self.op.disk_state,
13624 self.group.disk_state_static)
13626 if self.op.ipolicy:
13627 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13631 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13632 inst_filter = lambda inst: inst.name in owned_instances
13633 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13635 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13637 new_ipolicy, instances)
13640 self.LogWarning("After the ipolicy change the following instances"
13641 " violate them: %s",
13642 utils.CommaJoin(violations))
13644 def BuildHooksEnv(self):
13645 """Build hooks env.
13649 "GROUP_NAME": self.op.group_name,
13650 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13653 def BuildHooksNodes(self):
13654 """Build hooks nodes.
13657 mn = self.cfg.GetMasterNode()
13658 return ([mn], [mn])
13660 def Exec(self, feedback_fn):
13661 """Modifies the node group.
13666 if self.op.ndparams:
13667 self.group.ndparams = self.new_ndparams
13668 result.append(("ndparams", str(self.group.ndparams)))
13670 if self.op.diskparams:
13671 self.group.diskparams = self.new_diskparams
13672 result.append(("diskparams", str(self.group.diskparams)))
13674 if self.op.alloc_policy:
13675 self.group.alloc_policy = self.op.alloc_policy
13677 if self.op.hv_state:
13678 self.group.hv_state_static = self.new_hv_state
13680 if self.op.disk_state:
13681 self.group.disk_state_static = self.new_disk_state
13683 if self.op.ipolicy:
13684 self.group.ipolicy = self.new_ipolicy
13686 self.cfg.Update(self.group, feedback_fn)
13690 class LUGroupRemove(LogicalUnit):
13691 HPATH = "group-remove"
13692 HTYPE = constants.HTYPE_GROUP
13695 def ExpandNames(self):
13696 # This will raise errors.OpPrereqError on its own:
13697 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13698 self.needed_locks = {
13699 locking.LEVEL_NODEGROUP: [self.group_uuid],
13702 def CheckPrereq(self):
13703 """Check prerequisites.
13705 This checks that the given group name exists as a node group, that it is
13706 empty (i.e., contains no nodes), and that it is not the last group of the
13707 cluster.
13710 # Verify that the group is empty.
13711 group_nodes = [node.name
13712 for node in self.cfg.GetAllNodesInfo().values()
13713 if node.group == self.group_uuid]
13716 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13718 (self.op.group_name,
13719 utils.CommaJoin(utils.NiceSort(group_nodes))),
13720 errors.ECODE_STATE)
13722 # Verify the cluster would not be left group-less.
13723 if len(self.cfg.GetNodeGroupList()) == 1:
13724 raise errors.OpPrereqError("Group '%s' is the only group,"
13725 " cannot be removed" %
13726 self.op.group_name,
13727 errors.ECODE_STATE)
13729 def BuildHooksEnv(self):
13730 """Build hooks env.
13734 "GROUP_NAME": self.op.group_name,
13737 def BuildHooksNodes(self):
13738 """Build hooks nodes.
13741 mn = self.cfg.GetMasterNode()
13742 return ([mn], [mn])
13744 def Exec(self, feedback_fn):
13745 """Remove the node group.
13749 self.cfg.RemoveNodeGroup(self.group_uuid)
13750 except errors.ConfigurationError:
13751 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13752 (self.op.group_name, self.group_uuid))
13754 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13757 class LUGroupRename(LogicalUnit):
13758 HPATH = "group-rename"
13759 HTYPE = constants.HTYPE_GROUP
13762 def ExpandNames(self):
13763 # This raises errors.OpPrereqError on its own:
13764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13766 self.needed_locks = {
13767 locking.LEVEL_NODEGROUP: [self.group_uuid],
13770 def CheckPrereq(self):
13771 """Check prerequisites.
13773 Ensures requested new name is not yet used.
13777 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13778 except errors.OpPrereqError:
13781 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13782 " node group (UUID: %s)" %
13783 (self.op.new_name, new_name_uuid),
13784 errors.ECODE_EXISTS)
13786 def BuildHooksEnv(self):
13787 """Build hooks env.
13791 "OLD_NAME": self.op.group_name,
13792 "NEW_NAME": self.op.new_name,
13795 def BuildHooksNodes(self):
13796 """Build hooks nodes.
13799 mn = self.cfg.GetMasterNode()
13801 all_nodes = self.cfg.GetAllNodesInfo()
13802 all_nodes.pop(mn, None)
13805 run_nodes.extend(node.name for node in all_nodes.values()
13806 if node.group == self.group_uuid)
13808 return (run_nodes, run_nodes)
13810 def Exec(self, feedback_fn):
13811 """Rename the node group.
13814 group = self.cfg.GetNodeGroup(self.group_uuid)
13816 if group is None:
13817 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13818 (self.op.group_name, self.group_uuid))
13820 group.name = self.op.new_name
13821 self.cfg.Update(group, feedback_fn)
13823 return self.op.new_name
13826 class LUGroupEvacuate(LogicalUnit):
13827 HPATH = "group-evacuate"
13828 HTYPE = constants.HTYPE_GROUP
13831 def ExpandNames(self):
13832 # This raises errors.OpPrereqError on its own:
13833 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13835 if self.op.target_groups:
13836 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13837 self.op.target_groups)
13839 self.req_target_uuids = []
13841 if self.group_uuid in self.req_target_uuids:
13842 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13843 " as a target group (targets are %s)" %
13845 utils.CommaJoin(self.req_target_uuids)),
13846 errors.ECODE_INVAL)
13848 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13850 self.share_locks = _ShareAll()
13851 self.needed_locks = {
13852 locking.LEVEL_INSTANCE: [],
13853 locking.LEVEL_NODEGROUP: [],
13854 locking.LEVEL_NODE: [],
13857 def DeclareLocks(self, level):
13858 if level == locking.LEVEL_INSTANCE:
13859 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13861 # Lock instances optimistically, needs verification once node and group
13862 # locks have been acquired
13863 self.needed_locks[locking.LEVEL_INSTANCE] = \
13864 self.cfg.GetNodeGroupInstances(self.group_uuid)
13866 elif level == locking.LEVEL_NODEGROUP:
13867 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13869 if self.req_target_uuids:
13870 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13872 # Lock all groups used by instances optimistically; this requires going
13873 # via the node before it's locked, requiring verification later on
13874 lock_groups.update(group_uuid
13875 for instance_name in
13876 self.owned_locks(locking.LEVEL_INSTANCE)
13878 self.cfg.GetInstanceNodeGroups(instance_name))
13880 # No target groups, need to lock all of them
13881 lock_groups = locking.ALL_SET
13883 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13885 elif level == locking.LEVEL_NODE:
13886 # This will only lock the nodes in the group to be evacuated which
13887 # contain actual instances
13888 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13889 self._LockInstancesNodes()
13891 # Lock all nodes in group to be evacuated and target groups
13892 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13893 assert self.group_uuid in owned_groups
13894 member_nodes = [node_name
13895 for group in owned_groups
13896 for node_name in self.cfg.GetNodeGroup(group).members]
13897 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13899 def CheckPrereq(self):
13900 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13901 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13902 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13904 assert owned_groups.issuperset(self.req_target_uuids)
13905 assert self.group_uuid in owned_groups
13907 # Check if locked instances are still correct
13908 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13910 # Get instance information
13911 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13913 # Check if node groups for locked instances are still correct
13914 for instance_name in owned_instances:
13915 inst = self.instances[instance_name]
13916 assert owned_nodes.issuperset(inst.all_nodes), \
13917 "Instance %s's nodes changed while we kept the lock" % instance_name
13919 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13922 assert self.group_uuid in inst_groups, \
13923 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13925 if self.req_target_uuids:
13926 # User requested specific target groups
13927 self.target_uuids = self.req_target_uuids
13929 # All groups except the one to be evacuated are potential targets
13930 self.target_uuids = [group_uuid for group_uuid in owned_groups
13931 if group_uuid != self.group_uuid]
13933 if not self.target_uuids:
13934 raise errors.OpPrereqError("There are no possible target groups",
13935 errors.ECODE_INVAL)
13937 def BuildHooksEnv(self):
13938 """Build hooks env.
13942 "GROUP_NAME": self.op.group_name,
13943 "TARGET_GROUPS": " ".join(self.target_uuids),
13946 def BuildHooksNodes(self):
13947 """Build hooks nodes.
13950 mn = self.cfg.GetMasterNode()
13952 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13954 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13956 return (run_nodes, run_nodes)
13958 def Exec(self, feedback_fn):
13959 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13961 assert self.group_uuid not in self.target_uuids
13963 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13964 instances=instances, target_groups=self.target_uuids)
13966 ial.Run(self.op.iallocator)
13968 if not ial.success:
13969 raise errors.OpPrereqError("Can't compute group evacuation using"
13970 " iallocator '%s': %s" %
13971 (self.op.iallocator, ial.info),
13972 errors.ECODE_NORES)
13974 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13976 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13977 len(jobs), self.op.group_name)
13979 return ResultWithJobs(jobs)
13982 class TagsLU(NoHooksLU): # pylint: disable=W0223
13983 """Generic tags LU.
13985 This is an abstract class which is the parent of all the other tags LUs.
13988 def ExpandNames(self):
13989 self.group_uuid = None
13990 self.needed_locks = {}
13991 if self.op.kind == constants.TAG_NODE:
13992 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13993 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13994 elif self.op.kind == constants.TAG_INSTANCE:
13995 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13996 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13997 elif self.op.kind == constants.TAG_NODEGROUP:
13998 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14000 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14001 # not possible to acquire the BGL based on opcode parameters)
14003 def CheckPrereq(self):
14004 """Check prerequisites.
14007 if self.op.kind == constants.TAG_CLUSTER:
14008 self.target = self.cfg.GetClusterInfo()
14009 elif self.op.kind == constants.TAG_NODE:
14010 self.target = self.cfg.GetNodeInfo(self.op.name)
14011 elif self.op.kind == constants.TAG_INSTANCE:
14012 self.target = self.cfg.GetInstanceInfo(self.op.name)
14013 elif self.op.kind == constants.TAG_NODEGROUP:
14014 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14016 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14017 str(self.op.kind), errors.ECODE_INVAL)
14020 class LUTagsGet(TagsLU):
14021 """Returns the tags of a given object.
14026 def ExpandNames(self):
14027 TagsLU.ExpandNames(self)
14029 # Share locks as this is only a read operation
14030 self.share_locks = _ShareAll()
14032 def Exec(self, feedback_fn):
14033 """Returns the tag list.
14036 return list(self.target.GetTags())
14039 class LUTagsSearch(NoHooksLU):
14040 """Searches the tags for a given pattern.
14045 def ExpandNames(self):
14046 self.needed_locks = {}
14048 def CheckPrereq(self):
14049 """Check prerequisites.
14051 This checks the pattern passed for validity by compiling it.
14055 self.re = re.compile(self.op.pattern)
14056 except re.error, err:
14057 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14058 (self.op.pattern, err), errors.ECODE_INVAL)
14060 def Exec(self, feedback_fn):
14061 """Returns the tag list.
14065 tgts = [("/cluster", cfg.GetClusterInfo())]
14066 ilist = cfg.GetAllInstancesInfo().values()
14067 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14068 nlist = cfg.GetAllNodesInfo().values()
14069 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14070 tgts.extend(("/nodegroup/%s" % n.name, n)
14071 for n in cfg.GetAllNodeGroupsInfo().values())
14073 for path, target in tgts:
14074 for tag in target.GetTags():
14075 if self.re.search(tag):
14076 results.append((path, tag))
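# A standalone sketch of the tag search above: compile the pattern once, then
# scan every (path, tags) pair. The helper and the data shape are hypothetical.
def _ExampleSearchTags(pattern, tagged):
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged
          for tag in tags
          if rx.search(tag)]

# _ExampleSearchTags("^env:", [("/instances/web1", ["env:prod", "owner:ops"])])
# -> [("/instances/web1", "env:prod")]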
14080 class LUTagsSet(TagsLU):
14081 """Sets a tag on a given object.
14086 def CheckPrereq(self):
14087 """Check prerequisites.
14089 This checks the type and length of the tag name and value.
14092 TagsLU.CheckPrereq(self)
14093 for tag in self.op.tags:
14094 objects.TaggableObject.ValidateTag(tag)
14096 def Exec(self, feedback_fn):
14101 for tag in self.op.tags:
14102 self.target.AddTag(tag)
14103 except errors.TagError, err:
14104 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14105 self.cfg.Update(self.target, feedback_fn)
14108 class LUTagsDel(TagsLU):
14109 """Delete a list of tags from a given object.
14114 def CheckPrereq(self):
14115 """Check prerequisites.
14117 This checks that we have the given tag.
14120 TagsLU.CheckPrereq(self)
14121 for tag in self.op.tags:
14122 objects.TaggableObject.ValidateTag(tag)
14123 del_tags = frozenset(self.op.tags)
14124 cur_tags = self.target.GetTags()
14126 diff_tags = del_tags - cur_tags
14128 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14129 raise errors.OpPrereqError("Tag(s) %s not found" %
14130 (utils.CommaJoin(diff_names), ),
14131 errors.ECODE_NOENT)
14133 def Exec(self, feedback_fn):
14134 """Remove the tag from the object.
14137 for tag in self.op.tags:
14138 self.target.RemoveTag(tag)
14139 self.cfg.Update(self.target, feedback_fn)
14142 class LUTestDelay(NoHooksLU):
14143 """Sleep for a specified amount of time.
14145 This LU sleeps on the master and/or nodes for a specified amount of
14146 time.
14151 def ExpandNames(self):
14152 """Expand names and set required locks.
14154 This expands the node list, if any.
14157 self.needed_locks = {}
14158 if self.op.on_nodes:
14159 # _GetWantedNodes can be used here, but is not always appropriate to use
14160 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14161 # more information.
14162 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14163 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14165 def _TestDelay(self):
14166 """Do the actual sleep.
14169 if self.op.on_master:
14170 if not utils.TestDelay(self.op.duration):
14171 raise errors.OpExecError("Error during master delay test")
14172 if self.op.on_nodes:
14173 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14174 for node, node_result in result.items():
14175 node_result.Raise("Failure during rpc call to node %s" % node)
14177 def Exec(self, feedback_fn):
14178 """Execute the test delay opcode, with the wanted repetitions.
14181 if self.op.repeat == 0:
14182 self._TestDelay()
14183 else:
14184 top_value = self.op.repeat - 1
14185 for i in range(self.op.repeat):
14186 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14187 self._TestDelay()
14190 class LUTestJqueue(NoHooksLU):
14191 """Utility LU to test some aspects of the job queue.
14196 # Must be lower than default timeout for WaitForJobChange to see whether it
14197 # notices changed jobs
14198 _CLIENT_CONNECT_TIMEOUT = 20.0
14199 _CLIENT_CONFIRM_TIMEOUT = 60.0
14202 def _NotifyUsingSocket(cls, cb, errcls):
14203 """Opens a Unix socket and waits for another program to connect.
14206 @param cb: Callback to send socket name to client
14207 @type errcls: class
14208 @param errcls: Exception class to use for errors
14211 # Using a temporary directory as there's no easy way to create temporary
14212 # sockets without writing a custom loop around tempfile.mktemp and
14213 # socket.socket
14214 tmpdir = tempfile.mkdtemp()
14216 tmpsock = utils.PathJoin(tmpdir, "sock")
14218 logging.debug("Creating temporary socket at %s", tmpsock)
14219 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14224 # Send details to client
14227 # Wait for client to connect before continuing
14228 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14230 (conn, _) = sock.accept()
14231 except socket.error, err:
14232 raise errcls("Client didn't connect in time (%s)" % err)
14236 # Remove as soon as client is connected
14237 shutil.rmtree(tmpdir)
14239 # Wait for client to close
14242 # pylint: disable=E1101
14243 # Instance of '_socketobject' has no ... member
14244 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14246 except socket.error, err:
14247 raise errcls("Client failed to confirm notification (%s)" % err)
14251 def _SendNotification(self, test, arg, sockname):
14252 """Sends a notification to the client.
14255 @param test: Test name
14256 @param arg: Test argument (depends on test)
14257 @type sockname: string
14258 @param sockname: Socket path
14261 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14263 def _Notify(self, prereq, test, arg):
14264 """Notifies the client of a test.
14267 @param prereq: Whether this is a prereq-phase test
14269 @param test: Test name
14270 @param arg: Test argument (depends on test)
14274 errcls = errors.OpPrereqError
14276 errcls = errors.OpExecError
14278 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14282 def CheckArguments(self):
14283 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14284 self.expandnames_calls = 0
14286 def ExpandNames(self):
14287 checkargs_calls = getattr(self, "checkargs_calls", 0)
14288 if checkargs_calls < 1:
14289 raise errors.ProgrammerError("CheckArguments was not called")
14291 self.expandnames_calls += 1
14293 if self.op.notify_waitlock:
14294 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14296 self.LogInfo("Expanding names")
14298 # Get lock on master node (just to get a lock, not for a particular reason)
14299 self.needed_locks = {
14300 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14303 def Exec(self, feedback_fn):
14304 if self.expandnames_calls < 1:
14305 raise errors.ProgrammerError("ExpandNames was not called")
14307 if self.op.notify_exec:
14308 self._Notify(False, constants.JQT_EXEC, None)
14310 self.LogInfo("Executing")
14312 if self.op.log_messages:
14313 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14314 for idx, msg in enumerate(self.op.log_messages):
14315 self.LogInfo("Sending log message %s", idx + 1)
14316 feedback_fn(constants.JQT_MSGPREFIX + msg)
14317 # Report how many test messages have been sent
14318 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14321 raise errors.OpExecError("Opcode failure was requested")
14326 class IAllocator(object):
14327 """IAllocator framework.
14329 An IAllocator instance has four sets of attributes:
14330 - cfg that is needed to query the cluster
14331 - input data (all members of the _KEYS class attribute are required)
14332 - four buffer attributes (in|out_data|text), that represent the
14333 input (to the external script) in text and data structure format,
14334 and the output from it, again in two formats
14335 - the result variables from the script (success, info, nodes) for
14336 easy usage
14339 # pylint: disable=R0902
14340 # lots of instance attributes
14342 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14344 self.rpc = rpc_runner
14345 # init buffer variables
14346 self.in_text = self.out_text = self.in_data = self.out_data = None
14347 # init all input fields so that pylint is happy
14349 self.memory = self.disks = self.disk_template = None
14350 self.os = self.tags = self.nics = self.vcpus = None
14351 self.hypervisor = None
14352 self.relocate_from = None
14354 self.instances = None
14355 self.evac_mode = None
14356 self.target_groups = []
14358 self.required_nodes = None
14359 # init result fields
14360 self.success = self.info = self.result = None
14363 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14365 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14366 " IAllocator" % self.mode)
14368 keyset = [n for (n, _) in keydata]
14371 if key not in keyset:
14372 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14373 " IAllocator" % key)
14374 setattr(self, key, kwargs[key])
14377 if key not in kwargs:
14378 raise errors.ProgrammerError("Missing input parameter '%s' to"
14379 " IAllocator" % key)
14380 self._BuildInputData(compat.partial(fn, self), keydata)
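# Typical construction, mirroring LUInstanceChangeGroup.Exec and
# LUGroupEvacuate.Exec above (values in angle brackets are placeholders):
#
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["<instance name>"],
#                    target_groups=["<target group uuid>"])
#   ial.Run("<iallocator script name>")
#   if not ial.success:
#     # ial.info carries the script's error message
#     raise errors.OpPrereqError(...)
#   # ial.result is then post-processed, e.g. via _LoadNodeEvacResult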
14382 def _ComputeClusterData(self):
14383 """Compute the generic allocator input data.
14385 This is the data that is independent of the actual operation.
14389 cluster_info = cfg.GetClusterInfo()
14392 "version": constants.IALLOCATOR_VERSION,
14393 "cluster_name": cfg.GetClusterName(),
14394 "cluster_tags": list(cluster_info.GetTags()),
14395 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14396 "ipolicy": cluster_info.ipolicy,
14398 ninfo = cfg.GetAllNodesInfo()
14399 iinfo = cfg.GetAllInstancesInfo().values()
14400 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14403 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14405 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14406 hypervisor_name = self.hypervisor
14407 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14408 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14410 hypervisor_name = cluster_info.primary_hypervisor
14412 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14415 self.rpc.call_all_instances_info(node_list,
14416 cluster_info.enabled_hypervisors)
14418 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14420 config_ndata = self._ComputeBasicNodeData(ninfo)
14421 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14422 i_list, config_ndata)
14423 assert len(data["nodes"]) == len(ninfo), \
14424 "Incomplete node data computed"
14426 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14428 self.in_data = data
14431 def _ComputeNodeGroupData(cfg):
14432 """Compute node groups data.
14435 cluster = cfg.GetClusterInfo()
14436 ng = dict((guuid, {
14437 "name": gdata.name,
14438 "alloc_policy": gdata.alloc_policy,
14439 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14441 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14446 def _ComputeBasicNodeData(node_cfg):
14447 """Compute global node data.
14450 @returns: a dict of name: (node dict, node config)
14453 # fill in static (config-based) values
14454 node_results = dict((ninfo.name, {
14455 "tags": list(ninfo.GetTags()),
14456 "primary_ip": ninfo.primary_ip,
14457 "secondary_ip": ninfo.secondary_ip,
14458 "offline": ninfo.offline,
14459 "drained": ninfo.drained,
14460 "master_candidate": ninfo.master_candidate,
14461 "group": ninfo.group,
14462 "master_capable": ninfo.master_capable,
14463 "vm_capable": ninfo.vm_capable,
14465 for ninfo in node_cfg.values())
14467 return node_results
14470 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14472 """Compute global node data.
14474 @param node_results: the basic node structures as filled from the config
14477 #TODO(dynmem): compute the right data on MAX and MIN memory
14478 # make a copy of the current dict
14479 node_results = dict(node_results)
14480 for nname, nresult in node_data.items():
14481 assert nname in node_results, "Missing basic data for node %s" % nname
14482 ninfo = node_cfg[nname]
14484 if not (ninfo.offline or ninfo.drained):
14485 nresult.Raise("Can't get data for node %s" % nname)
14486 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14488 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14490 for attr in ["memory_total", "memory_free", "memory_dom0",
14491 "vg_size", "vg_free", "cpu_total"]:
14492 if attr not in remote_info:
14493 raise errors.OpExecError("Node '%s' didn't return attribute"
14494 " '%s'" % (nname, attr))
14495 if not isinstance(remote_info[attr], int):
14496 raise errors.OpExecError("Node '%s' returned invalid value"
14498 (nname, attr, remote_info[attr]))
14499 # compute memory used by primary instances
14500 i_p_mem = i_p_up_mem = 0
14501 for iinfo, beinfo in i_list:
14502 if iinfo.primary_node == nname:
14503 i_p_mem += beinfo[constants.BE_MAXMEM]
14504 if iinfo.name not in node_iinfo[nname].payload:
14507 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14508 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14509 remote_info["memory_free"] -= max(0, i_mem_diff)
14511 if iinfo.admin_state == constants.ADMINST_UP:
14512 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14514 # compute memory used by instances
14516 "total_memory": remote_info["memory_total"],
14517 "reserved_memory": remote_info["memory_dom0"],
14518 "free_memory": remote_info["memory_free"],
14519 "total_disk": remote_info["vg_size"],
14520 "free_disk": remote_info["vg_free"],
14521 "total_cpus": remote_info["cpu_total"],
14522 "i_pri_memory": i_p_mem,
14523 "i_pri_up_memory": i_p_up_mem,
14525 pnr_dyn.update(node_results[nname])
14526 node_results[nname] = pnr_dyn
14528 return node_results
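# Worked example of the free-memory adjustment above (numbers are made up):
# a running primary instance with BE_MAXMEM = 1024 currently using 512 gives
# i_mem_diff = 1024 - 512 = 512, so a node reporting memory_free = 4096 is
# exported to the allocator with free_memory = 4096 - max(0, 512) = 3584,
# i.e. as if every running primary instance consumed its configured maximum.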
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

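  # Illustrative sketch (hypothetical instance; "disk_space_total" comes from
  # _ComputeDiskSize): one entry of the instance data built above:
  #   {"web1.example.com": {"tags": [], "admin_state": "up", "vcpus": 2,
  #                         "memory": 4096, "os": "debootstrap+default",
  #                         "nodes": ["node1.example.com", "node2.example.com"],
  #                         "nics": [...], "disks": [{"size": 10240,
  #                         "mode": "rw"}], "disk_template": "drbd",
  #                         "hypervisor": "xen-pvm", "disk_space_total": ...}}
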
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

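  # Illustrative sketch (hypothetical values): an allocation request built by
  # this method looks roughly like the following; the "type" key is added
  # later by _BuildInputData:
  #   {"name": "web1.example.com", "disk_template": "drbd", "tags": [],
  #    "os": "debootstrap+default", "vcpus": 2, "memory": 4096,
  #    "disks": [{"size": 10240, "mode": "rw"}], "disk_space_total": ...,
  #    "nics": [...], "required_nodes": 2, "hypervisor": "xen-pvm"}
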
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests."""
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests."""
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

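  # Illustrative sketch (hypothetical names): the node-evacuate and
  # change-group requests are much smaller, e.g.
  #   {"instances": ["web1.example.com"], "evac_mode": "all"}
  #   {"instances": ["web1.example.com"], "target_groups": ["group2"]}
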
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

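  # Sketch of the serialized input (keys abridged): the text handed to the
  # iallocator script is a JSON document roughly of the form
  #   {"cluster_name": ..., "nodegroups": {...}, "nodes": {...},
  #    "instances": {...}, "request": {...}}
  # where "nodegroups", "nodes" and "instances" come from the _Compute*Data
  # helpers above and "request" is the mode-specific dict returned by fn().
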
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

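  # Illustrative sketch (made-up names; field meanings follow the iallocator
  # protocol): a node-evacuate result accepted by _NEVAC_RESULT is a
  # (moved, failed, jobs) triple, e.g.
  #   [[["inst1.example.com", "group1", ["node2.example.com"]]],
  #    [["inst2.example.com", "disk template not mirrored"]],
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]
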
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)

    return sorted(result)

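  # Worked example (hypothetical data) for the helper above:
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "group1"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "node3"])
  #   => ["group1", "uuid-b"]
  # "node3" is unknown and ignored; "uuid-b" has no group entry, so its UUID
  # is used in place of the name.
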

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
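
# Illustrative usage sketch (hypothetical caller; in this module the generic
# query LUs do the equivalent): resolve the implementation for a resource
# type and instantiate it with a filter, a field list and the locking flag:
#   impl = _GetQueryImplementation(constants.QR_NODE)
#   node_query = impl(qlang.MakeSimpleFilter("name", ["node1.example.com"]),
#                     ["name", "pinst_cnt"], False)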