code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, filter_, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = ReadSimpleFilter("name", filter_)
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
1007 def _GetNodeInstances(cfg, node_name):
1008   """Returns a list of all primary and secondary instances on a node.
1009
1010   """
1011
1012   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013
1014
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016   """Returns primary instances on a node.
1017
1018   """
1019   return _GetNodeInstancesInner(cfg,
1020                                 lambda inst: node_name == inst.primary_node)
1021
1022
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024   """Returns secondary instances on a node.
1025
1026   """
1027   return _GetNodeInstancesInner(cfg,
1028                                 lambda inst: node_name in inst.secondary_nodes)
1029
1030
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032   """Returns the arguments for a storage type.
1033
1034   """
1035   # Special case for file storage
1036   if storage_type == constants.ST_FILE:
1037     # storage.FileStorage wants a list of storage directories
1038     return [[cfg.GetFileStorageDir()]]
1039
1040   return []
1041
1042
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044   faulty = []
1045
1046   for dev in instance.disks:
1047     cfg.SetDiskID(dev, node_name)
1048
1049   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050   result.Raise("Failed to get disk status from node %s" % node_name,
1051                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052
1053   for idx, bdev_status in enumerate(result.payload):
1054     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055       faulty.append(idx)
1056
1057   return faulty
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
1092 class LUClusterPostInit(LogicalUnit):
1093   """Logical unit for running hooks after cluster initialization.
1094
1095   """
1096   HPATH = "cluster-init"
1097   HTYPE = constants.HTYPE_CLUSTER
1098
1099   def BuildHooksEnv(self):
1100     """Build hooks env.
1101
1102     """
1103     env = {"OP_TARGET": self.cfg.GetClusterName()}
1104     mn = self.cfg.GetMasterNode()
1105     return env, [], [mn]
1106
1107   def Exec(self, feedback_fn):
1108     """Nothing to do.
1109
1110     """
1111     return True
1112
1113
1114 class LUClusterDestroy(LogicalUnit):
1115   """Logical unit for destroying the cluster.
1116
1117   """
1118   HPATH = "cluster-destroy"
1119   HTYPE = constants.HTYPE_CLUSTER
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     return env, [], []
1127
1128   def CheckPrereq(self):
1129     """Check prerequisites.
1130
1131     This checks whether the cluster is empty.
1132
1133     Any errors are signaled by raising errors.OpPrereqError.
1134
1135     """
1136     master = self.cfg.GetMasterNode()
1137
1138     nodelist = self.cfg.GetNodeList()
1139     if len(nodelist) != 1 or nodelist[0] != master:
1140       raise errors.OpPrereqError("There are still %d node(s) in"
1141                                  " this cluster." % (len(nodelist) - 1),
1142                                  errors.ECODE_INVAL)
1143     instancelist = self.cfg.GetInstanceList()
1144     if instancelist:
1145       raise errors.OpPrereqError("There are still %d instance(s) in"
1146                                  " this cluster." % len(instancelist),
1147                                  errors.ECODE_INVAL)
1148
1149   def Exec(self, feedback_fn):
1150     """Destroys the cluster.
1151
1152     """
1153     master = self.cfg.GetMasterNode()
1154
1155     # Run post hooks on master node before it's removed
1156     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157     try:
1158       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159     except:
1160       # pylint: disable-msg=W0702
1161       self.LogWarning("Errors occurred running hooks on %s" % master)
1162
1163     result = self.rpc.call_node_stop_master(master, False)
1164     result.Raise("Could not disable the master role")
1165
1166     return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
1202 class LUClusterVerify(LogicalUnit):
1203   """Verifies the cluster status.
1204
1205   """
1206   HPATH = "cluster-verify"
1207   HTYPE = constants.HTYPE_CLUSTER
1208   REQ_BGL = False
1209
1210   TCLUSTER = "cluster"
1211   TNODE = "node"
1212   TINSTANCE = "instance"
1213
1214   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223   ENODEDRBD = (TNODE, "ENODEDRBD")
1224   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227   ENODEHV = (TNODE, "ENODEHV")
1228   ENODELVM = (TNODE, "ENODELVM")
1229   ENODEN1 = (TNODE, "ENODEN1")
1230   ENODENET = (TNODE, "ENODENET")
1231   ENODEOS = (TNODE, "ENODEOS")
1232   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234   ENODERPC = (TNODE, "ENODERPC")
1235   ENODESSH = (TNODE, "ENODESSH")
1236   ENODEVERSION = (TNODE, "ENODEVERSION")
1237   ENODESETUP = (TNODE, "ENODESETUP")
1238   ENODETIME = (TNODE, "ENODETIME")
1239   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240
1241   ETYPE_FIELD = "code"
1242   ETYPE_ERROR = "ERROR"
1243   ETYPE_WARNING = "WARNING"
1244
1245   _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: dictionary of {primary-node: list of instances} for all
1258         instances for which this node is secondary (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeNetwork(self, ninfo, nresult):
1484     """Check the node time.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489
1490     """
1491     node = ninfo.name
1492     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1493
1494     test = constants.NV_NODELIST not in nresult
1495     _ErrorIf(test, self.ENODESSH, node,
1496              "node hasn't returned node ssh connectivity data")
1497     if not test:
1498       if nresult[constants.NV_NODELIST]:
1499         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500           _ErrorIf(True, self.ENODESSH, node,
1501                    "ssh communication with node '%s': %s", a_node, a_msg)
1502
1503     test = constants.NV_NODENETTEST not in nresult
1504     _ErrorIf(test, self.ENODENET, node,
1505              "node hasn't returned node tcp connectivity data")
1506     if not test:
1507       if nresult[constants.NV_NODENETTEST]:
1508         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1509         for anode in nlist:
1510           _ErrorIf(True, self.ENODENET, node,
1511                    "tcp communication with node '%s': %s",
1512                    anode, nresult[constants.NV_NODENETTEST][anode])
1513
1514     test = constants.NV_MASTERIP not in nresult
1515     _ErrorIf(test, self.ENODENET, node,
1516              "node hasn't returned node master IP reachability data")
1517     if not test:
1518       if not nresult[constants.NV_MASTERIP]:
1519         if node == self.master_node:
1520           msg = "the master node cannot reach the master IP (not configured?)"
1521         else:
1522           msg = "cannot reach the master IP"
1523         _ErrorIf(True, self.ENODENET, node, msg)
1524
1525   def _VerifyInstance(self, instance, instanceconfig, node_image,
1526                       diskstatus):
1527     """Verify an instance.
1528
1529     This function checks to see if the required block devices are
1530     available on the instance's node.
1531
1532     """
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534     node_current = instanceconfig.primary_node
1535
1536     node_vol_should = {}
1537     instanceconfig.MapLVsByNode(node_vol_should)
1538
1539     for node in node_vol_should:
1540       n_img = node_image[node]
1541       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542         # ignore missing volumes on offline or broken nodes
1543         continue
1544       for volume in node_vol_should[node]:
1545         test = volume not in n_img.volumes
1546         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547                  "volume %s missing on node %s", volume, node)
1548
1549     if instanceconfig.admin_up:
1550       pri_img = node_image[node_current]
1551       test = instance not in pri_img.instances and not pri_img.offline
1552       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553                "instance not running on its primary node %s",
1554                node_current)
1555
1556     for node, n_img in node_image.items():
1557       if node != node_current:
1558         test = instance in n_img.instances
1559         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560                  "instance should not run on node %s", node)
1561
1562     diskdata = [(nname, success, status, idx)
1563                 for (nname, disks) in diskstatus.items()
1564                 for idx, (success, status) in enumerate(disks)]
1565
1566     for nname, success, bdev_status, idx in diskdata:
1567       # the 'ghost node' construction in Exec() ensures that we have a
1568       # node here
1569       snode = node_image[nname]
1570       bad_snode = snode.ghost or snode.offline
1571       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1572                self.EINSTANCEFAULTYDISK, instance,
1573                "couldn't retrieve status for disk/%s on %s: %s",
1574                idx, nname, bdev_status)
1575       _ErrorIf((instanceconfig.admin_up and success and
1576                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1577                self.EINSTANCEFAULTYDISK, instance,
1578                "disk/%s on %s is faulty", idx, nname)
1579
1580   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1581     """Verify if there are any unknown volumes in the cluster.
1582
1583     The .os, .swap and backup volumes are ignored. All other volumes are
1584     reported as unknown.
1585
1586     @type reserved: L{ganeti.utils.FieldSet}
1587     @param reserved: a FieldSet of reserved volume names
1588
1589     """
1590     for node, n_img in node_image.items():
1591       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1592         # skip non-healthy nodes
1593         continue
1594       for volume in n_img.volumes:
1595         test = ((node not in node_vol_should or
1596                 volume not in node_vol_should[node]) and
1597                 not reserved.Matches(volume))
1598         self._ErrorIf(test, self.ENODEORPHANLV, node,
1599                       "volume %s is unknown", volume)
1600
1601   def _VerifyOrphanInstances(self, instancelist, node_image):
1602     """Verify the list of running instances.
1603
1604     This checks what instances are running but unknown to the cluster.
1605
1606     """
1607     for node, n_img in node_image.items():
1608       for o_inst in n_img.instances:
1609         test = o_inst not in instancelist
1610         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1611                       "instance %s on node %s should not exist", o_inst, node)
1612
1613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1614     """Verify N+1 Memory Resilience.
1615
1616     Check that if one single node dies we can still start all the
1617     instances it was primary for.
1618
1619     """
1620     cluster_info = self.cfg.GetClusterInfo()
1621     for node, n_img in node_image.items():
1622       # This code checks that every node which is now listed as
1623       # secondary has enough memory to host all instances it is
1624       # supposed to should a single other node in the cluster fail.
1625       # FIXME: not ready for failover to an arbitrary node
1626       # FIXME: does not support file-backed instances
1627       # WARNING: we currently take into account down instances as well
1628       # as up ones, considering that even if they're down someone
1629       # might want to start them even in the event of a node failure.
1630       if n_img.offline:
1631         # we're skipping offline nodes from the N+1 warning, since
1632         # most likely we don't have good memory infromation from them;
1633         # we already list instances living on such nodes, and that's
1634         # enough warning
1635         continue
1636       for prinode, instances in n_img.sbp.items():
1637         needed_mem = 0
1638         for instance in instances:
1639           bep = cluster_info.FillBE(instance_cfg[instance])
1640           if bep[constants.BE_AUTO_BALANCE]:
1641             needed_mem += bep[constants.BE_MEMORY]
1642         test = n_img.mfree < needed_mem
1643         self._ErrorIf(test, self.ENODEN1, node,
1644                       "not enough memory to accomodate instance failovers"
1645                       " should node %s fail", prinode)
1646
1647   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1648                        master_files):
1649     """Verifies and computes the node required file checksums.
1650
1651     @type ninfo: L{objects.Node}
1652     @param ninfo: the node to check
1653     @param nresult: the remote results for the node
1654     @param file_list: required list of files
1655     @param local_cksum: dictionary of local files and their checksums
1656     @param master_files: list of files that only masters should have
1657
1658     """
1659     node = ninfo.name
1660     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661
1662     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1663     test = not isinstance(remote_cksum, dict)
1664     _ErrorIf(test, self.ENODEFILECHECK, node,
1665              "node hasn't returned file checksum data")
1666     if test:
1667       return
1668
1669     for file_name in file_list:
1670       node_is_mc = ninfo.master_candidate
1671       must_have = (file_name not in master_files) or node_is_mc
1672       # missing
1673       test1 = file_name not in remote_cksum
1674       # invalid checksum
1675       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1676       # existing and good
1677       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1678       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1679                "file '%s' missing", file_name)
1680       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1681                "file '%s' has wrong checksum", file_name)
1682       # not candidate and this is not a must-have file
1683       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1684                "file '%s' should not exist on non master"
1685                " candidates (and the file is outdated)", file_name)
1686       # all good, except non-master/non-must have combination
1687       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1688                "file '%s' should not exist"
1689                " on non master candidates", file_name)
1690
1691   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1692                       drbd_map):
1693     """Verifies and the node DRBD status.
1694
1695     @type ninfo: L{objects.Node}
1696     @param ninfo: the node to check
1697     @param nresult: the remote results for the node
1698     @param instanceinfo: the dict of instances
1699     @param drbd_helper: the configured DRBD usermode helper
1700     @param drbd_map: the DRBD map as returned by
1701         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1702
1703     """
1704     node = ninfo.name
1705     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1706
1707     if drbd_helper:
1708       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1709       test = (helper_result == None)
1710       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1711                "no drbd usermode helper returned")
1712       if helper_result:
1713         status, payload = helper_result
1714         test = not status
1715         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1716                  "drbd usermode helper check unsuccessful: %s", payload)
1717         test = status and (payload != drbd_helper)
1718         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1719                  "wrong drbd usermode helper: %s", payload)
1720
1721     # compute the DRBD minors
1722     node_drbd = {}
1723     for minor, instance in drbd_map[node].items():
1724       test = instance not in instanceinfo
1725       _ErrorIf(test, self.ECLUSTERCFG, None,
1726                "ghost instance '%s' in temporary DRBD map", instance)
1727         # ghost instance should not be running, but otherwise we
1728         # don't give double warnings (both ghost instance and
1729         # unallocated minor in use)
1730       if test:
1731         node_drbd[minor] = (instance, False)
1732       else:
1733         instance = instanceinfo[instance]
1734         node_drbd[minor] = (instance.name, instance.admin_up)
1735
1736     # and now check them
1737     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1738     test = not isinstance(used_minors, (tuple, list))
1739     _ErrorIf(test, self.ENODEDRBD, node,
1740              "cannot parse drbd status file: %s", str(used_minors))
1741     if test:
1742       # we cannot check drbd status
1743       return
1744
1745     for minor, (iname, must_exist) in node_drbd.items():
1746       test = minor not in used_minors and must_exist
1747       _ErrorIf(test, self.ENODEDRBD, node,
1748                "drbd minor %d of instance %s is not active", minor, iname)
1749     for minor in used_minors:
1750       test = minor not in node_drbd
1751       _ErrorIf(test, self.ENODEDRBD, node,
1752                "unallocated drbd minor %d is in use", minor)
1753
1754   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1755     """Builds the node OS structures.
1756
1757     @type ninfo: L{objects.Node}
1758     @param ninfo: the node to check
1759     @param nresult: the remote results for the node
1760     @param nimg: the node image object
1761
1762     """
1763     node = ninfo.name
1764     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1765
1766     remote_os = nresult.get(constants.NV_OSLIST, None)
1767     test = (not isinstance(remote_os, list) or
1768             not compat.all(isinstance(v, list) and len(v) == 7
1769                            for v in remote_os))
1770
1771     _ErrorIf(test, self.ENODEOS, node,
1772              "node hasn't returned valid OS data")
1773
1774     nimg.os_fail = test
1775
1776     if test:
1777       return
1778
1779     os_dict = {}
1780
1781     for (name, os_path, status, diagnose,
1782          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1783
1784       if name not in os_dict:
1785         os_dict[name] = []
1786
1787       # parameters is a list of lists instead of list of tuples due to
1788       # JSON lacking a real tuple type, fix it:
1789       parameters = [tuple(v) for v in parameters]
1790       os_dict[name].append((os_path, status, diagnose,
1791                             set(variants), set(parameters), set(api_ver)))
1792
1793     nimg.oslist = os_dict
1794
1795   def _VerifyNodeOS(self, ninfo, nimg, base):
1796     """Verifies the node OS list.
1797
1798     @type ninfo: L{objects.Node}
1799     @param ninfo: the node to check
1800     @param nimg: the node image object
1801     @param base: the 'template' node we match against (e.g. from the master)
1802
1803     """
1804     node = ninfo.name
1805     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1806
1807     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1808
1809     for os_name, os_data in nimg.oslist.items():
1810       assert os_data, "Empty OS status for OS %s?!" % os_name
1811       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1812       _ErrorIf(not f_status, self.ENODEOS, node,
1813                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1814       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1815                "OS '%s' has multiple entries (first one shadows the rest): %s",
1816                os_name, utils.CommaJoin([v[0] for v in os_data]))
1817       # this will catched in backend too
1818       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1819                and not f_var, self.ENODEOS, node,
1820                "OS %s with API at least %d does not declare any variant",
1821                os_name, constants.OS_API_V15)
1822       # comparisons with the 'base' image
1823       test = os_name not in base.oslist
1824       _ErrorIf(test, self.ENODEOS, node,
1825                "Extra OS %s not present on reference node (%s)",
1826                os_name, base.name)
1827       if test:
1828         continue
1829       assert base.oslist[os_name], "Base node has empty OS status?"
1830       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1831       if not b_status:
1832         # base OS is invalid, skipping
1833         continue
1834       for kind, a, b in [("API version", f_api, b_api),
1835                          ("variants list", f_var, b_var),
1836                          ("parameters", f_param, b_param)]:
1837         _ErrorIf(a != b, self.ENODEOS, node,
1838                  "OS %s %s differs from reference node %s: %s vs. %s",
1839                  kind, os_name, base.name,
1840                  utils.CommaJoin(a), utils.CommaJoin(b))
1841
1842     # check any missing OSes
1843     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1844     _ErrorIf(missing, self.ENODEOS, node,
1845              "OSes present on reference node %s but missing on this node: %s",
1846              base.name, utils.CommaJoin(missing))
1847
1848   def _VerifyOob(self, ninfo, nresult):
1849     """Verifies out of band functionality of a node.
1850
1851     @type ninfo: L{objects.Node}
1852     @param ninfo: the node to check
1853     @param nresult: the remote results for the node
1854
1855     """
1856     node = ninfo.name
1857     # We just have to verify the paths on master and/or master candidates
1858     # as the oob helper is invoked on the master
1859     if ((ninfo.master_candidate or ninfo.master_capable) and
1860         constants.NV_OOB_PATHS in nresult):
1861       for path_result in nresult[constants.NV_OOB_PATHS]:
1862         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1863
1864   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1865     """Verifies and updates the node volume data.
1866
1867     This function will update a L{NodeImage}'s internal structures
1868     with data from the remote call.
1869
1870     @type ninfo: L{objects.Node}
1871     @param ninfo: the node to check
1872     @param nresult: the remote results for the node
1873     @param nimg: the node image object
1874     @param vg_name: the configured VG name
1875
1876     """
1877     node = ninfo.name
1878     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1879
1880     nimg.lvm_fail = True
1881     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1882     if vg_name is None:
1883       pass
1884     elif isinstance(lvdata, basestring):
1885       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1886                utils.SafeEncode(lvdata))
1887     elif not isinstance(lvdata, dict):
1888       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1889     else:
1890       nimg.volumes = lvdata
1891       nimg.lvm_fail = False
1892
1893   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1894     """Verifies and updates the node instance list.
1895
1896     If the listing was successful, then updates this node's instance
1897     list. Otherwise, it marks the RPC call as failed for the instance
1898     list key.
1899
1900     @type ninfo: L{objects.Node}
1901     @param ninfo: the node to check
1902     @param nresult: the remote results for the node
1903     @param nimg: the node image object
1904
1905     """
1906     idata = nresult.get(constants.NV_INSTANCELIST, None)
1907     test = not isinstance(idata, list)
1908     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1909                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1910     if test:
1911       nimg.hyp_fail = True
1912     else:
1913       nimg.instances = idata
1914
1915   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1916     """Verifies and computes a node information map
1917
1918     @type ninfo: L{objects.Node}
1919     @param ninfo: the node to check
1920     @param nresult: the remote results for the node
1921     @param nimg: the node image object
1922     @param vg_name: the configured VG name
1923
1924     """
1925     node = ninfo.name
1926     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1927
1928     # try to read free memory (from the hypervisor)
1929     hv_info = nresult.get(constants.NV_HVINFO, None)
1930     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1931     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1932     if not test:
1933       try:
1934         nimg.mfree = int(hv_info["memory_free"])
1935       except (ValueError, TypeError):
1936         _ErrorIf(True, self.ENODERPC, node,
1937                  "node returned invalid nodeinfo, check hypervisor")
1938
1939     # FIXME: devise a free space model for file based instances as well
1940     if vg_name is not None:
1941       test = (constants.NV_VGLIST not in nresult or
1942               vg_name not in nresult[constants.NV_VGLIST])
1943       _ErrorIf(test, self.ENODELVM, node,
1944                "node didn't return data for the volume group '%s'"
1945                " - it is either missing or broken", vg_name)
1946       if not test:
1947         try:
1948           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1949         except (ValueError, TypeError):
1950           _ErrorIf(True, self.ENODERPC, node,
1951                    "node returned invalid LVM info, check LVM status")
1952
1953   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1954     """Gets per-disk status information for all instances.
1955
1956     @type nodelist: list of strings
1957     @param nodelist: Node names
1958     @type node_image: dict of (name, L{objects.Node})
1959     @param node_image: Node objects
1960     @type instanceinfo: dict of (name, L{objects.Instance})
1961     @param instanceinfo: Instance objects
1962     @rtype: {instance: {node: [(succes, payload)]}}
1963     @return: a dictionary of per-instance dictionaries with nodes as
1964         keys and disk information as values; the disk information is a
1965         list of tuples (success, payload)
1966
1967     """
1968     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1969
1970     node_disks = {}
1971     node_disks_devonly = {}
1972     diskless_instances = set()
1973     diskless = constants.DT_DISKLESS
1974
1975     for nname in nodelist:
1976       node_instances = list(itertools.chain(node_image[nname].pinst,
1977                                             node_image[nname].sinst))
1978       diskless_instances.update(inst for inst in node_instances
1979                                 if instanceinfo[inst].disk_template == diskless)
1980       disks = [(inst, disk)
1981                for inst in node_instances
1982                for disk in instanceinfo[inst].disks]
1983
1984       if not disks:
1985         # No need to collect data
1986         continue
1987
1988       node_disks[nname] = disks
1989
1990       # Creating copies as SetDiskID below will modify the objects and that can
1991       # lead to incorrect data returned from nodes
1992       devonly = [dev.Copy() for (_, dev) in disks]
1993
1994       for dev in devonly:
1995         self.cfg.SetDiskID(dev, nname)
1996
1997       node_disks_devonly[nname] = devonly
1998
1999     assert len(node_disks) == len(node_disks_devonly)
2000
2001     # Collect data from all nodes with disks
2002     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2003                                                           node_disks_devonly)
2004
2005     assert len(result) == len(node_disks)
2006
2007     instdisk = {}
2008
2009     for (nname, nres) in result.items():
2010       disks = node_disks[nname]
2011
2012       if nres.offline:
2013         # No data from this node
2014         data = len(disks) * [(False, "node offline")]
2015       else:
2016         msg = nres.fail_msg
2017         _ErrorIf(msg, self.ENODERPC, nname,
2018                  "while getting disk information: %s", msg)
2019         if msg:
2020           # No data from this node
2021           data = len(disks) * [(False, msg)]
2022         else:
2023           data = []
2024           for idx, i in enumerate(nres.payload):
2025             if isinstance(i, (tuple, list)) and len(i) == 2:
2026               data.append(i)
2027             else:
2028               logging.warning("Invalid result from node %s, entry %d: %s",
2029                               nname, idx, i)
2030               data.append((False, "Invalid result from the remote node"))
2031
2032       for ((inst, _), status) in zip(disks, data):
2033         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2034
2035     # Add empty entries for diskless instances.
2036     for inst in diskless_instances:
2037       assert inst not in instdisk
2038       instdisk[inst] = {}
2039
2040     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2041                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2042                       compat.all(isinstance(s, (tuple, list)) and
2043                                  len(s) == 2 for s in statuses)
2044                       for inst, nnames in instdisk.items()
2045                       for nname, statuses in nnames.items())
2046     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2047
2048     return instdisk
2049
2050   def _VerifyHVP(self, hvp_data):
2051     """Verifies locally the syntax of the hypervisor parameters.
2052
2053     """
2054     for item, hv_name, hv_params in hvp_data:
2055       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2056              (item, hv_name))
2057       try:
2058         hv_class = hypervisor.GetHypervisor(hv_name)
2059         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2060         hv_class.CheckParameterSyntax(hv_params)
2061       except errors.GenericError, err:
2062         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2063
2064
2065   def BuildHooksEnv(self):
2066     """Build hooks env.
2067
2068     Cluster-Verify hooks just ran in the post phase and their failure makes
2069     the output be logged in the verify output and the verification to fail.
2070
2071     """
2072     all_nodes = self.cfg.GetNodeList()
2073     env = {
2074       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2075       }
2076     for node in self.cfg.GetAllNodesInfo().values():
2077       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2078
2079     return env, [], all_nodes
2080
2081   def Exec(self, feedback_fn):
2082     """Verify integrity of cluster, performing various test on nodes.
2083
2084     """
2085     # This method has too many local variables. pylint: disable-msg=R0914
2086     self.bad = False
2087     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2088     verbose = self.op.verbose
2089     self._feedback_fn = feedback_fn
2090     feedback_fn("* Verifying global settings")
2091     for msg in self.cfg.VerifyConfig():
2092       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2093
2094     # Check the cluster certificates
2095     for cert_filename in constants.ALL_CERT_FILES:
2096       (errcode, msg) = _VerifyCertificate(cert_filename)
2097       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2098
2099     vg_name = self.cfg.GetVGName()
2100     drbd_helper = self.cfg.GetDRBDHelper()
2101     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2102     cluster = self.cfg.GetClusterInfo()
2103     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2104     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2105     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2106     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2107     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2108                         for iname in instancelist)
2109     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2110     i_non_redundant = [] # Non redundant instances
2111     i_non_a_balanced = [] # Non auto-balanced instances
2112     n_offline = 0 # Count of offline nodes
2113     n_drained = 0 # Count of nodes being drained
2114     node_vol_should = {}
2115
2116     # FIXME: verify OS list
2117     # do local checksums
2118     master_files = [constants.CLUSTER_CONF_FILE]
2119     master_node = self.master_node = self.cfg.GetMasterNode()
2120     master_ip = self.cfg.GetMasterIP()
2121
2122     file_names = ssconf.SimpleStore().GetFileList()
2123     file_names.extend(constants.ALL_CERT_FILES)
2124     file_names.extend(master_files)
2125     if cluster.modify_etc_hosts:
2126       file_names.append(constants.ETC_HOSTS)
2127
2128     local_checksums = utils.FingerprintFiles(file_names)
2129
2130     # Compute the set of hypervisor parameters
2131     hvp_data = []
2132     for hv_name in hypervisors:
2133       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2134     for os_name, os_hvp in cluster.os_hvp.items():
2135       for hv_name, hv_params in os_hvp.items():
2136         if not hv_params:
2137           continue
2138         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2139         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2140     # TODO: collapse identical parameter values in a single one
2141     for instance in instanceinfo.values():
2142       if not instance.hvparams:
2143         continue
2144       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2145                        cluster.FillHV(instance)))
2146     # and verify them locally
2147     self._VerifyHVP(hvp_data)
2148
2149     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2150     node_verify_param = {
2151       constants.NV_FILELIST: file_names,
2152       constants.NV_NODELIST: [node.name for node in nodeinfo
2153                               if not node.offline],
2154       constants.NV_HYPERVISOR: hypervisors,
2155       constants.NV_HVPARAMS: hvp_data,
2156       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2157                                   node.secondary_ip) for node in nodeinfo
2158                                  if not node.offline],
2159       constants.NV_INSTANCELIST: hypervisors,
2160       constants.NV_VERSION: None,
2161       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2162       constants.NV_NODESETUP: None,
2163       constants.NV_TIME: None,
2164       constants.NV_MASTERIP: (master_node, master_ip),
2165       constants.NV_OSLIST: None,
2166       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2167       }
2168
2169     if vg_name is not None:
2170       node_verify_param[constants.NV_VGLIST] = None
2171       node_verify_param[constants.NV_LVLIST] = vg_name
2172       node_verify_param[constants.NV_PVLIST] = [vg_name]
2173       node_verify_param[constants.NV_DRBDLIST] = None
2174
2175     if drbd_helper:
2176       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2177
2178     # Build our expected cluster state
2179     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2180                                                  name=node.name,
2181                                                  vm_capable=node.vm_capable))
2182                       for node in nodeinfo)
2183
2184     # Gather OOB paths
2185     oob_paths = []
2186     for node in nodeinfo:
2187       path = _SupportsOob(self.cfg, node)
2188       if path and path not in oob_paths:
2189         oob_paths.append(path)
2190
2191     if oob_paths:
2192       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2193
2194     for instance in instancelist:
2195       inst_config = instanceinfo[instance]
2196
2197       for nname in inst_config.all_nodes:
2198         if nname not in node_image:
2199           # ghost node
2200           gnode = self.NodeImage(name=nname)
2201           gnode.ghost = True
2202           node_image[nname] = gnode
2203
2204       inst_config.MapLVsByNode(node_vol_should)
2205
2206       pnode = inst_config.primary_node
2207       node_image[pnode].pinst.append(instance)
2208
2209       for snode in inst_config.secondary_nodes:
2210         nimg = node_image[snode]
2211         nimg.sinst.append(instance)
2212         if pnode not in nimg.sbp:
2213           nimg.sbp[pnode] = []
2214         nimg.sbp[pnode].append(instance)
2215
2216     # At this point, we have the in-memory data structures complete,
2217     # except for the runtime information, which we'll gather next
2218
2219     # Due to the way our RPC system works, exact response times cannot be
2220     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2221     # time before and after executing the request, we can at least have a time
2222     # window.
2223     nvinfo_starttime = time.time()
2224     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2225                                            self.cfg.GetClusterName())
2226     nvinfo_endtime = time.time()
2227
2228     all_drbd_map = self.cfg.ComputeDRBDMap()
2229
2230     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2231     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2232
2233     feedback_fn("* Verifying node status")
2234
2235     refos_img = None
2236
2237     for node_i in nodeinfo:
2238       node = node_i.name
2239       nimg = node_image[node]
2240
2241       if node_i.offline:
2242         if verbose:
2243           feedback_fn("* Skipping offline node %s" % (node,))
2244         n_offline += 1
2245         continue
2246
2247       if node == master_node:
2248         ntype = "master"
2249       elif node_i.master_candidate:
2250         ntype = "master candidate"
2251       elif node_i.drained:
2252         ntype = "drained"
2253         n_drained += 1
2254       else:
2255         ntype = "regular"
2256       if verbose:
2257         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2258
2259       msg = all_nvinfo[node].fail_msg
2260       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2261       if msg:
2262         nimg.rpc_fail = True
2263         continue
2264
2265       nresult = all_nvinfo[node].payload
2266
2267       nimg.call_ok = self._VerifyNode(node_i, nresult)
2268       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2269       self._VerifyNodeNetwork(node_i, nresult)
2270       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2271                             master_files)
2272
2273       self._VerifyOob(node_i, nresult)
2274
2275       if nimg.vm_capable:
2276         self._VerifyNodeLVM(node_i, nresult, vg_name)
2277         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2278                              all_drbd_map)
2279
2280         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2281         self._UpdateNodeInstances(node_i, nresult, nimg)
2282         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2283         self._UpdateNodeOS(node_i, nresult, nimg)
2284         if not nimg.os_fail:
2285           if refos_img is None:
2286             refos_img = nimg
2287           self._VerifyNodeOS(node_i, nimg, refos_img)
2288
2289     feedback_fn("* Verifying instance status")
2290     for instance in instancelist:
2291       if verbose:
2292         feedback_fn("* Verifying instance %s" % instance)
2293       inst_config = instanceinfo[instance]
2294       self._VerifyInstance(instance, inst_config, node_image,
2295                            instdisk[instance])
2296       inst_nodes_offline = []
2297
2298       pnode = inst_config.primary_node
2299       pnode_img = node_image[pnode]
2300       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2301                self.ENODERPC, pnode, "instance %s, connection to"
2302                " primary node failed", instance)
2303
2304       _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2305                "instance lives on offline node %s", inst_config.primary_node)
2306
2307       # If the instance is non-redundant we cannot survive losing its primary
2308       # node, so we are not N+1 compliant. On the other hand we have no disk
2309       # templates with more than one secondary so that situation is not well
2310       # supported either.
2311       # FIXME: does not support file-backed instances
2312       if not inst_config.secondary_nodes:
2313         i_non_redundant.append(instance)
2314
2315       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2316                instance, "instance has multiple secondary nodes: %s",
2317                utils.CommaJoin(inst_config.secondary_nodes),
2318                code=self.ETYPE_WARNING)
2319
2320       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2321         pnode = inst_config.primary_node
2322         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2323         instance_groups = {}
2324
2325         for node in instance_nodes:
2326           instance_groups.setdefault(nodeinfo_byname[node].group,
2327                                      []).append(node)
2328
2329         pretty_list = [
2330           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2331           # Sort so that we always list the primary node first.
2332           for group, nodes in sorted(instance_groups.items(),
2333                                      key=lambda (_, nodes): pnode in nodes,
2334                                      reverse=True)]
2335
2336         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2337                       instance, "instance has primary and secondary nodes in"
2338                       " different groups: %s", utils.CommaJoin(pretty_list),
2339                       code=self.ETYPE_WARNING)
2340
2341       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2342         i_non_a_balanced.append(instance)
2343
2344       for snode in inst_config.secondary_nodes:
2345         s_img = node_image[snode]
2346         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2347                  "instance %s, connection to secondary node failed", instance)
2348
2349         if s_img.offline:
2350           inst_nodes_offline.append(snode)
2351
2352       # warn that the instance lives on offline nodes
2353       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2354                "instance has offline secondary node(s) %s",
2355                utils.CommaJoin(inst_nodes_offline))
2356       # ... or ghost/non-vm_capable nodes
2357       for node in inst_config.all_nodes:
2358         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2359                  "instance lives on ghost node %s", node)
2360         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2361                  instance, "instance lives on non-vm_capable node %s", node)
2362
2363     feedback_fn("* Verifying orphan volumes")
2364     reserved = utils.FieldSet(*cluster.reserved_lvs)
2365     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2366
2367     feedback_fn("* Verifying orphan instances")
2368     self._VerifyOrphanInstances(instancelist, node_image)
2369
2370     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2371       feedback_fn("* Verifying N+1 Memory redundancy")
2372       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2373
2374     feedback_fn("* Other Notes")
2375     if i_non_redundant:
2376       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2377                   % len(i_non_redundant))
2378
2379     if i_non_a_balanced:
2380       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2381                   % len(i_non_a_balanced))
2382
2383     if n_offline:
2384       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2385
2386     if n_drained:
2387       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2388
2389     return not self.bad
2390
2391   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2392     """Analyze the post-hooks' result
2393
2394     This method analyses the hook result, handles it, and sends some
2395     nicely-formatted feedback back to the user.
2396
2397     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2398         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2399     @param hooks_results: the results of the multi-node hooks rpc call
2400     @param feedback_fn: function used send feedback back to the caller
2401     @param lu_result: previous Exec result
2402     @return: the new Exec result, based on the previous result
2403         and hook results
2404
2405     """
2406     # We only really run POST phase hooks, and are only interested in
2407     # their results
2408     if phase == constants.HOOKS_PHASE_POST:
2409       # Used to change hooks' output to proper indentation
2410       feedback_fn("* Hooks Results")
2411       assert hooks_results, "invalid result from hooks"
2412
2413       for node_name in hooks_results:
2414         res = hooks_results[node_name]
2415         msg = res.fail_msg
2416         test = msg and not res.offline
2417         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2418                       "Communication failure in hooks execution: %s", msg)
2419         if res.offline or msg:
2420           # No need to investigate payload if node is offline or gave an error.
2421           # override manually lu_result here as _ErrorIf only
2422           # overrides self.bad
2423           lu_result = 1
2424           continue
2425         for script, hkr, output in res.payload:
2426           test = hkr == constants.HKR_FAIL
2427           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2428                         "Script %s failed, output:", script)
2429           if test:
2430             output = self._HOOKS_INDENT_RE.sub('      ', output)
2431             feedback_fn("%s" % output)
2432             lu_result = 0
2433
2434       return lu_result
2435
2436
2437 class LUClusterVerifyDisks(NoHooksLU):
2438   """Verifies the cluster disks status.
2439
2440   """
2441   REQ_BGL = False
2442
2443   def ExpandNames(self):
2444     self.needed_locks = {
2445       locking.LEVEL_NODE: locking.ALL_SET,
2446       locking.LEVEL_INSTANCE: locking.ALL_SET,
2447     }
2448     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2449
2450   def Exec(self, feedback_fn):
2451     """Verify integrity of cluster disks.
2452
2453     @rtype: tuple of three items
2454     @return: a tuple of (dict of node-to-node_error, list of instances
2455         which need activate-disks, dict of instance: (node, volume) for
2456         missing volumes
2457
2458     """
2459     result = res_nodes, res_instances, res_missing = {}, [], {}
2460
2461     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2462     instances = self.cfg.GetAllInstancesInfo().values()
2463
2464     nv_dict = {}
2465     for inst in instances:
2466       inst_lvs = {}
2467       if not inst.admin_up:
2468         continue
2469       inst.MapLVsByNode(inst_lvs)
2470       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2471       for node, vol_list in inst_lvs.iteritems():
2472         for vol in vol_list:
2473           nv_dict[(node, vol)] = inst
2474
2475     if not nv_dict:
2476       return result
2477
2478     node_lvs = self.rpc.call_lv_list(nodes, [])
2479     for node, node_res in node_lvs.items():
2480       if node_res.offline:
2481         continue
2482       msg = node_res.fail_msg
2483       if msg:
2484         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2485         res_nodes[node] = msg
2486         continue
2487
2488       lvs = node_res.payload
2489       for lv_name, (_, _, lv_online) in lvs.items():
2490         inst = nv_dict.pop((node, lv_name), None)
2491         if (not lv_online and inst is not None
2492             and inst.name not in res_instances):
2493           res_instances.append(inst.name)
2494
2495     # any leftover items in nv_dict are missing LVs, let's arrange the
2496     # data better
2497     for key, inst in nv_dict.iteritems():
2498       if inst.name not in res_missing:
2499         res_missing[inst.name] = []
2500       res_missing[inst.name].append(key)
2501
2502     return result
2503
2504
2505 class LUClusterRepairDiskSizes(NoHooksLU):
2506   """Verifies the cluster disks sizes.
2507
2508   """
2509   REQ_BGL = False
2510
2511   def ExpandNames(self):
2512     if self.op.instances:
2513       self.wanted_names = []
2514       for name in self.op.instances:
2515         full_name = _ExpandInstanceName(self.cfg, name)
2516         self.wanted_names.append(full_name)
2517       self.needed_locks = {
2518         locking.LEVEL_NODE: [],
2519         locking.LEVEL_INSTANCE: self.wanted_names,
2520         }
2521       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2522     else:
2523       self.wanted_names = None
2524       self.needed_locks = {
2525         locking.LEVEL_NODE: locking.ALL_SET,
2526         locking.LEVEL_INSTANCE: locking.ALL_SET,
2527         }
2528     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2529
2530   def DeclareLocks(self, level):
2531     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2532       self._LockInstancesNodes(primary_only=True)
2533
2534   def CheckPrereq(self):
2535     """Check prerequisites.
2536
2537     This only checks the optional instance list against the existing names.
2538
2539     """
2540     if self.wanted_names is None:
2541       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2542
2543     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2544                              in self.wanted_names]
2545
2546   def _EnsureChildSizes(self, disk):
2547     """Ensure children of the disk have the needed disk size.
2548
2549     This is valid mainly for DRBD8 and fixes an issue where the
2550     children have smaller disk size.
2551
2552     @param disk: an L{ganeti.objects.Disk} object
2553
2554     """
2555     if disk.dev_type == constants.LD_DRBD8:
2556       assert disk.children, "Empty children for DRBD8?"
2557       fchild = disk.children[0]
2558       mismatch = fchild.size < disk.size
2559       if mismatch:
2560         self.LogInfo("Child disk has size %d, parent %d, fixing",
2561                      fchild.size, disk.size)
2562         fchild.size = disk.size
2563
2564       # and we recurse on this child only, not on the metadev
2565       return self._EnsureChildSizes(fchild) or mismatch
2566     else:
2567       return False
2568
2569   def Exec(self, feedback_fn):
2570     """Verify the size of cluster disks.
2571
2572     """
2573     # TODO: check child disks too
2574     # TODO: check differences in size between primary/secondary nodes
2575     per_node_disks = {}
2576     for instance in self.wanted_instances:
2577       pnode = instance.primary_node
2578       if pnode not in per_node_disks:
2579         per_node_disks[pnode] = []
2580       for idx, disk in enumerate(instance.disks):
2581         per_node_disks[pnode].append((instance, idx, disk))
2582
2583     changed = []
2584     for node, dskl in per_node_disks.items():
2585       newl = [v[2].Copy() for v in dskl]
2586       for dsk in newl:
2587         self.cfg.SetDiskID(dsk, node)
2588       result = self.rpc.call_blockdev_getsize(node, newl)
2589       if result.fail_msg:
2590         self.LogWarning("Failure in blockdev_getsize call to node"
2591                         " %s, ignoring", node)
2592         continue
2593       if len(result.payload) != len(dskl):
2594         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2595                         " result.payload=%s", node, len(dskl), result.payload)
2596         self.LogWarning("Invalid result from node %s, ignoring node results",
2597                         node)
2598         continue
2599       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2600         if size is None:
2601           self.LogWarning("Disk %d of instance %s did not return size"
2602                           " information, ignoring", idx, instance.name)
2603           continue
2604         if not isinstance(size, (int, long)):
2605           self.LogWarning("Disk %d of instance %s did not return valid"
2606                           " size information, ignoring", idx, instance.name)
2607           continue
2608         size = size >> 20
2609         if size != disk.size:
2610           self.LogInfo("Disk %d of instance %s has mismatched size,"
2611                        " correcting: recorded %d, actual %d", idx,
2612                        instance.name, disk.size, size)
2613           disk.size = size
2614           self.cfg.Update(instance, feedback_fn)
2615           changed.append((instance.name, idx, size))
2616         if self._EnsureChildSizes(disk):
2617           self.cfg.Update(instance, feedback_fn)
2618           changed.append((instance.name, idx, disk.size))
2619     return changed
2620
2621
2622 class LUClusterRename(LogicalUnit):
2623   """Rename the cluster.
2624
2625   """
2626   HPATH = "cluster-rename"
2627   HTYPE = constants.HTYPE_CLUSTER
2628
2629   def BuildHooksEnv(self):
2630     """Build hooks env.
2631
2632     """
2633     env = {
2634       "OP_TARGET": self.cfg.GetClusterName(),
2635       "NEW_NAME": self.op.name,
2636       }
2637     mn = self.cfg.GetMasterNode()
2638     all_nodes = self.cfg.GetNodeList()
2639     return env, [mn], all_nodes
2640
2641   def CheckPrereq(self):
2642     """Verify that the passed name is a valid one.
2643
2644     """
2645     hostname = netutils.GetHostname(name=self.op.name,
2646                                     family=self.cfg.GetPrimaryIPFamily())
2647
2648     new_name = hostname.name
2649     self.ip = new_ip = hostname.ip
2650     old_name = self.cfg.GetClusterName()
2651     old_ip = self.cfg.GetMasterIP()
2652     if new_name == old_name and new_ip == old_ip:
2653       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2654                                  " cluster has changed",
2655                                  errors.ECODE_INVAL)
2656     if new_ip != old_ip:
2657       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2658         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2659                                    " reachable on the network" %
2660                                    new_ip, errors.ECODE_NOTUNIQUE)
2661
2662     self.op.name = new_name
2663
2664   def Exec(self, feedback_fn):
2665     """Rename the cluster.
2666
2667     """
2668     clustername = self.op.name
2669     ip = self.ip
2670
2671     # shutdown the master IP
2672     master = self.cfg.GetMasterNode()
2673     result = self.rpc.call_node_stop_master(master, False)
2674     result.Raise("Could not disable the master role")
2675
2676     try:
2677       cluster = self.cfg.GetClusterInfo()
2678       cluster.cluster_name = clustername
2679       cluster.master_ip = ip
2680       self.cfg.Update(cluster, feedback_fn)
2681
2682       # update the known hosts file
2683       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2684       node_list = self.cfg.GetOnlineNodeList()
2685       try:
2686         node_list.remove(master)
2687       except ValueError:
2688         pass
2689       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2690     finally:
2691       result = self.rpc.call_node_start_master(master, False, False)
2692       msg = result.fail_msg
2693       if msg:
2694         self.LogWarning("Could not re-enable the master role on"
2695                         " the master, please restart manually: %s", msg)
2696
2697     return clustername
2698
2699
2700 class LUClusterSetParams(LogicalUnit):
2701   """Change the parameters of the cluster.
2702
2703   """
2704   HPATH = "cluster-modify"
2705   HTYPE = constants.HTYPE_CLUSTER
2706   REQ_BGL = False
2707
2708   def CheckArguments(self):
2709     """Check parameters
2710
2711     """
2712     if self.op.uid_pool:
2713       uidpool.CheckUidPool(self.op.uid_pool)
2714
2715     if self.op.add_uids:
2716       uidpool.CheckUidPool(self.op.add_uids)
2717
2718     if self.op.remove_uids:
2719       uidpool.CheckUidPool(self.op.remove_uids)
2720
2721   def ExpandNames(self):
2722     # FIXME: in the future maybe other cluster params won't require checking on
2723     # all nodes to be modified.
2724     self.needed_locks = {
2725       locking.LEVEL_NODE: locking.ALL_SET,
2726     }
2727     self.share_locks[locking.LEVEL_NODE] = 1
2728
2729   def BuildHooksEnv(self):
2730     """Build hooks env.
2731
2732     """
2733     env = {
2734       "OP_TARGET": self.cfg.GetClusterName(),
2735       "NEW_VG_NAME": self.op.vg_name,
2736       }
2737     mn = self.cfg.GetMasterNode()
2738     return env, [mn], [mn]
2739
2740   def CheckPrereq(self):
2741     """Check prerequisites.
2742
2743     This checks whether the given params don't conflict and
2744     if the given volume group is valid.
2745
2746     """
2747     if self.op.vg_name is not None and not self.op.vg_name:
2748       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2749         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2750                                    " instances exist", errors.ECODE_INVAL)
2751
2752     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2753       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2754         raise errors.OpPrereqError("Cannot disable drbd helper while"
2755                                    " drbd-based instances exist",
2756                                    errors.ECODE_INVAL)
2757
2758     node_list = self.acquired_locks[locking.LEVEL_NODE]
2759
2760     # if vg_name not None, checks given volume group on all nodes
2761     if self.op.vg_name:
2762       vglist = self.rpc.call_vg_list(node_list)
2763       for node in node_list:
2764         msg = vglist[node].fail_msg
2765         if msg:
2766           # ignoring down node
2767           self.LogWarning("Error while gathering data on node %s"
2768                           " (ignoring node): %s", node, msg)
2769           continue
2770         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2771                                               self.op.vg_name,
2772                                               constants.MIN_VG_SIZE)
2773         if vgstatus:
2774           raise errors.OpPrereqError("Error on node '%s': %s" %
2775                                      (node, vgstatus), errors.ECODE_ENVIRON)
2776
2777     if self.op.drbd_helper:
2778       # checks given drbd helper on all nodes
2779       helpers = self.rpc.call_drbd_helper(node_list)
2780       for node in node_list:
2781         ninfo = self.cfg.GetNodeInfo(node)
2782         if ninfo.offline:
2783           self.LogInfo("Not checking drbd helper on offline node %s", node)
2784           continue
2785         msg = helpers[node].fail_msg
2786         if msg:
2787           raise errors.OpPrereqError("Error checking drbd helper on node"
2788                                      " '%s': %s" % (node, msg),
2789                                      errors.ECODE_ENVIRON)
2790         node_helper = helpers[node].payload
2791         if node_helper != self.op.drbd_helper:
2792           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2793                                      (node, node_helper), errors.ECODE_ENVIRON)
2794
2795     self.cluster = cluster = self.cfg.GetClusterInfo()
2796     # validate params changes
2797     if self.op.beparams:
2798       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2799       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2800
2801     if self.op.ndparams:
2802       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2803       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2804
2805     if self.op.nicparams:
2806       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2807       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2808       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2809       nic_errors = []
2810
2811       # check all instances for consistency
2812       for instance in self.cfg.GetAllInstancesInfo().values():
2813         for nic_idx, nic in enumerate(instance.nics):
2814           params_copy = copy.deepcopy(nic.nicparams)
2815           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2816
2817           # check parameter syntax
2818           try:
2819             objects.NIC.CheckParameterSyntax(params_filled)
2820           except errors.ConfigurationError, err:
2821             nic_errors.append("Instance %s, nic/%d: %s" %
2822                               (instance.name, nic_idx, err))
2823
2824           # if we're moving instances to routed, check that they have an ip
2825           target_mode = params_filled[constants.NIC_MODE]
2826           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2827             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2828                               (instance.name, nic_idx))
2829       if nic_errors:
2830         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2831                                    "\n".join(nic_errors))
2832
2833     # hypervisor list/parameters
2834     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2835     if self.op.hvparams:
2836       for hv_name, hv_dict in self.op.hvparams.items():
2837         if hv_name not in self.new_hvparams:
2838           self.new_hvparams[hv_name] = hv_dict
2839         else:
2840           self.new_hvparams[hv_name].update(hv_dict)
2841
2842     # os hypervisor parameters
2843     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2844     if self.op.os_hvp:
2845       for os_name, hvs in self.op.os_hvp.items():
2846         if os_name not in self.new_os_hvp:
2847           self.new_os_hvp[os_name] = hvs
2848         else:
2849           for hv_name, hv_dict in hvs.items():
2850             if hv_name not in self.new_os_hvp[os_name]:
2851               self.new_os_hvp[os_name][hv_name] = hv_dict
2852             else:
2853               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2854
2855     # os parameters
2856     self.new_osp = objects.FillDict(cluster.osparams, {})
2857     if self.op.osparams:
2858       for os_name, osp in self.op.osparams.items():
2859         if os_name not in self.new_osp:
2860           self.new_osp[os_name] = {}
2861
2862         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2863                                                   use_none=True)
2864
2865         if not self.new_osp[os_name]:
2866           # we removed all parameters
2867           del self.new_osp[os_name]
2868         else:
2869           # check the parameter validity (remote check)
2870           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2871                          os_name, self.new_osp[os_name])
2872
2873     # changes to the hypervisor list
2874     if self.op.enabled_hypervisors is not None:
2875       self.hv_list = self.op.enabled_hypervisors
2876       for hv in self.hv_list:
2877         # if the hypervisor doesn't already exist in the cluster
2878         # hvparams, we initialize it to empty, and then (in both
2879         # cases) we make sure to fill the defaults, as we might not
2880         # have a complete defaults list if the hypervisor wasn't
2881         # enabled before
2882         if hv not in new_hvp:
2883           new_hvp[hv] = {}
2884         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2885         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2886     else:
2887       self.hv_list = cluster.enabled_hypervisors
2888
2889     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2890       # either the enabled list has changed, or the parameters have, validate
2891       for hv_name, hv_params in self.new_hvparams.items():
2892         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2893             (self.op.enabled_hypervisors and
2894              hv_name in self.op.enabled_hypervisors)):
2895           # either this is a new hypervisor, or its parameters have changed
2896           hv_class = hypervisor.GetHypervisor(hv_name)
2897           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2898           hv_class.CheckParameterSyntax(hv_params)
2899           _CheckHVParams(self, node_list, hv_name, hv_params)
2900
2901     if self.op.os_hvp:
2902       # no need to check any newly-enabled hypervisors, since the
2903       # defaults have already been checked in the above code-block
2904       for os_name, os_hvp in self.new_os_hvp.items():
2905         for hv_name, hv_params in os_hvp.items():
2906           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2907           # we need to fill in the new os_hvp on top of the actual hv_p
2908           cluster_defaults = self.new_hvparams.get(hv_name, {})
2909           new_osp = objects.FillDict(cluster_defaults, hv_params)
2910           hv_class = hypervisor.GetHypervisor(hv_name)
2911           hv_class.CheckParameterSyntax(new_osp)
2912           _CheckHVParams(self, node_list, hv_name, new_osp)
2913
2914     if self.op.default_iallocator:
2915       alloc_script = utils.FindFile(self.op.default_iallocator,
2916                                     constants.IALLOCATOR_SEARCH_PATH,
2917                                     os.path.isfile)
2918       if alloc_script is None:
2919         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2920                                    " specified" % self.op.default_iallocator,
2921                                    errors.ECODE_INVAL)
2922
2923   def Exec(self, feedback_fn):
2924     """Change the parameters of the cluster.
2925
2926     """
2927     if self.op.vg_name is not None:
2928       new_volume = self.op.vg_name
2929       if not new_volume:
2930         new_volume = None
2931       if new_volume != self.cfg.GetVGName():
2932         self.cfg.SetVGName(new_volume)
2933       else:
2934         feedback_fn("Cluster LVM configuration already in desired"
2935                     " state, not changing")
2936     if self.op.drbd_helper is not None:
2937       new_helper = self.op.drbd_helper
2938       if not new_helper:
2939         new_helper = None
2940       if new_helper != self.cfg.GetDRBDHelper():
2941         self.cfg.SetDRBDHelper(new_helper)
2942       else:
2943         feedback_fn("Cluster DRBD helper already in desired state,"
2944                     " not changing")
2945     if self.op.hvparams:
2946       self.cluster.hvparams = self.new_hvparams
2947     if self.op.os_hvp:
2948       self.cluster.os_hvp = self.new_os_hvp
2949     if self.op.enabled_hypervisors is not None:
2950       self.cluster.hvparams = self.new_hvparams
2951       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2952     if self.op.beparams:
2953       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2954     if self.op.nicparams:
2955       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2956     if self.op.osparams:
2957       self.cluster.osparams = self.new_osp
2958     if self.op.ndparams:
2959       self.cluster.ndparams = self.new_ndparams
2960
2961     if self.op.candidate_pool_size is not None:
2962       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2963       # we need to update the pool size here, otherwise the save will fail
2964       _AdjustCandidatePool(self, [])
2965
2966     if self.op.maintain_node_health is not None:
2967       self.cluster.maintain_node_health = self.op.maintain_node_health
2968
2969     if self.op.prealloc_wipe_disks is not None:
2970       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2971
2972     if self.op.add_uids is not None:
2973       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2974
2975     if self.op.remove_uids is not None:
2976       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2977
2978     if self.op.uid_pool is not None:
2979       self.cluster.uid_pool = self.op.uid_pool
2980
2981     if self.op.default_iallocator is not None:
2982       self.cluster.default_iallocator = self.op.default_iallocator
2983
2984     if self.op.reserved_lvs is not None:
2985       self.cluster.reserved_lvs = self.op.reserved_lvs
2986
2987     def helper_os(aname, mods, desc):
2988       desc += " OS list"
2989       lst = getattr(self.cluster, aname)
2990       for key, val in mods:
2991         if key == constants.DDM_ADD:
2992           if val in lst:
2993             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2994           else:
2995             lst.append(val)
2996         elif key == constants.DDM_REMOVE:
2997           if val in lst:
2998             lst.remove(val)
2999           else:
3000             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3001         else:
3002           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3003
3004     if self.op.hidden_os:
3005       helper_os("hidden_os", self.op.hidden_os, "hidden")
3006
3007     if self.op.blacklisted_os:
3008       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3009
3010     if self.op.master_netdev:
3011       master = self.cfg.GetMasterNode()
3012       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3013                   self.cluster.master_netdev)
3014       result = self.rpc.call_node_stop_master(master, False)
3015       result.Raise("Could not disable the master ip")
3016       feedback_fn("Changing master_netdev from %s to %s" %
3017                   (self.cluster.master_netdev, self.op.master_netdev))
3018       self.cluster.master_netdev = self.op.master_netdev
3019
3020     self.cfg.Update(self.cluster, feedback_fn)
3021
3022     if self.op.master_netdev:
3023       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3024                   self.op.master_netdev)
3025       result = self.rpc.call_node_start_master(master, False, False)
3026       if result.fail_msg:
3027         self.LogWarning("Could not re-enable the master ip on"
3028                         " the master, please restart manually: %s",
3029                         result.fail_msg)
3030
3031
3032 def _UploadHelper(lu, nodes, fname):
3033   """Helper for uploading a file and showing warnings.
3034
3035   """
3036   if os.path.exists(fname):
3037     result = lu.rpc.call_upload_file(nodes, fname)
3038     for to_node, to_result in result.items():
3039       msg = to_result.fail_msg
3040       if msg:
3041         msg = ("Copy of file %s to node %s failed: %s" %
3042                (fname, to_node, msg))
3043         lu.proc.LogWarning(msg)
3044
3045
3046 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3047   """Distribute additional files which are part of the cluster configuration.
3048
3049   ConfigWriter takes care of distributing the config and ssconf files, but
3050   there are more files which should be distributed to all nodes. This function
3051   makes sure those are copied.
3052
3053   @param lu: calling logical unit
3054   @param additional_nodes: list of nodes not in the config to distribute to
3055   @type additional_vm: boolean
3056   @param additional_vm: whether the additional nodes are vm-capable or not
3057
3058   """
3059   # 1. Gather target nodes
3060   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3061   dist_nodes = lu.cfg.GetOnlineNodeList()
3062   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3063   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3064   if additional_nodes is not None:
3065     dist_nodes.extend(additional_nodes)
3066     if additional_vm:
3067       vm_nodes.extend(additional_nodes)
3068   if myself.name in dist_nodes:
3069     dist_nodes.remove(myself.name)
3070   if myself.name in vm_nodes:
3071     vm_nodes.remove(myself.name)
3072
3073   # 2. Gather files to distribute
3074   dist_files = set([constants.ETC_HOSTS,
3075                     constants.SSH_KNOWN_HOSTS_FILE,
3076                     constants.RAPI_CERT_FILE,
3077                     constants.RAPI_USERS_FILE,
3078                     constants.CONFD_HMAC_KEY,
3079                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3080                    ])
3081
3082   vm_files = set()
3083   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3084   for hv_name in enabled_hypervisors:
3085     hv_class = hypervisor.GetHypervisor(hv_name)
3086     vm_files.update(hv_class.GetAncillaryFiles())
3087
3088   # 3. Perform the files upload
3089   for fname in dist_files:
3090     _UploadHelper(lu, dist_nodes, fname)
3091   for fname in vm_files:
3092     _UploadHelper(lu, vm_nodes, fname)
3093
3094
3095 class LUClusterRedistConf(NoHooksLU):
3096   """Force the redistribution of cluster configuration.
3097
3098   This is a very simple LU.
3099
3100   """
3101   REQ_BGL = False
3102
3103   def ExpandNames(self):
3104     self.needed_locks = {
3105       locking.LEVEL_NODE: locking.ALL_SET,
3106     }
3107     self.share_locks[locking.LEVEL_NODE] = 1
3108
3109   def Exec(self, feedback_fn):
3110     """Redistribute the configuration.
3111
3112     """
3113     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3114     _RedistributeAncillaryFiles(self)
3115
3116
3117 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3118   """Sleep and poll for an instance's disk to sync.
3119
3120   """
3121   if not instance.disks or disks is not None and not disks:
3122     return True
3123
3124   disks = _ExpandCheckDisks(instance, disks)
3125
3126   if not oneshot:
3127     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3128
3129   node = instance.primary_node
3130
3131   for dev in disks:
3132     lu.cfg.SetDiskID(dev, node)
3133
3134   # TODO: Convert to utils.Retry
3135
3136   retries = 0
3137   degr_retries = 10 # in seconds, as we sleep 1 second each time
3138   while True:
3139     max_time = 0
3140     done = True
3141     cumul_degraded = False
3142     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3143     msg = rstats.fail_msg
3144     if msg:
3145       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3146       retries += 1
3147       if retries >= 10:
3148         raise errors.RemoteError("Can't contact node %s for mirror data,"
3149                                  " aborting." % node)
3150       time.sleep(6)
3151       continue
3152     rstats = rstats.payload
3153     retries = 0
3154     for i, mstat in enumerate(rstats):
3155       if mstat is None:
3156         lu.LogWarning("Can't compute data for node %s/%s",
3157                            node, disks[i].iv_name)
3158         continue
3159
3160       cumul_degraded = (cumul_degraded or
3161                         (mstat.is_degraded and mstat.sync_percent is None))
3162       if mstat.sync_percent is not None:
3163         done = False
3164         if mstat.estimated_time is not None:
3165           rem_time = ("%s remaining (estimated)" %
3166                       utils.FormatSeconds(mstat.estimated_time))
3167           max_time = mstat.estimated_time
3168         else:
3169           rem_time = "no time estimate"
3170         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3171                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3172
3173     # if we're done but degraded, let's do a few small retries, to
3174     # make sure we see a stable and not transient situation; therefore
3175     # we force restart of the loop
3176     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3177       logging.info("Degraded disks found, %d retries left", degr_retries)
3178       degr_retries -= 1
3179       time.sleep(1)
3180       continue
3181
3182     if done or oneshot:
3183       break
3184
3185     time.sleep(min(60, max_time))
3186
3187   if done:
3188     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3189   return not cumul_degraded
3190
3191
3192 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3193   """Check that mirrors are not degraded.
3194
3195   The ldisk parameter, if True, will change the test from the
3196   is_degraded attribute (which represents overall non-ok status for
3197   the device(s)) to the ldisk (representing the local storage status).
3198
3199   """
3200   lu.cfg.SetDiskID(dev, node)
3201
3202   result = True
3203
3204   if on_primary or dev.AssembleOnSecondary():
3205     rstats = lu.rpc.call_blockdev_find(node, dev)
3206     msg = rstats.fail_msg
3207     if msg:
3208       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3209       result = False
3210     elif not rstats.payload:
3211       lu.LogWarning("Can't find disk on node %s", node)
3212       result = False
3213     else:
3214       if ldisk:
3215         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3216       else:
3217         result = result and not rstats.payload.is_degraded
3218
3219   if dev.children:
3220     for child in dev.children:
3221       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3222
3223   return result
3224
3225
3226 class LUOobCommand(NoHooksLU):
3227   """Logical unit for OOB handling.
3228
3229   """
3230   REG_BGL = False
3231   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3232
3233   def CheckPrereq(self):
3234     """Check prerequisites.
3235
3236     This checks:
3237      - the node exists in the configuration
3238      - OOB is supported
3239
3240     Any errors are signaled by raising errors.OpPrereqError.
3241
3242     """
3243     self.nodes = []
3244     self.master_node = self.cfg.GetMasterNode()
3245
3246     if self.op.node_names:
3247       if self.op.command in self._SKIP_MASTER:
3248         if self.master_node in self.op.node_names:
3249           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3250           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3251
3252           if master_oob_handler:
3253             additional_text = ("Run '%s %s %s' if you want to operate on the"
3254                                " master regardless") % (master_oob_handler,
3255                                                         self.op.command,
3256                                                         self.master_node)
3257           else:
3258             additional_text = "The master node does not support out-of-band"
3259
3260           raise errors.OpPrereqError(("Operating on the master node %s is not"
3261                                       " allowed for %s\n%s") %
3262                                      (self.master_node, self.op.command,
3263                                       additional_text), errors.ECODE_INVAL)
3264     else:
3265       self.op.node_names = self.cfg.GetNodeList()
3266       if self.op.command in self._SKIP_MASTER:
3267         self.op.node_names.remove(self.master_node)
3268
3269     if self.op.command in self._SKIP_MASTER:
3270       assert self.master_node not in self.op.node_names
3271
3272     for node_name in self.op.node_names:
3273       node = self.cfg.GetNodeInfo(node_name)
3274
3275       if node is None:
3276         raise errors.OpPrereqError("Node %s not found" % node_name,
3277                                    errors.ECODE_NOENT)
3278       else:
3279         self.nodes.append(node)
3280
3281       if (not self.op.ignore_status and
3282           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3283         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3284                                     " not marked offline") % node_name,
3285                                    errors.ECODE_STATE)
3286
3287   def ExpandNames(self):
3288     """Gather locks we need.
3289
3290     """
3291     if self.op.node_names:
3292       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3293                             for name in self.op.node_names]
3294       lock_names = self.op.node_names
3295     else:
3296       lock_names = locking.ALL_SET
3297
3298     self.needed_locks = {
3299       locking.LEVEL_NODE: lock_names,
3300       }
3301
3302   def Exec(self, feedback_fn):
3303     """Execute OOB and return result if we expect any.
3304
3305     """
3306     master_node = self.master_node
3307     ret = []
3308
3309     for node in self.nodes:
3310       node_entry = [(constants.RS_NORMAL, node.name)]
3311       ret.append(node_entry)
3312
3313       oob_program = _SupportsOob(self.cfg, node)
3314
3315       if not oob_program:
3316         node_entry.append((constants.RS_UNAVAIL, None))
3317         continue
3318
3319       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3320                    self.op.command, oob_program, node.name)
3321       result = self.rpc.call_run_oob(master_node, oob_program,
3322                                      self.op.command, node.name,
3323                                      self.op.timeout)
3324
3325       if result.fail_msg:
3326         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3327                         node.name, result.fail_msg)
3328         node_entry.append((constants.RS_NODATA, None))
3329       else:
3330         try:
3331           self._CheckPayload(result)
3332         except errors.OpExecError, err:
3333           self.LogWarning("The payload returned by '%s' is not valid: %s",
3334                           node.name, err)
3335           node_entry.append((constants.RS_NODATA, None))
3336         else:
3337           if self.op.command == constants.OOB_HEALTH:
3338             # For health we should log important events
3339             for item, status in result.payload:
3340               if status in [constants.OOB_STATUS_WARNING,
3341                             constants.OOB_STATUS_CRITICAL]:
3342                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3343                                 node.name, item, status)
3344
3345           if self.op.command == constants.OOB_POWER_ON:
3346             node.powered = True
3347           elif self.op.command == constants.OOB_POWER_OFF:
3348             node.powered = False
3349           elif self.op.command == constants.OOB_POWER_STATUS:
3350             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3351             if powered != node.powered:
3352               logging.warning(("Recorded power state (%s) of node '%s' does not"
3353                                " match actual power state (%s)"), node.powered,
3354                               node.name, powered)
3355
3356           # For configuration changing commands we should update the node
3357           if self.op.command in (constants.OOB_POWER_ON,
3358                                  constants.OOB_POWER_OFF):
3359             self.cfg.Update(node, feedback_fn)
3360
3361           node_entry.append((constants.RS_NORMAL, result.payload))
3362
3363     return ret
3364
3365   def _CheckPayload(self, result):
3366     """Checks if the payload is valid.
3367
3368     @param result: RPC result
3369     @raises errors.OpExecError: If payload is not valid
3370
3371     """
3372     errs = []
3373     if self.op.command == constants.OOB_HEALTH:
3374       if not isinstance(result.payload, list):
3375         errs.append("command 'health' is expected to return a list but got %s" %
3376                     type(result.payload))
3377       else:
3378         for item, status in result.payload:
3379           if status not in constants.OOB_STATUSES:
3380             errs.append("health item '%s' has invalid status '%s'" %
3381                         (item, status))
3382
3383     if self.op.command == constants.OOB_POWER_STATUS:
3384       if not isinstance(result.payload, dict):
3385         errs.append("power-status is expected to return a dict but got %s" %
3386                     type(result.payload))
3387
3388     if self.op.command in [
3389         constants.OOB_POWER_ON,
3390         constants.OOB_POWER_OFF,
3391         constants.OOB_POWER_CYCLE,
3392         ]:
3393       if result.payload is not None:
3394         errs.append("%s is expected to not return payload but got '%s'" %
3395                     (self.op.command, result.payload))
3396
3397     if errs:
3398       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3399                                utils.CommaJoin(errs))
3400
3401
3402
3403 class LUOsDiagnose(NoHooksLU):
3404   """Logical unit for OS diagnose/query.
3405
3406   """
3407   REQ_BGL = False
3408   _HID = "hidden"
3409   _BLK = "blacklisted"
3410   _VLD = "valid"
3411   _FIELDS_STATIC = utils.FieldSet()
3412   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3413                                    "parameters", "api_versions", _HID, _BLK)
3414
3415   def CheckArguments(self):
3416     if self.op.names:
3417       raise errors.OpPrereqError("Selective OS query not supported",
3418                                  errors.ECODE_INVAL)
3419
3420     _CheckOutputFields(static=self._FIELDS_STATIC,
3421                        dynamic=self._FIELDS_DYNAMIC,
3422                        selected=self.op.output_fields)
3423
3424   def ExpandNames(self):
3425     # Lock all nodes, in shared mode
3426     # Temporary removal of locks, should be reverted later
3427     # TODO: reintroduce locks when they are lighter-weight
3428     self.needed_locks = {}
3429     #self.share_locks[locking.LEVEL_NODE] = 1
3430     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3431
3432   @staticmethod
3433   def _DiagnoseByOS(rlist):
3434     """Remaps a per-node return list into an a per-os per-node dictionary
3435
3436     @param rlist: a map with node names as keys and OS objects as values
3437
3438     @rtype: dict
3439     @return: a dictionary with osnames as keys and as value another
3440         map, with nodes as keys and tuples of (path, status, diagnose,
3441         variants, parameters, api_versions) as values, eg::
3442
3443           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3444                                      (/srv/..., False, "invalid api")],
3445                            "node2": [(/srv/..., True, "", [], [])]}
3446           }
3447
3448     """
3449     all_os = {}
3450     # we build here the list of nodes that didn't fail the RPC (at RPC
3451     # level), so that nodes with a non-responding node daemon don't
3452     # make all OSes invalid
3453     good_nodes = [node_name for node_name in rlist
3454                   if not rlist[node_name].fail_msg]
3455     for node_name, nr in rlist.items():
3456       if nr.fail_msg or not nr.payload:
3457         continue
3458       for (name, path, status, diagnose, variants,
3459            params, api_versions) in nr.payload:
3460         if name not in all_os:
3461           # build a list of nodes for this os containing empty lists
3462           # for each node in node_list
3463           all_os[name] = {}
3464           for nname in good_nodes:
3465             all_os[name][nname] = []
3466         # convert params from [name, help] to (name, help)
3467         params = [tuple(v) for v in params]
3468         all_os[name][node_name].append((path, status, diagnose,
3469                                         variants, params, api_versions))
3470     return all_os
3471
3472   def Exec(self, feedback_fn):
3473     """Compute the list of OSes.
3474
3475     """
3476     valid_nodes = [node.name
3477                    for node in self.cfg.GetAllNodesInfo().values()
3478                    if not node.offline and node.vm_capable]
3479     node_data = self.rpc.call_os_diagnose(valid_nodes)
3480     pol = self._DiagnoseByOS(node_data)
3481     output = []
3482     cluster = self.cfg.GetClusterInfo()
3483
3484     for os_name in utils.NiceSort(pol.keys()):
3485       os_data = pol[os_name]
3486       row = []
3487       valid = True
3488       (variants, params, api_versions) = null_state = (set(), set(), set())
3489       for idx, osl in enumerate(os_data.values()):
3490         valid = bool(valid and osl and osl[0][1])
3491         if not valid:
3492           (variants, params, api_versions) = null_state
3493           break
3494         node_variants, node_params, node_api = osl[0][3:6]
3495         if idx == 0: # first entry
3496           variants = set(node_variants)
3497           params = set(node_params)
3498           api_versions = set(node_api)
3499         else: # keep consistency
3500           variants.intersection_update(node_variants)
3501           params.intersection_update(node_params)
3502           api_versions.intersection_update(node_api)
3503
3504       is_hid = os_name in cluster.hidden_os
3505       is_blk = os_name in cluster.blacklisted_os
3506       if ((self._HID not in self.op.output_fields and is_hid) or
3507           (self._BLK not in self.op.output_fields and is_blk) or
3508           (self._VLD not in self.op.output_fields and not valid)):
3509         continue
3510
3511       for field in self.op.output_fields:
3512         if field == "name":
3513           val = os_name
3514         elif field == self._VLD:
3515           val = valid
3516         elif field == "node_status":
3517           # this is just a copy of the dict
3518           val = {}
3519           for node_name, nos_list in os_data.items():
3520             val[node_name] = nos_list
3521         elif field == "variants":
3522           val = utils.NiceSort(list(variants))
3523         elif field == "parameters":
3524           val = list(params)
3525         elif field == "api_versions":
3526           val = list(api_versions)
3527         elif field == self._HID:
3528           val = is_hid
3529         elif field == self._BLK:
3530           val = is_blk
3531         else:
3532           raise errors.ParameterError(field)
3533         row.append(val)
3534       output.append(row)
3535
3536     return output
3537
3538
3539 class LUNodeRemove(LogicalUnit):
3540   """Logical unit for removing a node.
3541
3542   """
3543   HPATH = "node-remove"
3544   HTYPE = constants.HTYPE_NODE
3545
3546   def BuildHooksEnv(self):
3547     """Build hooks env.
3548
3549     This doesn't run on the target node in the pre phase as a failed
3550     node would then be impossible to remove.
3551
3552     """
3553     env = {
3554       "OP_TARGET": self.op.node_name,
3555       "NODE_NAME": self.op.node_name,
3556       }
3557     all_nodes = self.cfg.GetNodeList()
3558     try:
3559       all_nodes.remove(self.op.node_name)
3560     except ValueError:
3561       logging.warning("Node %s which is about to be removed not found"
3562                       " in the all nodes list", self.op.node_name)
3563     return env, all_nodes, all_nodes
3564
3565   def CheckPrereq(self):
3566     """Check prerequisites.
3567
3568     This checks:
3569      - the node exists in the configuration
3570      - it does not have primary or secondary instances
3571      - it's not the master
3572
3573     Any errors are signaled by raising errors.OpPrereqError.
3574
3575     """
3576     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3577     node = self.cfg.GetNodeInfo(self.op.node_name)
3578     assert node is not None
3579
3580     instance_list = self.cfg.GetInstanceList()
3581
3582     masternode = self.cfg.GetMasterNode()
3583     if node.name == masternode:
3584       raise errors.OpPrereqError("Node is the master node,"
3585                                  " you need to failover first.",
3586                                  errors.ECODE_INVAL)
3587
3588     for instance_name in instance_list:
3589       instance = self.cfg.GetInstanceInfo(instance_name)
3590       if node.name in instance.all_nodes:
3591         raise errors.OpPrereqError("Instance %s is still running on the node,"
3592                                    " please remove first." % instance_name,
3593                                    errors.ECODE_INVAL)
3594     self.op.node_name = node.name
3595     self.node = node
3596
3597   def Exec(self, feedback_fn):
3598     """Removes the node from the cluster.
3599
3600     """
3601     node = self.node
3602     logging.info("Stopping the node daemon and removing configs from node %s",
3603                  node.name)
3604
3605     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3606
3607     # Promote nodes to master candidate as needed
3608     _AdjustCandidatePool(self, exceptions=[node.name])
3609     self.context.RemoveNode(node.name)
3610
3611     # Run post hooks on the node before it's removed
3612     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3613     try:
3614       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3615     except:
3616       # pylint: disable-msg=W0702
3617       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3618
3619     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3620     msg = result.fail_msg
3621     if msg:
3622       self.LogWarning("Errors encountered on the remote node while leaving"
3623                       " the cluster: %s", msg)
3624
3625     # Remove node from our /etc/hosts
3626     if self.cfg.GetClusterInfo().modify_etc_hosts:
3627       master_node = self.cfg.GetMasterNode()
3628       result = self.rpc.call_etc_hosts_modify(master_node,
3629                                               constants.ETC_HOSTS_REMOVE,
3630                                               node.name, None)
3631       result.Raise("Can't update hosts file with new host data")
3632       _RedistributeAncillaryFiles(self)
3633
3634
3635 class _NodeQuery(_QueryBase):
3636   FIELDS = query.NODE_FIELDS
3637
3638   def ExpandNames(self, lu):
3639     lu.needed_locks = {}
3640     lu.share_locks[locking.LEVEL_NODE] = 1
3641
3642     if self.names:
3643       self.wanted = _GetWantedNodes(lu, self.names)
3644     else:
3645       self.wanted = locking.ALL_SET
3646
3647     self.do_locking = (self.use_locking and
3648                        query.NQ_LIVE in self.requested_data)
3649
3650     if self.do_locking:
3651       # if we don't request only static fields, we need to lock the nodes
3652       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3653
3654   def DeclareLocks(self, lu, level):
3655     pass
3656
3657   def _GetQueryData(self, lu):
3658     """Computes the list of nodes and their attributes.
3659
3660     """
3661     all_info = lu.cfg.GetAllNodesInfo()
3662
3663     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3664
3665     # Gather data as requested
3666     if query.NQ_LIVE in self.requested_data:
3667       # filter out non-vm_capable nodes
3668       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3669
3670       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3671                                         lu.cfg.GetHypervisorType())
3672       live_data = dict((name, nresult.payload)
3673                        for (name, nresult) in node_data.items()
3674                        if not nresult.fail_msg and nresult.payload)
3675     else:
3676       live_data = None
3677
3678     if query.NQ_INST in self.requested_data:
3679       node_to_primary = dict([(name, set()) for name in nodenames])
3680       node_to_secondary = dict([(name, set()) for name in nodenames])
3681
3682       inst_data = lu.cfg.GetAllInstancesInfo()
3683
3684       for inst in inst_data.values():
3685         if inst.primary_node in node_to_primary:
3686           node_to_primary[inst.primary_node].add(inst.name)
3687         for secnode in inst.secondary_nodes:
3688           if secnode in node_to_secondary:
3689             node_to_secondary[secnode].add(inst.name)
3690     else:
3691       node_to_primary = None
3692       node_to_secondary = None
3693
3694     if query.NQ_OOB in self.requested_data:
3695       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3696                          for name, node in all_info.iteritems())
3697     else:
3698       oob_support = None
3699
3700     if query.NQ_GROUP in self.requested_data:
3701       groups = lu.cfg.GetAllNodeGroupsInfo()
3702     else:
3703       groups = {}
3704
3705     return query.NodeQueryData([all_info[name] for name in nodenames],
3706                                live_data, lu.cfg.GetMasterNode(),
3707                                node_to_primary, node_to_secondary, groups,
3708                                oob_support, lu.cfg.GetClusterInfo())
3709
3710
3711 class LUNodeQuery(NoHooksLU):
3712   """Logical unit for querying nodes.
3713
3714   """
3715   # pylint: disable-msg=W0142
3716   REQ_BGL = False
3717
3718   def CheckArguments(self):
3719     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3720                          self.op.output_fields, self.op.use_locking)
3721
3722   def ExpandNames(self):
3723     self.nq.ExpandNames(self)
3724
3725   def Exec(self, feedback_fn):
3726     return self.nq.OldStyleQuery(self)
3727
3728
3729 class LUNodeQueryvols(NoHooksLU):
3730   """Logical unit for getting volumes on node(s).
3731
3732   """
3733   REQ_BGL = False
3734   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3735   _FIELDS_STATIC = utils.FieldSet("node")
3736
3737   def CheckArguments(self):
3738     _CheckOutputFields(static=self._FIELDS_STATIC,
3739                        dynamic=self._FIELDS_DYNAMIC,
3740                        selected=self.op.output_fields)
3741
3742   def ExpandNames(self):
3743     self.needed_locks = {}
3744     self.share_locks[locking.LEVEL_NODE] = 1
3745     if not self.op.nodes:
3746       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3747     else:
3748       self.needed_locks[locking.LEVEL_NODE] = \
3749         _GetWantedNodes(self, self.op.nodes)
3750
3751   def Exec(self, feedback_fn):
3752     """Computes the list of nodes and their attributes.
3753
3754     """
3755     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3756     volumes = self.rpc.call_node_volumes(nodenames)
3757
3758     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3759              in self.cfg.GetInstanceList()]
3760
3761     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3762
3763     output = []
3764     for node in nodenames:
3765       nresult = volumes[node]
3766       if nresult.offline:
3767         continue
3768       msg = nresult.fail_msg
3769       if msg:
3770         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3771         continue
3772
3773       node_vols = nresult.payload[:]
3774       node_vols.sort(key=lambda vol: vol['dev'])
3775
3776       for vol in node_vols:
3777         node_output = []
3778         for field in self.op.output_fields:
3779           if field == "node":
3780             val = node
3781           elif field == "phys":
3782             val = vol['dev']
3783           elif field == "vg":
3784             val = vol['vg']
3785           elif field == "name":
3786             val = vol['name']
3787           elif field == "size":
3788             val = int(float(vol['size']))
3789           elif field == "instance":
3790             for inst in ilist:
3791               if node not in lv_by_node[inst]:
3792                 continue
3793               if vol['name'] in lv_by_node[inst][node]:
3794                 val = inst.name
3795                 break
3796             else:
3797               val = '-'
3798           else:
3799             raise errors.ParameterError(field)
3800           node_output.append(str(val))
3801
3802         output.append(node_output)
3803
3804     return output
3805
3806
3807 class LUNodeQueryStorage(NoHooksLU):
3808   """Logical unit for getting information on storage units on node(s).
3809
3810   """
3811   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3812   REQ_BGL = False
3813
3814   def CheckArguments(self):
3815     _CheckOutputFields(static=self._FIELDS_STATIC,
3816                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3817                        selected=self.op.output_fields)
3818
3819   def ExpandNames(self):
3820     self.needed_locks = {}
3821     self.share_locks[locking.LEVEL_NODE] = 1
3822
3823     if self.op.nodes:
3824       self.needed_locks[locking.LEVEL_NODE] = \
3825         _GetWantedNodes(self, self.op.nodes)
3826     else:
3827       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3828
3829   def Exec(self, feedback_fn):
3830     """Computes the list of nodes and their attributes.
3831
3832     """
3833     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3834
3835     # Always get name to sort by
3836     if constants.SF_NAME in self.op.output_fields:
3837       fields = self.op.output_fields[:]
3838     else:
3839       fields = [constants.SF_NAME] + self.op.output_fields
3840
3841     # Never ask for node or type as it's only known to the LU
3842     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3843       while extra in fields:
3844         fields.remove(extra)
3845
3846     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3847     name_idx = field_idx[constants.SF_NAME]
3848
3849     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3850     data = self.rpc.call_storage_list(self.nodes,
3851                                       self.op.storage_type, st_args,
3852                                       self.op.name, fields)
3853
3854     result = []
3855
3856     for node in utils.NiceSort(self.nodes):
3857       nresult = data[node]
3858       if nresult.offline:
3859         continue
3860
3861       msg = nresult.fail_msg
3862       if msg:
3863         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3864         continue
3865
3866       rows = dict([(row[name_idx], row) for row in nresult.payload])
3867
3868       for name in utils.NiceSort(rows.keys()):
3869         row = rows[name]
3870
3871         out = []
3872
3873         for field in self.op.output_fields:
3874           if field == constants.SF_NODE:
3875             val = node
3876           elif field == constants.SF_TYPE:
3877             val = self.op.storage_type
3878           elif field in field_idx:
3879             val = row[field_idx[field]]
3880           else:
3881             raise errors.ParameterError(field)
3882
3883           out.append(val)
3884
3885         result.append(out)
3886
3887     return result
3888
3889
3890 class _InstanceQuery(_QueryBase):
3891   FIELDS = query.INSTANCE_FIELDS
3892
3893   def ExpandNames(self, lu):
3894     lu.needed_locks = {}
3895     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3896     lu.share_locks[locking.LEVEL_NODE] = 1
3897
3898     if self.names:
3899       self.wanted = _GetWantedInstances(lu, self.names)
3900     else:
3901       self.wanted = locking.ALL_SET
3902
3903     self.do_locking = (self.use_locking and
3904                        query.IQ_LIVE in self.requested_data)
3905     if self.do_locking:
3906       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3907       lu.needed_locks[locking.LEVEL_NODE] = []
3908       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3909
3910   def DeclareLocks(self, lu, level):
3911     if level == locking.LEVEL_NODE and self.do_locking:
3912       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3913
3914   def _GetQueryData(self, lu):
3915     """Computes the list of instances and their attributes.
3916
3917     """
3918     cluster = lu.cfg.GetClusterInfo()
3919     all_info = lu.cfg.GetAllInstancesInfo()
3920
3921     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3922
3923     instance_list = [all_info[name] for name in instance_names]
3924     nodes = frozenset(itertools.chain(*(inst.all_nodes
3925                                         for inst in instance_list)))
3926     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3927     bad_nodes = []
3928     offline_nodes = []
3929     wrongnode_inst = set()
3930
3931     # Gather data as requested
3932     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3933       live_data = {}
3934       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3935       for name in nodes:
3936         result = node_data[name]
3937         if result.offline:
3938           # offline nodes will be in both lists
3939           assert result.fail_msg
3940           offline_nodes.append(name)
3941         if result.fail_msg:
3942           bad_nodes.append(name)
3943         elif result.payload:
3944           for inst in result.payload:
3945             if all_info[inst].primary_node == name:
3946               live_data.update(result.payload)
3947             else:
3948               wrongnode_inst.add(inst)
3949         # else no instance is alive
3950     else:
3951       live_data = {}
3952
3953     if query.IQ_DISKUSAGE in self.requested_data:
3954       disk_usage = dict((inst.name,
3955                          _ComputeDiskSize(inst.disk_template,
3956                                           [{"size": disk.size}
3957                                            for disk in inst.disks]))
3958                         for inst in instance_list)
3959     else:
3960       disk_usage = None
3961
3962     if query.IQ_CONSOLE in self.requested_data:
3963       consinfo = {}
3964       for inst in instance_list:
3965         if inst.name in live_data:
3966           # Instance is running
3967           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3968         else:
3969           consinfo[inst.name] = None
3970       assert set(consinfo.keys()) == set(instance_names)
3971     else:
3972       consinfo = None
3973
3974     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3975                                    disk_usage, offline_nodes, bad_nodes,
3976                                    live_data, wrongnode_inst, consinfo)
3977
3978
3979 class LUQuery(NoHooksLU):
3980   """Query for resources/items of a certain kind.
3981
3982   """
3983   # pylint: disable-msg=W0142
3984   REQ_BGL = False
3985
3986   def CheckArguments(self):
3987     qcls = _GetQueryImplementation(self.op.what)
3988
3989     self.impl = qcls(self.op.filter, self.op.fields, False)
3990
3991   def ExpandNames(self):
3992     self.impl.ExpandNames(self)
3993
3994   def DeclareLocks(self, level):
3995     self.impl.DeclareLocks(self, level)
3996
3997   def Exec(self, feedback_fn):
3998     return self.impl.NewStyleQuery(self)
3999
4000
4001 class LUQueryFields(NoHooksLU):
4002   """Query for resources/items of a certain kind.
4003
4004   """
4005   # pylint: disable-msg=W0142
4006   REQ_BGL = False
4007
4008   def CheckArguments(self):
4009     self.qcls = _GetQueryImplementation(self.op.what)
4010
4011   def ExpandNames(self):
4012     self.needed_locks = {}
4013
4014   def Exec(self, feedback_fn):
4015     return self.qcls.FieldsQuery(self.op.fields)
4016
4017
4018 class LUNodeModifyStorage(NoHooksLU):
4019   """Logical unit for modifying a storage volume on a node.
4020
4021   """
4022   REQ_BGL = False
4023
4024   def CheckArguments(self):
4025     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4026
4027     storage_type = self.op.storage_type
4028
4029     try:
4030       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4031     except KeyError:
4032       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4033                                  " modified" % storage_type,
4034                                  errors.ECODE_INVAL)
4035
4036     diff = set(self.op.changes.keys()) - modifiable
4037     if diff:
4038       raise errors.OpPrereqError("The following fields can not be modified for"
4039                                  " storage units of type '%s': %r" %
4040                                  (storage_type, list(diff)),
4041                                  errors.ECODE_INVAL)
4042
4043   def ExpandNames(self):
4044     self.needed_locks = {
4045       locking.LEVEL_NODE: self.op.node_name,
4046       }
4047
4048   def Exec(self, feedback_fn):
4049     """Computes the list of nodes and their attributes.
4050
4051     """
4052     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4053     result = self.rpc.call_storage_modify(self.op.node_name,
4054                                           self.op.storage_type, st_args,
4055                                           self.op.name, self.op.changes)
4056     result.Raise("Failed to modify storage unit '%s' on %s" %
4057                  (self.op.name, self.op.node_name))
4058
4059
4060 class LUNodeAdd(LogicalUnit):
4061   """Logical unit for adding node to the cluster.
4062
4063   """
4064   HPATH = "node-add"
4065   HTYPE = constants.HTYPE_NODE
4066   _NFLAGS = ["master_capable", "vm_capable"]
4067
4068   def CheckArguments(self):
4069     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4070     # validate/normalize the node name
4071     self.hostname = netutils.GetHostname(name=self.op.node_name,
4072                                          family=self.primary_ip_family)
4073     self.op.node_name = self.hostname.name
4074     if self.op.readd and self.op.group:
4075       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4076                                  " being readded", errors.ECODE_INVAL)
4077
4078   def BuildHooksEnv(self):
4079     """Build hooks env.
4080
4081     This will run on all nodes before, and on all nodes + the new node after.
4082
4083     """
4084     env = {
4085       "OP_TARGET": self.op.node_name,
4086       "NODE_NAME": self.op.node_name,
4087       "NODE_PIP": self.op.primary_ip,
4088       "NODE_SIP": self.op.secondary_ip,
4089       "MASTER_CAPABLE": str(self.op.master_capable),
4090       "VM_CAPABLE": str(self.op.vm_capable),
4091       }
4092     nodes_0 = self.cfg.GetNodeList()
4093     nodes_1 = nodes_0 + [self.op.node_name, ]
4094     return env, nodes_0, nodes_1
4095
4096   def CheckPrereq(self):
4097     """Check prerequisites.
4098
4099     This checks:
4100      - the new node is not already in the config
4101      - it is resolvable
4102      - its parameters (single/dual homed) matches the cluster
4103
4104     Any errors are signaled by raising errors.OpPrereqError.
4105
4106     """
4107     cfg = self.cfg
4108     hostname = self.hostname
4109     node = hostname.name
4110     primary_ip = self.op.primary_ip = hostname.ip
4111     if self.op.secondary_ip is None:
4112       if self.primary_ip_family == netutils.IP6Address.family:
4113         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4114                                    " IPv4 address must be given as secondary",
4115                                    errors.ECODE_INVAL)
4116       self.op.secondary_ip = primary_ip
4117
4118     secondary_ip = self.op.secondary_ip
4119     if not netutils.IP4Address.IsValid(secondary_ip):
4120       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4121                                  " address" % secondary_ip, errors.ECODE_INVAL)
4122
4123     node_list = cfg.GetNodeList()
4124     if not self.op.readd and node in node_list:
4125       raise errors.OpPrereqError("Node %s is already in the configuration" %
4126                                  node, errors.ECODE_EXISTS)
4127     elif self.op.readd and node not in node_list:
4128       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4129                                  errors.ECODE_NOENT)
4130
4131     self.changed_primary_ip = False
4132
4133     for existing_node_name in node_list:
4134       existing_node = cfg.GetNodeInfo(existing_node_name)
4135
4136       if self.op.readd and node == existing_node_name:
4137         if existing_node.secondary_ip != secondary_ip:
4138           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4139                                      " address configuration as before",
4140                                      errors.ECODE_INVAL)
4141         if existing_node.primary_ip != primary_ip:
4142           self.changed_primary_ip = True
4143
4144         continue
4145
4146       if (existing_node.primary_ip == primary_ip or
4147           existing_node.secondary_ip == primary_ip or
4148           existing_node.primary_ip == secondary_ip or
4149           existing_node.secondary_ip == secondary_ip):
4150         raise errors.OpPrereqError("New node ip address(es) conflict with"
4151                                    " existing node %s" % existing_node.name,
4152                                    errors.ECODE_NOTUNIQUE)
4153
4154     # After this 'if' block, None is no longer a valid value for the
4155     # _capable op attributes
4156     if self.op.readd:
4157       old_node = self.cfg.GetNodeInfo(node)
4158       assert old_node is not None, "Can't retrieve locked node %s" % node
4159       for attr in self._NFLAGS:
4160         if getattr(self.op, attr) is None:
4161           setattr(self.op, attr, getattr(old_node, attr))
4162     else:
4163       for attr in self._NFLAGS:
4164         if getattr(self.op, attr) is None:
4165           setattr(self.op, attr, True)
4166
4167     if self.op.readd and not self.op.vm_capable:
4168       pri, sec = cfg.GetNodeInstances(node)
4169       if pri or sec:
4170         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4171                                    " flag set to false, but it already holds"
4172                                    " instances" % node,
4173                                    errors.ECODE_STATE)
4174
4175     # check that the type of the node (single versus dual homed) is the
4176     # same as for the master
4177     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4178     master_singlehomed = myself.secondary_ip == myself.primary_ip
4179     newbie_singlehomed = secondary_ip == primary_ip
4180     if master_singlehomed != newbie_singlehomed:
4181       if master_singlehomed:
4182         raise errors.OpPrereqError("The master has no secondary ip but the"
4183                                    " new node has one",
4184                                    errors.ECODE_INVAL)
4185       else:
4186         raise errors.OpPrereqError("The master has a secondary ip but the"
4187                                    " new node doesn't have one",
4188                                    errors.ECODE_INVAL)
4189
4190     # checks reachability
4191     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4192       raise errors.OpPrereqError("Node not reachable by ping",
4193                                  errors.ECODE_ENVIRON)
4194
4195     if not newbie_singlehomed:
4196       # check reachability from my secondary ip to newbie's secondary ip
4197       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4198                            source=myself.secondary_ip):
4199         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4200                                    " based ping to node daemon port",
4201                                    errors.ECODE_ENVIRON)
4202
4203     if self.op.readd:
4204       exceptions = [node]
4205     else:
4206       exceptions = []
4207
4208     if self.op.master_capable:
4209       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4210     else:
4211       self.master_candidate = False
4212
4213     if self.op.readd:
4214       self.new_node = old_node
4215     else:
4216       node_group = cfg.LookupNodeGroup(self.op.group)
4217       self.new_node = objects.Node(name=node,
4218                                    primary_ip=primary_ip,
4219                                    secondary_ip=secondary_ip,
4220                                    master_candidate=self.master_candidate,
4221                                    offline=False, drained=False,
4222                                    group=node_group)
4223
4224     if self.op.ndparams:
4225       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4226
4227   def Exec(self, feedback_fn):
4228     """Adds the new node to the cluster.
4229
4230     """
4231     new_node = self.new_node
4232     node = new_node.name
4233
4234     # We adding a new node so we assume it's powered
4235     new_node.powered = True
4236
4237     # for re-adds, reset the offline/drained/master-candidate flags;
4238     # we need to reset here, otherwise offline would prevent RPC calls
4239     # later in the procedure; this also means that if the re-add
4240     # fails, we are left with a non-offlined, broken node
4241     if self.op.readd:
4242       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4243       self.LogInfo("Readding a node, the offline/drained flags were reset")
4244       # if we demote the node, we do cleanup later in the procedure
4245       new_node.master_candidate = self.master_candidate
4246       if self.changed_primary_ip:
4247         new_node.primary_ip = self.op.primary_ip
4248
4249     # copy the master/vm_capable flags
4250     for attr in self._NFLAGS:
4251       setattr(new_node, attr, getattr(self.op, attr))
4252
4253     # notify the user about any possible mc promotion
4254     if new_node.master_candidate:
4255       self.LogInfo("Node will be a master candidate")
4256
4257     if self.op.ndparams:
4258       new_node.ndparams = self.op.ndparams
4259     else:
4260       new_node.ndparams = {}
4261
4262     # check connectivity
4263     result = self.rpc.call_version([node])[node]
4264     result.Raise("Can't get version information from node %s" % node)
4265     if constants.PROTOCOL_VERSION == result.payload:
4266       logging.info("Communication to node %s fine, sw version %s match",
4267                    node, result.payload)
4268     else:
4269       raise errors.OpExecError("Version mismatch master version %s,"
4270                                " node version %s" %
4271                                (constants.PROTOCOL_VERSION, result.payload))
4272
4273     # Add node to our /etc/hosts, and add key to known_hosts
4274     if self.cfg.GetClusterInfo().modify_etc_hosts:
4275       master_node = self.cfg.GetMasterNode()
4276       result = self.rpc.call_etc_hosts_modify(master_node,
4277                                               constants.ETC_HOSTS_ADD,
4278                                               self.hostname.name,
4279                                               self.hostname.ip)
4280       result.Raise("Can't update hosts file with new host data")
4281
4282     if new_node.secondary_ip != new_node.primary_ip:
4283       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4284                                False)
4285
4286     node_verify_list = [self.cfg.GetMasterNode()]
4287     node_verify_param = {
4288       constants.NV_NODELIST: [node],
4289       # TODO: do a node-net-test as well?
4290     }
4291
4292     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4293                                        self.cfg.GetClusterName())
4294     for verifier in node_verify_list:
4295       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4296       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4297       if nl_payload:
4298         for failed in nl_payload:
4299           feedback_fn("ssh/hostname verification failed"
4300                       " (checking from %s): %s" %
4301                       (verifier, nl_payload[failed]))
4302         raise errors.OpExecError("ssh/hostname verification failed.")
4303
4304     if self.op.readd:
4305       _RedistributeAncillaryFiles(self)
4306       self.context.ReaddNode(new_node)
4307       # make sure we redistribute the config
4308       self.cfg.Update(new_node, feedback_fn)
4309       # and make sure the new node will not have old files around
4310       if not new_node.master_candidate:
4311         result = self.rpc.call_node_demote_from_mc(new_node.name)
4312         msg = result.fail_msg
4313         if msg:
4314           self.LogWarning("Node failed to demote itself from master"
4315                           " candidate status: %s" % msg)
4316     else:
4317       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4318                                   additional_vm=self.op.vm_capable)
4319       self.context.AddNode(new_node, self.proc.GetECId())
4320
4321
4322 class LUNodeSetParams(LogicalUnit):
4323   """Modifies the parameters of a node.
4324
4325   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4326       to the node role (as _ROLE_*)
4327   @cvar _R2F: a dictionary from node role to tuples of flags
4328   @cvar _FLAGS: a list of attribute names corresponding to the flags
4329
4330   """
4331   HPATH = "node-modify"
4332   HTYPE = constants.HTYPE_NODE
4333   REQ_BGL = False
4334   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4335   _F2R = {
4336     (True, False, False): _ROLE_CANDIDATE,
4337     (False, True, False): _ROLE_DRAINED,
4338     (False, False, True): _ROLE_OFFLINE,
4339     (False, False, False): _ROLE_REGULAR,
4340     }
4341   _R2F = dict((v, k) for k, v in _F2R.items())
4342   _FLAGS = ["master_candidate", "drained", "offline"]
4343
4344   def CheckArguments(self):
4345     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4346     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4347                 self.op.master_capable, self.op.vm_capable,
4348                 self.op.secondary_ip, self.op.ndparams]
4349     if all_mods.count(None) == len(all_mods):
4350       raise errors.OpPrereqError("Please pass at least one modification",
4351                                  errors.ECODE_INVAL)
4352     if all_mods.count(True) > 1:
4353       raise errors.OpPrereqError("Can't set the node into more than one"
4354                                  " state at the same time",
4355                                  errors.ECODE_INVAL)
4356
4357     # Boolean value that tells us whether we might be demoting from MC
4358     self.might_demote = (self.op.master_candidate == False or
4359                          self.op.offline == True or
4360                          self.op.drained == True or
4361                          self.op.master_capable == False)
4362
4363     if self.op.secondary_ip:
4364       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4365         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4366                                    " address" % self.op.secondary_ip,
4367                                    errors.ECODE_INVAL)
4368
4369     self.lock_all = self.op.auto_promote and self.might_demote
4370     self.lock_instances = self.op.secondary_ip is not None
4371
4372   def ExpandNames(self):
4373     if self.lock_all:
4374       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4375     else:
4376       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4377
4378     if self.lock_instances:
4379       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4380
4381   def DeclareLocks(self, level):
4382     # If we have locked all instances, before waiting to lock nodes, release
4383     # all the ones living on nodes unrelated to the current operation.
4384     if level == locking.LEVEL_NODE and self.lock_instances:
4385       instances_release = []
4386       instances_keep = []
4387       self.affected_instances = []
4388       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4389         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4390           instance = self.context.cfg.GetInstanceInfo(instance_name)
4391           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4392           if i_mirrored and self.op.node_name in instance.all_nodes:
4393             instances_keep.append(instance_name)
4394             self.affected_instances.append(instance)
4395           else:
4396             instances_release.append(instance_name)
4397         if instances_release:
4398           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4399           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4400
4401   def BuildHooksEnv(self):
4402     """Build hooks env.
4403
4404     This runs on the master node.
4405
4406     """
4407     env = {
4408       "OP_TARGET": self.op.node_name,
4409       "MASTER_CANDIDATE": str(self.op.master_candidate),
4410       "OFFLINE": str(self.op.offline),
4411       "DRAINED": str(self.op.drained),
4412       "MASTER_CAPABLE": str(self.op.master_capable),
4413       "VM_CAPABLE": str(self.op.vm_capable),
4414       }
4415     nl = [self.cfg.GetMasterNode(),
4416           self.op.node_name]
4417     return env, nl, nl
4418
4419   def CheckPrereq(self):
4420     """Check prerequisites.
4421
4422     This only checks the instance list against the existing names.
4423
4424     """
4425     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4426
4427     if (self.op.master_candidate is not None or
4428         self.op.drained is not None or
4429         self.op.offline is not None):
4430       # we can't change the master's node flags
4431       if self.op.node_name == self.cfg.GetMasterNode():
4432         raise errors.OpPrereqError("The master role can be changed"
4433                                    " only via master-failover",
4434                                    errors.ECODE_INVAL)
4435
4436     if self.op.master_candidate and not node.master_capable:
4437       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4438                                  " it a master candidate" % node.name,
4439                                  errors.ECODE_STATE)
4440
4441     if self.op.vm_capable == False:
4442       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4443       if ipri or isec:
4444         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4445                                    " the vm_capable flag" % node.name,
4446                                    errors.ECODE_STATE)
4447
4448     if node.master_candidate and self.might_demote and not self.lock_all:
4449       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4450       # check if after removing the current node, we're missing master
4451       # candidates
4452       (mc_remaining, mc_should, _) = \
4453           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4454       if mc_remaining < mc_should:
4455         raise errors.OpPrereqError("Not enough master candidates, please"
4456                                    " pass auto promote option to allow"
4457                                    " promotion", errors.ECODE_STATE)
4458
4459     self.old_flags = old_flags = (node.master_candidate,
4460                                   node.drained, node.offline)
4461     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4462     self.old_role = old_role = self._F2R[old_flags]
4463
4464     # Check for ineffective changes
4465     for attr in self._FLAGS:
4466       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4467         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4468         setattr(self.op, attr, None)
4469
4470     # Past this point, any flag change to False means a transition
4471     # away from the respective state, as only real changes are kept
4472
4473     # TODO: We might query the real power state if it supports OOB
4474     if _SupportsOob(self.cfg, node):
4475       if self.op.offline is False and not (node.powered or
4476                                            self.op.powered == True):
4477         raise errors.OpPrereqError(("Please power on node %s first before you"
4478                                     " can reset offline state") %
4479                                    self.op.node_name)
4480     elif self.op.powered is not None:
4481       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4482                                   " which does not support out-of-band"
4483                                   " handling") % self.op.node_name)
4484
4485     # If we're being deofflined/drained, we'll MC ourself if needed
4486     if (self.op.drained == False or self.op.offline == False or
4487         (self.op.master_capable and not node.master_capable)):
4488       if _DecideSelfPromotion(self):
4489         self.op.master_candidate = True
4490         self.LogInfo("Auto-promoting node to master candidate")
4491
4492     # If we're no longer master capable, we'll demote ourselves from MC
4493     if self.op.master_capable == False and node.master_candidate:
4494       self.LogInfo("Demoting from master candidate")
4495       self.op.master_candidate = False
4496
4497     # Compute new role
4498     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4499     if self.op.master_candidate:
4500       new_role = self._ROLE_CANDIDATE
4501     elif self.op.drained:
4502       new_role = self._ROLE_DRAINED
4503     elif self.op.offline:
4504       new_role = self._ROLE_OFFLINE
4505     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4506       # False is still in new flags, which means we're un-setting (the
4507       # only) True flag
4508       new_role = self._ROLE_REGULAR
4509     else: # no new flags, nothing, keep old role
4510       new_role = old_role
4511
4512     self.new_role = new_role
4513
4514     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4515       # Trying to transition out of offline status
4516       result = self.rpc.call_version([node.name])[node.name]
4517       if result.fail_msg:
4518         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4519                                    " to report its version: %s" %
4520                                    (node.name, result.fail_msg),
4521                                    errors.ECODE_STATE)
4522       else:
4523         self.LogWarning("Transitioning node from offline to online state"
4524                         " without using re-add. Please make sure the node"
4525                         " is healthy!")
4526
4527     if self.op.secondary_ip:
4528       # Ok even without locking, because this can't be changed by any LU
4529       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4530       master_singlehomed = master.secondary_ip == master.primary_ip
4531       if master_singlehomed and self.op.secondary_ip:
4532         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4533                                    " homed cluster", errors.ECODE_INVAL)
4534
4535       if node.offline:
4536         if self.affected_instances:
4537           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4538                                      " node has instances (%s) configured"
4539                                      " to use it" % self.affected_instances)
4540       else:
4541         # On online nodes, check that no instances are running, and that
4542         # the node has the new ip and we can reach it.
4543         for instance in self.affected_instances:
4544           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4545
4546         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4547         if master.name != node.name:
4548           # check reachability from master secondary ip to new secondary ip
4549           if not netutils.TcpPing(self.op.secondary_ip,
4550                                   constants.DEFAULT_NODED_PORT,
4551                                   source=master.secondary_ip):
4552             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4553                                        " based ping to node daemon port",
4554                                        errors.ECODE_ENVIRON)
4555
4556     if self.op.ndparams:
4557       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4558       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4559       self.new_ndparams = new_ndparams
4560
4561   def Exec(self, feedback_fn):
4562     """Modifies a node.
4563
4564     """
4565     node = self.node
4566     old_role = self.old_role
4567     new_role = self.new_role
4568
4569     result = []
4570
4571     if self.op.ndparams:
4572       node.ndparams = self.new_ndparams
4573
4574     if self.op.powered is not None:
4575       node.powered = self.op.powered
4576
4577     for attr in ["master_capable", "vm_capable"]:
4578       val = getattr(self.op, attr)
4579       if val is not None:
4580         setattr(node, attr, val)
4581         result.append((attr, str(val)))
4582
4583     if new_role != old_role:
4584       # Tell the node to demote itself, if no longer MC and not offline
4585       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4586         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4587         if msg:
4588           self.LogWarning("Node failed to demote itself: %s", msg)
4589
4590       new_flags = self._R2F[new_role]
4591       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4592         if of != nf:
4593           result.append((desc, str(nf)))
4594       (node.master_candidate, node.drained, node.offline) = new_flags
4595
4596       # we locked all nodes, we adjust the CP before updating this node
4597       if self.lock_all:
4598         _AdjustCandidatePool(self, [node.name])
4599
4600     if self.op.secondary_ip:
4601       node.secondary_ip = self.op.secondary_ip
4602       result.append(("secondary_ip", self.op.secondary_ip))
4603
4604     # this will trigger configuration file update, if needed
4605     self.cfg.Update(node, feedback_fn)
4606
4607     # this will trigger job queue propagation or cleanup if the mc
4608     # flag changed
4609     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4610       self.context.ReaddNode(node)
4611
4612     return result
4613
4614
4615 class LUNodePowercycle(NoHooksLU):
4616   """Powercycles a node.
4617
4618   """
4619   REQ_BGL = False
4620
4621   def CheckArguments(self):
4622     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4623     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4624       raise errors.OpPrereqError("The node is the master and the force"
4625                                  " parameter was not set",
4626                                  errors.ECODE_INVAL)
4627
4628   def ExpandNames(self):
4629     """Locking for PowercycleNode.
4630
4631     This is a last-resort option and shouldn't block on other
4632     jobs. Therefore, we grab no locks.
4633
4634     """
4635     self.needed_locks = {}
4636
4637   def Exec(self, feedback_fn):
4638     """Reboots a node.
4639
4640     """
4641     result = self.rpc.call_node_powercycle(self.op.node_name,
4642                                            self.cfg.GetHypervisorType())
4643     result.Raise("Failed to schedule the reboot")
4644     return result.payload
4645
4646
4647 class LUClusterQuery(NoHooksLU):
4648   """Query cluster configuration.
4649
4650   """
4651   REQ_BGL = False
4652
4653   def ExpandNames(self):
4654     self.needed_locks = {}
4655
4656   def Exec(self, feedback_fn):
4657     """Return cluster config.
4658
4659     """
4660     cluster = self.cfg.GetClusterInfo()
4661     os_hvp = {}
4662
4663     # Filter just for enabled hypervisors
4664     for os_name, hv_dict in cluster.os_hvp.items():
4665       os_hvp[os_name] = {}
4666       for hv_name, hv_params in hv_dict.items():
4667         if hv_name in cluster.enabled_hypervisors:
4668           os_hvp[os_name][hv_name] = hv_params
4669
4670     # Convert ip_family to ip_version
4671     primary_ip_version = constants.IP4_VERSION
4672     if cluster.primary_ip_family == netutils.IP6Address.family:
4673       primary_ip_version = constants.IP6_VERSION
4674
4675     result = {
4676       "software_version": constants.RELEASE_VERSION,
4677       "protocol_version": constants.PROTOCOL_VERSION,
4678       "config_version": constants.CONFIG_VERSION,
4679       "os_api_version": max(constants.OS_API_VERSIONS),
4680       "export_version": constants.EXPORT_VERSION,
4681       "architecture": (platform.architecture()[0], platform.machine()),
4682       "name": cluster.cluster_name,
4683       "master": cluster.master_node,
4684       "default_hypervisor": cluster.enabled_hypervisors[0],
4685       "enabled_hypervisors": cluster.enabled_hypervisors,
4686       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4687                         for hypervisor_name in cluster.enabled_hypervisors]),
4688       "os_hvp": os_hvp,
4689       "beparams": cluster.beparams,
4690       "osparams": cluster.osparams,
4691       "nicparams": cluster.nicparams,
4692       "ndparams": cluster.ndparams,
4693       "candidate_pool_size": cluster.candidate_pool_size,
4694       "master_netdev": cluster.master_netdev,
4695       "volume_group_name": cluster.volume_group_name,
4696       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4697       "file_storage_dir": cluster.file_storage_dir,
4698       "maintain_node_health": cluster.maintain_node_health,
4699       "ctime": cluster.ctime,
4700       "mtime": cluster.mtime,
4701       "uuid": cluster.uuid,
4702       "tags": list(cluster.GetTags()),
4703       "uid_pool": cluster.uid_pool,
4704       "default_iallocator": cluster.default_iallocator,
4705       "reserved_lvs": cluster.reserved_lvs,
4706       "primary_ip_version": primary_ip_version,
4707       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4708       "hidden_os": cluster.hidden_os,
4709       "blacklisted_os": cluster.blacklisted_os,
4710       }
4711
4712     return result
4713
4714
4715 class LUClusterConfigQuery(NoHooksLU):
4716   """Return configuration values.
4717
4718   """
4719   REQ_BGL = False
4720   _FIELDS_DYNAMIC = utils.FieldSet()
4721   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4722                                   "watcher_pause", "volume_group_name")
4723
4724   def CheckArguments(self):
4725     _CheckOutputFields(static=self._FIELDS_STATIC,
4726                        dynamic=self._FIELDS_DYNAMIC,
4727                        selected=self.op.output_fields)
4728
4729   def ExpandNames(self):
4730     self.needed_locks = {}
4731
4732   def Exec(self, feedback_fn):
4733     """Dump a representation of the cluster config to the standard output.
4734
4735     """
4736     values = []
4737     for field in self.op.output_fields:
4738       if field == "cluster_name":
4739         entry = self.cfg.GetClusterName()
4740       elif field == "master_node":
4741         entry = self.cfg.GetMasterNode()
4742       elif field == "drain_flag":
4743         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4744       elif field == "watcher_pause":
4745         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4746       elif field == "volume_group_name":
4747         entry = self.cfg.GetVGName()
4748       else:
4749         raise errors.ParameterError(field)
4750       values.append(entry)
4751     return values
4752
4753
4754 class LUInstanceActivateDisks(NoHooksLU):
4755   """Bring up an instance's disks.
4756
4757   """
4758   REQ_BGL = False
4759
4760   def ExpandNames(self):
4761     self._ExpandAndLockInstance()
4762     self.needed_locks[locking.LEVEL_NODE] = []
4763     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4764
4765   def DeclareLocks(self, level):
4766     if level == locking.LEVEL_NODE:
4767       self._LockInstancesNodes()
4768
4769   def CheckPrereq(self):
4770     """Check prerequisites.
4771
4772     This checks that the instance is in the cluster.
4773
4774     """
4775     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776     assert self.instance is not None, \
4777       "Cannot retrieve locked instance %s" % self.op.instance_name
4778     _CheckNodeOnline(self, self.instance.primary_node)
4779
4780   def Exec(self, feedback_fn):
4781     """Activate the disks.
4782
4783     """
4784     disks_ok, disks_info = \
4785               _AssembleInstanceDisks(self, self.instance,
4786                                      ignore_size=self.op.ignore_size)
4787     if not disks_ok:
4788       raise errors.OpExecError("Cannot activate block devices")
4789
4790     return disks_info
4791
4792
4793 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4794                            ignore_size=False):
4795   """Prepare the block devices for an instance.
4796
4797   This sets up the block devices on all nodes.
4798
4799   @type lu: L{LogicalUnit}
4800   @param lu: the logical unit on whose behalf we execute
4801   @type instance: L{objects.Instance}
4802   @param instance: the instance for whose disks we assemble
4803   @type disks: list of L{objects.Disk} or None
4804   @param disks: which disks to assemble (or all, if None)
4805   @type ignore_secondaries: boolean
4806   @param ignore_secondaries: if true, errors on secondary nodes
4807       won't result in an error return from the function
4808   @type ignore_size: boolean
4809   @param ignore_size: if true, the current known size of the disk
4810       will not be used during the disk activation, useful for cases
4811       when the size is wrong
4812   @return: False if the operation failed, otherwise a list of
4813       (host, instance_visible_name, node_visible_name)
4814       with the mapping from node devices to instance devices
4815
4816   """
4817   device_info = []
4818   disks_ok = True
4819   iname = instance.name
4820   disks = _ExpandCheckDisks(instance, disks)
4821
4822   # With the two passes mechanism we try to reduce the window of
4823   # opportunity for the race condition of switching DRBD to primary
4824   # before handshaking occured, but we do not eliminate it
4825
4826   # The proper fix would be to wait (with some limits) until the
4827   # connection has been made and drbd transitions from WFConnection
4828   # into any other network-connected state (Connected, SyncTarget,
4829   # SyncSource, etc.)
4830
4831   # 1st pass, assemble on all nodes in secondary mode
4832   for idx, inst_disk in enumerate(disks):
4833     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4834       if ignore_size:
4835         node_disk = node_disk.Copy()
4836         node_disk.UnsetSize()
4837       lu.cfg.SetDiskID(node_disk, node)
4838       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4839       msg = result.fail_msg
4840       if msg:
4841         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4842                            " (is_primary=False, pass=1): %s",
4843                            inst_disk.iv_name, node, msg)
4844         if not ignore_secondaries:
4845           disks_ok = False
4846
4847   # FIXME: race condition on drbd migration to primary
4848
4849   # 2nd pass, do only the primary node
4850   for idx, inst_disk in enumerate(disks):
4851     dev_path = None
4852
4853     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4854       if node != instance.primary_node:
4855         continue
4856       if ignore_size:
4857         node_disk = node_disk.Copy()
4858         node_disk.UnsetSize()
4859       lu.cfg.SetDiskID(node_disk, node)
4860       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4861       msg = result.fail_msg
4862       if msg:
4863         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4864                            " (is_primary=True, pass=2): %s",
4865                            inst_disk.iv_name, node, msg)
4866         disks_ok = False
4867       else:
4868         dev_path = result.payload
4869
4870     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4871
4872   # leave the disks configured for the primary node
4873   # this is a workaround that would be fixed better by
4874   # improving the logical/physical id handling
4875   for disk in disks:
4876     lu.cfg.SetDiskID(disk, instance.primary_node)
4877
4878   return disks_ok, device_info
4879
4880
4881 def _StartInstanceDisks(lu, instance, force):
4882   """Start the disks of an instance.
4883
4884   """
4885   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4886                                            ignore_secondaries=force)
4887   if not disks_ok:
4888     _ShutdownInstanceDisks(lu, instance)
4889     if force is not None and not force:
4890       lu.proc.LogWarning("", hint="If the message above refers to a"
4891                          " secondary node,"
4892                          " you can retry the operation using '--force'.")
4893     raise errors.OpExecError("Disk consistency error")
4894
4895
4896 class LUInstanceDeactivateDisks(NoHooksLU):
4897   """Shutdown an instance's disks.
4898
4899   """
4900   REQ_BGL = False
4901
4902   def ExpandNames(self):
4903     self._ExpandAndLockInstance()
4904     self.needed_locks[locking.LEVEL_NODE] = []
4905     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4906
4907   def DeclareLocks(self, level):
4908     if level == locking.LEVEL_NODE:
4909       self._LockInstancesNodes()
4910
4911   def CheckPrereq(self):
4912     """Check prerequisites.
4913
4914     This checks that the instance is in the cluster.
4915
4916     """
4917     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4918     assert self.instance is not None, \
4919       "Cannot retrieve locked instance %s" % self.op.instance_name
4920
4921   def Exec(self, feedback_fn):
4922     """Deactivate the disks
4923
4924     """
4925     instance = self.instance
4926     if self.op.force:
4927       _ShutdownInstanceDisks(self, instance)
4928     else:
4929       _SafeShutdownInstanceDisks(self, instance)
4930
4931
4932 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4933   """Shutdown block devices of an instance.
4934
4935   This function checks if an instance is running, before calling
4936   _ShutdownInstanceDisks.
4937
4938   """
4939   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4940   _ShutdownInstanceDisks(lu, instance, disks=disks)
4941
4942
4943 def _ExpandCheckDisks(instance, disks):
4944   """Return the instance disks selected by the disks list
4945
4946   @type disks: list of L{objects.Disk} or None
4947   @param disks: selected disks
4948   @rtype: list of L{objects.Disk}
4949   @return: selected instance disks to act on
4950
4951   """
4952   if disks is None:
4953     return instance.disks
4954   else:
4955     if not set(disks).issubset(instance.disks):
4956       raise errors.ProgrammerError("Can only act on disks belonging to the"
4957                                    " target instance")
4958     return disks
4959
4960
4961 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4962   """Shutdown block devices of an instance.
4963
4964   This does the shutdown on all nodes of the instance.
4965
4966   If the ignore_primary is false, errors on the primary node are
4967   ignored.
4968
4969   """
4970   all_result = True
4971   disks = _ExpandCheckDisks(instance, disks)
4972
4973   for disk in disks:
4974     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4975       lu.cfg.SetDiskID(top_disk, node)
4976       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4977       msg = result.fail_msg
4978       if msg:
4979         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4980                       disk.iv_name, node, msg)
4981         if ((node == instance.primary_node and not ignore_primary) or
4982             (node != instance.primary_node and not result.offline)):
4983           all_result = False
4984   return all_result
4985
4986
4987 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4988   """Checks if a node has enough free memory.
4989
4990   This function check if a given node has the needed amount of free
4991   memory. In case the node has less memory or we cannot get the
4992   information from the node, this function raise an OpPrereqError
4993   exception.
4994
4995   @type lu: C{LogicalUnit}
4996   @param lu: a logical unit from which we get configuration data
4997   @type node: C{str}
4998   @param node: the node to check
4999   @type reason: C{str}
5000   @param reason: string to use in the error message
5001   @type requested: C{int}
5002   @param requested: the amount of memory in MiB to check for
5003   @type hypervisor_name: C{str}
5004   @param hypervisor_name: the hypervisor to ask for memory stats
5005   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5006       we cannot check the node
5007
5008   """
5009   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5010   nodeinfo[node].Raise("Can't get data from node %s" % node,
5011                        prereq=True, ecode=errors.ECODE_ENVIRON)
5012   free_mem = nodeinfo[node].payload.get('memory_free', None)
5013   if not isinstance(free_mem, int):
5014     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5015                                " was '%s'" % (node, free_mem),
5016                                errors.ECODE_ENVIRON)
5017   if requested > free_mem:
5018     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5019                                " needed %s MiB, available %s MiB" %
5020                                (node, reason, requested, free_mem),
5021                                errors.ECODE_NORES)
5022
5023
5024 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5025   """Checks if nodes have enough free disk space in the all VGs.
5026
5027   This function check if all given nodes have the needed amount of
5028   free disk. In case any node has less disk or we cannot get the
5029   information from the node, this function raise an OpPrereqError
5030   exception.
5031
5032   @type lu: C{LogicalUnit}
5033   @param lu: a logical unit from which we get configuration data
5034   @type nodenames: C{list}
5035   @param nodenames: the list of node names to check
5036   @type req_sizes: C{dict}
5037   @param req_sizes: the hash of vg and corresponding amount of disk in
5038       MiB to check for
5039   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5040       or we cannot check the node
5041
5042   """
5043   for vg, req_size in req_sizes.items():
5044     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5045
5046
5047 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5048   """Checks if nodes have enough free disk space in the specified VG.
5049
5050   This function check if all given nodes have the needed amount of
5051   free disk. In case any node has less disk or we cannot get the
5052   information from the node, this function raise an OpPrereqError
5053   exception.
5054
5055   @type lu: C{LogicalUnit}
5056   @param lu: a logical unit from which we get configuration data
5057   @type nodenames: C{list}
5058   @param nodenames: the list of node names to check
5059   @type vg: C{str}
5060   @param vg: the volume group to check
5061   @type requested: C{int}
5062   @param requested: the amount of disk in MiB to check for
5063   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5064       or we cannot check the node
5065
5066   """
5067   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5068   for node in nodenames:
5069     info = nodeinfo[node]
5070     info.Raise("Cannot get current information from node %s" % node,
5071                prereq=True, ecode=errors.ECODE_ENVIRON)
5072     vg_free = info.payload.get("vg_free", None)
5073     if not isinstance(vg_free, int):
5074       raise errors.OpPrereqError("Can't compute free disk space on node"
5075                                  " %s for vg %s, result was '%s'" %
5076                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5077     if requested > vg_free:
5078       raise errors.OpPrereqError("Not enough disk space on target node %s"
5079                                  " vg %s: required %d MiB, available %d MiB" %
5080                                  (node, vg, requested, vg_free),
5081                                  errors.ECODE_NORES)
5082
5083
5084 class LUInstanceStartup(LogicalUnit):
5085   """Starts an instance.
5086
5087   """
5088   HPATH = "instance-start"
5089   HTYPE = constants.HTYPE_INSTANCE
5090   REQ_BGL = False
5091
5092   def CheckArguments(self):
5093     # extra beparams
5094     if self.op.beparams:
5095       # fill the beparams dict
5096       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5097
5098   def ExpandNames(self):
5099     self._ExpandAndLockInstance()
5100
5101   def BuildHooksEnv(self):
5102     """Build hooks env.
5103
5104     This runs on master, primary and secondary nodes of the instance.
5105
5106     """
5107     env = {
5108       "FORCE": self.op.force,
5109       }
5110     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5111     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5112     return env, nl, nl
5113
5114   def CheckPrereq(self):
5115     """Check prerequisites.
5116
5117     This checks that the instance is in the cluster.
5118
5119     """
5120     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5121     assert self.instance is not None, \
5122       "Cannot retrieve locked instance %s" % self.op.instance_name
5123
5124     # extra hvparams
5125     if self.op.hvparams:
5126       # check hypervisor parameter syntax (locally)
5127       cluster = self.cfg.GetClusterInfo()
5128       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5129       filled_hvp = cluster.FillHV(instance)
5130       filled_hvp.update(self.op.hvparams)
5131       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5132       hv_type.CheckParameterSyntax(filled_hvp)
5133       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5134
5135     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5136
5137     if self.primary_offline and self.op.ignore_offline_nodes:
5138       self.proc.LogWarning("Ignoring offline primary node")
5139
5140       if self.op.hvparams or self.op.beparams:
5141         self.proc.LogWarning("Overridden parameters are ignored")
5142     else:
5143       _CheckNodeOnline(self, instance.primary_node)
5144
5145       bep = self.cfg.GetClusterInfo().FillBE(instance)
5146
5147       # check bridges existence
5148       _CheckInstanceBridgesExist(self, instance)
5149
5150       remote_info = self.rpc.call_instance_info(instance.primary_node,
5151                                                 instance.name,
5152                                                 instance.hypervisor)
5153       remote_info.Raise("Error checking node %s" % instance.primary_node,
5154                         prereq=True, ecode=errors.ECODE_ENVIRON)
5155       if not remote_info.payload: # not running already
5156         _CheckNodeFreeMemory(self, instance.primary_node,
5157                              "starting instance %s" % instance.name,
5158                              bep[constants.BE_MEMORY], instance.hypervisor)
5159
5160   def Exec(self, feedback_fn):
5161     """Start the instance.
5162
5163     """
5164     instance = self.instance
5165     force = self.op.force
5166
5167     self.cfg.MarkInstanceUp(instance.name)
5168
5169     if self.primary_offline:
5170       assert self.op.ignore_offline_nodes
5171       self.proc.LogInfo("Primary node offline, marked instance as started")
5172     else:
5173       node_current = instance.primary_node
5174
5175       _StartInstanceDisks(self, instance, force)
5176
5177       result = self.rpc.call_instance_start(node_current, instance,
5178                                             self.op.hvparams, self.op.beparams)
5179       msg = result.fail_msg
5180       if msg:
5181         _ShutdownInstanceDisks(self, instance)
5182         raise errors.OpExecError("Could not start instance: %s" % msg)
5183
5184
5185 class LUInstanceReboot(LogicalUnit):
5186   """Reboot an instance.
5187
5188   """
5189   HPATH = "instance-reboot"
5190   HTYPE = constants.HTYPE_INSTANCE
5191   REQ_BGL = False
5192
5193   def ExpandNames(self):
5194     self._ExpandAndLockInstance()
5195
5196   def BuildHooksEnv(self):
5197     """Build hooks env.
5198
5199     This runs on master, primary and secondary nodes of the instance.
5200
5201     """
5202     env = {
5203       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5204       "REBOOT_TYPE": self.op.reboot_type,
5205       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5206       }
5207     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5208     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5209     return env, nl, nl
5210
5211   def CheckPrereq(self):
5212     """Check prerequisites.
5213
5214     This checks that the instance is in the cluster.
5215
5216     """
5217     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5218     assert self.instance is not None, \
5219       "Cannot retrieve locked instance %s" % self.op.instance_name
5220
5221     _CheckNodeOnline(self, instance.primary_node)
5222
5223     # check bridges existence
5224     _CheckInstanceBridgesExist(self, instance)
5225
5226   def Exec(self, feedback_fn):
5227     """Reboot the instance.
5228
5229     """
5230     instance = self.instance
5231     ignore_secondaries = self.op.ignore_secondaries
5232     reboot_type = self.op.reboot_type
5233
5234     node_current = instance.primary_node
5235
5236     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5237                        constants.INSTANCE_REBOOT_HARD]:
5238       for disk in instance.disks:
5239         self.cfg.SetDiskID(disk, node_current)
5240       result = self.rpc.call_instance_reboot(node_current, instance,
5241                                              reboot_type,
5242                                              self.op.shutdown_timeout)
5243       result.Raise("Could not reboot instance")
5244     else:
5245       result = self.rpc.call_instance_shutdown(node_current, instance,
5246                                                self.op.shutdown_timeout)
5247       result.Raise("Could not shutdown instance for full reboot")
5248       _ShutdownInstanceDisks(self, instance)
5249       _StartInstanceDisks(self, instance, ignore_secondaries)
5250       result = self.rpc.call_instance_start(node_current, instance, None, None)
5251       msg = result.fail_msg
5252       if msg:
5253         _ShutdownInstanceDisks(self, instance)
5254         raise errors.OpExecError("Could not start instance for"
5255                                  " full reboot: %s" % msg)
5256
5257     self.cfg.MarkInstanceUp(instance.name)
5258
5259
5260 class LUInstanceShutdown(LogicalUnit):
5261   """Shutdown an instance.
5262
5263   """
5264   HPATH = "instance-stop"
5265   HTYPE = constants.HTYPE_INSTANCE
5266   REQ_BGL = False
5267
5268   def ExpandNames(self):
5269     self._ExpandAndLockInstance()
5270
5271   def BuildHooksEnv(self):
5272     """Build hooks env.
5273
5274     This runs on master, primary and secondary nodes of the instance.
5275
5276     """
5277     env = _BuildInstanceHookEnvByObject(self, self.instance)
5278     env["TIMEOUT"] = self.op.timeout
5279     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5280     return env, nl, nl
5281
5282   def CheckPrereq(self):
5283     """Check prerequisites.
5284
5285     This checks that the instance is in the cluster.
5286
5287     """
5288     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5289     assert self.instance is not None, \
5290       "Cannot retrieve locked instance %s" % self.op.instance_name
5291
5292     self.primary_offline = \
5293       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5294
5295     if self.primary_offline and self.op.ignore_offline_nodes:
5296       self.proc.LogWarning("Ignoring offline primary node")
5297     else:
5298       _CheckNodeOnline(self, self.instance.primary_node)
5299
5300   def Exec(self, feedback_fn):
5301     """Shutdown the instance.
5302
5303     """
5304     instance = self.instance
5305     node_current = instance.primary_node
5306     timeout = self.op.timeout
5307
5308     self.cfg.MarkInstanceDown(instance.name)
5309
5310     if self.primary_offline:
5311       assert self.op.ignore_offline_nodes
5312       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5313     else:
5314       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5315       msg = result.fail_msg
5316       if msg:
5317         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5318
5319       _ShutdownInstanceDisks(self, instance)
5320
5321
5322 class LUInstanceReinstall(LogicalUnit):
5323   """Reinstall an instance.
5324
5325   """
5326   HPATH = "instance-reinstall"
5327   HTYPE = constants.HTYPE_INSTANCE
5328   REQ_BGL = False
5329
5330   def ExpandNames(self):
5331     self._ExpandAndLockInstance()
5332
5333   def BuildHooksEnv(self):
5334     """Build hooks env.
5335
5336     This runs on master, primary and secondary nodes of the instance.
5337
5338     """
5339     env = _BuildInstanceHookEnvByObject(self, self.instance)
5340     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5341     return env, nl, nl
5342
5343   def CheckPrereq(self):
5344     """Check prerequisites.
5345
5346     This checks that the instance is in the cluster and is not running.
5347
5348     """
5349     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5350     assert instance is not None, \
5351       "Cannot retrieve locked instance %s" % self.op.instance_name
5352     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5353                      " offline, cannot reinstall")
5354     for node in instance.secondary_nodes:
5355       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5356                        " cannot reinstall")
5357
5358     if instance.disk_template == constants.DT_DISKLESS:
5359       raise errors.OpPrereqError("Instance '%s' has no disks" %
5360                                  self.op.instance_name,
5361                                  errors.ECODE_INVAL)
5362     _CheckInstanceDown(self, instance, "cannot reinstall")
5363
5364     if self.op.os_type is not None:
5365       # OS verification
5366       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5367       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5368       instance_os = self.op.os_type
5369     else:
5370       instance_os = instance.os
5371
5372     nodelist = list(instance.all_nodes)
5373
5374     if self.op.osparams:
5375       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5376       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5377       self.os_inst = i_osdict # the new dict (without defaults)
5378     else:
5379       self.os_inst = None
5380
5381     self.instance = instance
5382
5383   def Exec(self, feedback_fn):
5384     """Reinstall the instance.
5385
5386     """
5387     inst = self.instance
5388
5389     if self.op.os_type is not None:
5390       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5391       inst.os = self.op.os_type
5392       # Write to configuration
5393       self.cfg.Update(inst, feedback_fn)
5394
5395     _StartInstanceDisks(self, inst, None)
5396     try:
5397       feedback_fn("Running the instance OS create scripts...")
5398       # FIXME: pass debug option from opcode to backend
5399       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5400                                              self.op.debug_level,
5401                                              osparams=self.os_inst)
5402       result.Raise("Could not install OS for instance %s on node %s" %
5403                    (inst.name, inst.primary_node))
5404     finally:
5405       _ShutdownInstanceDisks(self, inst)
5406
5407
5408 class LUInstanceRecreateDisks(LogicalUnit):
5409   """Recreate an instance's missing disks.
5410
5411   """
5412   HPATH = "instance-recreate-disks"
5413   HTYPE = constants.HTYPE_INSTANCE
5414   REQ_BGL = False
5415
5416   def ExpandNames(self):
5417     self._ExpandAndLockInstance()
5418
5419   def BuildHooksEnv(self):
5420     """Build hooks env.
5421
5422     This runs on master, primary and secondary nodes of the instance.
5423
5424     """
5425     env = _BuildInstanceHookEnvByObject(self, self.instance)
5426     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5427     return env, nl, nl
5428
5429   def CheckPrereq(self):
5430     """Check prerequisites.
5431
5432     This checks that the instance is in the cluster and is not running.
5433
5434     """
5435     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5436     assert instance is not None, \
5437       "Cannot retrieve locked instance %s" % self.op.instance_name
5438     _CheckNodeOnline(self, instance.primary_node)
5439
5440     if instance.disk_template == constants.DT_DISKLESS:
5441       raise errors.OpPrereqError("Instance '%s' has no disks" %
5442                                  self.op.instance_name, errors.ECODE_INVAL)
5443     _CheckInstanceDown(self, instance, "cannot recreate disks")
5444
5445     if not self.op.disks:
5446       self.op.disks = range(len(instance.disks))
5447     else:
5448       for idx in self.op.disks:
5449         if idx >= len(instance.disks):
5450           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5451                                      errors.ECODE_INVAL)
5452
5453     self.instance = instance
5454
5455   def Exec(self, feedback_fn):
5456     """Recreate the disks.
5457
5458     """
5459     to_skip = []
5460     for idx, _ in enumerate(self.instance.disks):
5461       if idx not in self.op.disks: # disk idx has not been passed in
5462         to_skip.append(idx)
5463         continue
5464
5465     _CreateDisks(self, self.instance, to_skip=to_skip)
5466
5467
5468 class LUInstanceRename(LogicalUnit):
5469   """Rename an instance.
5470
5471   """
5472   HPATH = "instance-rename"
5473   HTYPE = constants.HTYPE_INSTANCE
5474
5475   def CheckArguments(self):
5476     """Check arguments.
5477
5478     """
5479     if self.op.ip_check and not self.op.name_check:
5480       # TODO: make the ip check more flexible and not depend on the name check
5481       raise errors.OpPrereqError("Cannot do ip check without a name check",
5482                                  errors.ECODE_INVAL)
5483
5484   def BuildHooksEnv(self):
5485     """Build hooks env.
5486
5487     This runs on master, primary and secondary nodes of the instance.
5488
5489     """
5490     env = _BuildInstanceHookEnvByObject(self, self.instance)
5491     env["INSTANCE_NEW_NAME"] = self.op.new_name
5492     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5493     return env, nl, nl
5494
5495   def CheckPrereq(self):
5496     """Check prerequisites.
5497
5498     This checks that the instance is in the cluster and is not running.
5499
5500     """
5501     self.op.instance_name = _ExpandInstanceName(self.cfg,
5502                                                 self.op.instance_name)
5503     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5504     assert instance is not None
5505     _CheckNodeOnline(self, instance.primary_node)
5506     _CheckInstanceDown(self, instance, "cannot rename")
5507     self.instance = instance
5508
5509     new_name = self.op.new_name
5510     if self.op.name_check:
5511       hostname = netutils.GetHostname(name=new_name)
5512       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5513                    hostname.name)
5514       new_name = self.op.new_name = hostname.name
5515       if (self.op.ip_check and
5516           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5517         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5518                                    (hostname.ip, new_name),
5519                                    errors.ECODE_NOTUNIQUE)
5520
5521     instance_list = self.cfg.GetInstanceList()
5522     if new_name in instance_list and new_name != instance.name:
5523       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5524                                  new_name, errors.ECODE_EXISTS)
5525
5526   def Exec(self, feedback_fn):
5527     """Rename the instance.
5528
5529     """
5530     inst = self.instance
5531     old_name = inst.name
5532
5533     rename_file_storage = False
5534     if (inst.disk_template == constants.DT_FILE and
5535         self.op.new_name != inst.name):
5536       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5537       rename_file_storage = True
5538
5539     self.cfg.RenameInstance(inst.name, self.op.new_name)
5540     # Change the instance lock. This is definitely safe while we hold the BGL
5541     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5542     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5543
5544     # re-read the instance from the configuration after rename
5545     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5546
5547     if rename_file_storage:
5548       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5549       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5550                                                      old_file_storage_dir,
5551                                                      new_file_storage_dir)
5552       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5553                    " (but the instance has been renamed in Ganeti)" %
5554                    (inst.primary_node, old_file_storage_dir,
5555                     new_file_storage_dir))
5556
5557     _StartInstanceDisks(self, inst, None)
5558     try:
5559       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5560                                                  old_name, self.op.debug_level)
5561       msg = result.fail_msg
5562       if msg:
5563         msg = ("Could not run OS rename script for instance %s on node %s"
5564                " (but the instance has been renamed in Ganeti): %s" %
5565                (inst.name, inst.primary_node, msg))
5566         self.proc.LogWarning(msg)
5567     finally:
5568       _ShutdownInstanceDisks(self, inst)
5569
5570     return inst.name
5571
5572
5573 class LUInstanceRemove(LogicalUnit):
5574   """Remove an instance.
5575
5576   """
5577   HPATH = "instance-remove"
5578   HTYPE = constants.HTYPE_INSTANCE
5579   REQ_BGL = False
5580
5581   def ExpandNames(self):
5582     self._ExpandAndLockInstance()
5583     self.needed_locks[locking.LEVEL_NODE] = []
5584     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5585
5586   def DeclareLocks(self, level):
5587     if level == locking.LEVEL_NODE:
5588       self._LockInstancesNodes()
5589
5590   def BuildHooksEnv(self):
5591     """Build hooks env.
5592
5593     This runs on master, primary and secondary nodes of the instance.
5594
5595     """
5596     env = _BuildInstanceHookEnvByObject(self, self.instance)
5597     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5598     nl = [self.cfg.GetMasterNode()]
5599     nl_post = list(self.instance.all_nodes) + nl
5600     return env, nl, nl_post
5601
5602   def CheckPrereq(self):
5603     """Check prerequisites.
5604
5605     This checks that the instance is in the cluster.
5606
5607     """
5608     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5609     assert self.instance is not None, \
5610       "Cannot retrieve locked instance %s" % self.op.instance_name
5611
5612   def Exec(self, feedback_fn):
5613     """Remove the instance.
5614
5615     """
5616     instance = self.instance
5617     logging.info("Shutting down instance %s on node %s",
5618                  instance.name, instance.primary_node)
5619
5620     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5621                                              self.op.shutdown_timeout)
5622     msg = result.fail_msg
5623     if msg:
5624       if self.op.ignore_failures:
5625         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5626       else:
5627         raise errors.OpExecError("Could not shutdown instance %s on"
5628                                  " node %s: %s" %
5629                                  (instance.name, instance.primary_node, msg))
5630
5631     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5632
5633
5634 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5635   """Utility function to remove an instance.
5636
5637   """
5638   logging.info("Removing block devices for instance %s", instance.name)
5639
5640   if not _RemoveDisks(lu, instance):
5641     if not ignore_failures:
5642       raise errors.OpExecError("Can't remove instance's disks")
5643     feedback_fn("Warning: can't remove instance's disks")
5644
5645   logging.info("Removing instance %s out of cluster config", instance.name)
5646
5647   lu.cfg.RemoveInstance(instance.name)
5648
5649   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5650     "Instance lock removal conflict"
5651
5652   # Remove lock for the instance
5653   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5654
5655
5656 class LUInstanceQuery(NoHooksLU):
5657   """Logical unit for querying instances.
5658
5659   """
5660   # pylint: disable-msg=W0142
5661   REQ_BGL = False
5662
5663   def CheckArguments(self):
5664     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5665                              self.op.output_fields, self.op.use_locking)
5666
5667   def ExpandNames(self):
5668     self.iq.ExpandNames(self)
5669
5670   def DeclareLocks(self, level):
5671     self.iq.DeclareLocks(self, level)
5672
5673   def Exec(self, feedback_fn):
5674     return self.iq.OldStyleQuery(self)
5675
5676
5677 class LUInstanceFailover(LogicalUnit):
5678   """Failover an instance.
5679
5680   """
5681   HPATH = "instance-failover"
5682   HTYPE = constants.HTYPE_INSTANCE
5683   REQ_BGL = False
5684
5685   def ExpandNames(self):
5686     self._ExpandAndLockInstance()
5687     self.needed_locks[locking.LEVEL_NODE] = []
5688     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5689
5690   def DeclareLocks(self, level):
5691     if level == locking.LEVEL_NODE:
5692       self._LockInstancesNodes()
5693
5694   def BuildHooksEnv(self):
5695     """Build hooks env.
5696
5697     This runs on master, primary and secondary nodes of the instance.
5698
5699     """
5700     instance = self.instance
5701     source_node = instance.primary_node
5702     target_node = instance.secondary_nodes[0]
5703     env = {
5704       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5705       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5706       "OLD_PRIMARY": source_node,
5707       "OLD_SECONDARY": target_node,
5708       "NEW_PRIMARY": target_node,
5709       "NEW_SECONDARY": source_node,
5710       }
5711     env.update(_BuildInstanceHookEnvByObject(self, instance))
5712     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5713     nl_post = list(nl)
5714     nl_post.append(source_node)
5715     return env, nl, nl_post
5716
5717   def CheckPrereq(self):
5718     """Check prerequisites.
5719
5720     This checks that the instance is in the cluster.
5721
5722     """
5723     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5724     assert self.instance is not None, \
5725       "Cannot retrieve locked instance %s" % self.op.instance_name
5726
5727     bep = self.cfg.GetClusterInfo().FillBE(instance)
5728     if instance.disk_template not in constants.DTS_NET_MIRROR:
5729       raise errors.OpPrereqError("Instance's disk layout is not"
5730                                  " network mirrored, cannot failover.",
5731                                  errors.ECODE_STATE)
5732
5733     secondary_nodes = instance.secondary_nodes
5734     if not secondary_nodes:
5735       raise errors.ProgrammerError("no secondary node but using "
5736                                    "a mirrored disk template")
5737
5738     target_node = secondary_nodes[0]
5739     _CheckNodeOnline(self, target_node)
5740     _CheckNodeNotDrained(self, target_node)
5741     if instance.admin_up:
5742       # check memory requirements on the secondary node
5743       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5744                            instance.name, bep[constants.BE_MEMORY],
5745                            instance.hypervisor)
5746     else:
5747       self.LogInfo("Not checking memory on the secondary node as"
5748                    " instance will not be started")
5749
5750     # check bridge existance
5751     _CheckInstanceBridgesExist(self, instance, node=target_node)
5752
5753   def Exec(self, feedback_fn):
5754     """Failover an instance.
5755
5756     The failover is done by shutting it down on its present node and
5757     starting it on the secondary.
5758
5759     """
5760     instance = self.instance
5761     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5762
5763     source_node = instance.primary_node
5764     target_node = instance.secondary_nodes[0]
5765
5766     if instance.admin_up:
5767       feedback_fn("* checking disk consistency between source and target")
5768       for dev in instance.disks:
5769         # for drbd, these are drbd over lvm
5770         if not _CheckDiskConsistency(self, dev, target_node, False):
5771           if not self.op.ignore_consistency:
5772             raise errors.OpExecError("Disk %s is degraded on target node,"
5773                                      " aborting failover." % dev.iv_name)
5774     else:
5775       feedback_fn("* not checking disk consistency as instance is not running")
5776
5777     feedback_fn("* shutting down instance on source node")
5778     logging.info("Shutting down instance %s on node %s",
5779                  instance.name, source_node)
5780
5781     result = self.rpc.call_instance_shutdown(source_node, instance,
5782                                              self.op.shutdown_timeout)
5783     msg = result.fail_msg
5784     if msg:
5785       if self.op.ignore_consistency or primary_node.offline:
5786         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5787                              " Proceeding anyway. Please make sure node"
5788                              " %s is down. Error details: %s",
5789                              instance.name, source_node, source_node, msg)
5790       else:
5791         raise errors.OpExecError("Could not shutdown instance %s on"
5792                                  " node %s: %s" %
5793                                  (instance.name, source_node, msg))
5794
5795     feedback_fn("* deactivating the instance's disks on source node")
5796     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5797       raise errors.OpExecError("Can't shut down the instance's disks.")
5798
5799     instance.primary_node = target_node
5800     # distribute new instance config to the other nodes
5801     self.cfg.Update(instance, feedback_fn)
5802
5803     # Only start the instance if it's marked as up
5804     if instance.admin_up:
5805       feedback_fn("* activating the instance's disks on target node")
5806       logging.info("Starting instance %s on node %s",
5807                    instance.name, target_node)
5808
5809       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5810                                            ignore_secondaries=True)
5811       if not disks_ok:
5812         _ShutdownInstanceDisks(self, instance)
5813         raise errors.OpExecError("Can't activate the instance's disks")
5814
5815       feedback_fn("* starting the instance on the target node")
5816       result = self.rpc.call_instance_start(target_node, instance, None, None)
5817       msg = result.fail_msg
5818       if msg:
5819         _ShutdownInstanceDisks(self, instance)
5820         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5821                                  (instance.name, target_node, msg))
5822
5823
5824 class LUInstanceMigrate(LogicalUnit):
5825   """Migrate an instance.
5826
5827   This is migration without shutting down, compared to the failover,
5828   which is done with shutdown.
5829
5830   """
5831   HPATH = "instance-migrate"
5832   HTYPE = constants.HTYPE_INSTANCE
5833   REQ_BGL = False
5834
5835   def ExpandNames(self):
5836     self._ExpandAndLockInstance()
5837
5838     self.needed_locks[locking.LEVEL_NODE] = []
5839     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5840
5841     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5842                                        self.op.cleanup)
5843     self.tasklets = [self._migrater]
5844
5845   def DeclareLocks(self, level):
5846     if level == locking.LEVEL_NODE:
5847       self._LockInstancesNodes()
5848
5849   def BuildHooksEnv(self):
5850     """Build hooks env.
5851
5852     This runs on master, primary and secondary nodes of the instance.
5853
5854     """
5855     instance = self._migrater.instance
5856     source_node = instance.primary_node
5857     target_node = instance.secondary_nodes[0]
5858     env = _BuildInstanceHookEnvByObject(self, instance)
5859     env["MIGRATE_LIVE"] = self._migrater.live
5860     env["MIGRATE_CLEANUP"] = self.op.cleanup
5861     env.update({
5862         "OLD_PRIMARY": source_node,
5863         "OLD_SECONDARY": target_node,
5864         "NEW_PRIMARY": target_node,
5865         "NEW_SECONDARY": source_node,
5866         })
5867     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5868     nl_post = list(nl)
5869     nl_post.append(source_node)
5870     return env, nl, nl_post
5871
5872
5873 class LUInstanceMove(LogicalUnit):
5874   """Move an instance by data-copying.
5875
5876   """
5877   HPATH = "instance-move"
5878   HTYPE = constants.HTYPE_INSTANCE
5879   REQ_BGL = False
5880
5881   def ExpandNames(self):
5882     self._ExpandAndLockInstance()
5883     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5884     self.op.target_node = target_node
5885     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5886     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5887
5888   def DeclareLocks(self, level):
5889     if level == locking.LEVEL_NODE:
5890       self._LockInstancesNodes(primary_only=True)
5891
5892   def BuildHooksEnv(self):
5893     """Build hooks env.
5894
5895     This runs on master, primary and secondary nodes of the instance.
5896
5897     """
5898     env = {
5899       "TARGET_NODE": self.op.target_node,
5900       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5901       }
5902     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5903     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5904                                        self.op.target_node]
5905     return env, nl, nl
5906
5907   def CheckPrereq(self):
5908     """Check prerequisites.
5909
5910     This checks that the instance is in the cluster.
5911
5912     """
5913     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5914     assert self.instance is not None, \
5915       "Cannot retrieve locked instance %s" % self.op.instance_name
5916
5917     node = self.cfg.GetNodeInfo(self.op.target_node)
5918     assert node is not None, \
5919       "Cannot retrieve locked node %s" % self.op.target_node
5920
5921     self.target_node = target_node = node.name
5922
5923     if target_node == instance.primary_node:
5924       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5925                                  (instance.name, target_node),
5926                                  errors.ECODE_STATE)
5927
5928     bep = self.cfg.GetClusterInfo().FillBE(instance)
5929
5930     for idx, dsk in enumerate(instance.disks):
5931       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5932         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5933                                    " cannot copy" % idx, errors.ECODE_STATE)
5934
5935     _CheckNodeOnline(self, target_node)
5936     _CheckNodeNotDrained(self, target_node)
5937     _CheckNodeVmCapable(self, target_node)
5938
5939     if instance.admin_up:
5940       # check memory requirements on the secondary node
5941       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5942                            instance.name, bep[constants.BE_MEMORY],
5943                            instance.hypervisor)
5944     else:
5945       self.LogInfo("Not checking memory on the secondary node as"
5946                    " instance will not be started")
5947
5948     # check bridge existance
5949     _CheckInstanceBridgesExist(self, instance, node=target_node)
5950
5951   def Exec(self, feedback_fn):
5952     """Move an instance.
5953
5954     The move is done by shutting it down on its present node, copying
5955     the data over (slow) and starting it on the new node.
5956
5957     """
5958     instance = self.instance
5959
5960     source_node = instance.primary_node
5961     target_node = self.target_node
5962
5963     self.LogInfo("Shutting down instance %s on source node %s",
5964                  instance.name, source_node)
5965
5966     result = self.rpc.call_instance_shutdown(source_node, instance,
5967                                              self.op.shutdown_timeout)
5968     msg = result.fail_msg
5969     if msg:
5970       if self.op.ignore_consistency:
5971         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5972                              " Proceeding anyway. Please make sure node"
5973                              " %s is down. Error details: %s",
5974                              instance.name, source_node, source_node, msg)
5975       else:
5976         raise errors.OpExecError("Could not shutdown instance %s on"
5977                                  " node %s: %s" %
5978                                  (instance.name, source_node, msg))
5979
5980     # create the target disks
5981     try:
5982       _CreateDisks(self, instance, target_node=target_node)
5983     except errors.OpExecError:
5984       self.LogWarning("Device creation failed, reverting...")
5985       try:
5986         _RemoveDisks(self, instance, target_node=target_node)
5987       finally:
5988         self.cfg.ReleaseDRBDMinors(instance.name)
5989         raise
5990
5991     cluster_name = self.cfg.GetClusterInfo().cluster_name
5992
5993     errs = []
5994     # activate, get path, copy the data over
5995     for idx, disk in enumerate(instance.disks):
5996       self.LogInfo("Copying data for disk %d", idx)
5997       result = self.rpc.call_blockdev_assemble(target_node, disk,
5998                                                instance.name, True, idx)
5999       if result.fail_msg:
6000         self.LogWarning("Can't assemble newly created disk %d: %s",
6001                         idx, result.fail_msg)
6002         errs.append(result.fail_msg)
6003         break
6004       dev_path = result.payload
6005       result = self.rpc.call_blockdev_export(source_node, disk,
6006                                              target_node, dev_path,
6007                                              cluster_name)
6008       if result.fail_msg:
6009         self.LogWarning("Can't copy data over for disk %d: %s",
6010                         idx, result.fail_msg)
6011         errs.append(result.fail_msg)
6012         break
6013
6014     if errs:
6015       self.LogWarning("Some disks failed to copy, aborting")
6016       try:
6017         _RemoveDisks(self, instance, target_node=target_node)
6018       finally:
6019         self.cfg.ReleaseDRBDMinors(instance.name)
6020         raise errors.OpExecError("Errors during disk copy: %s" %
6021                                  (",".join(errs),))
6022
6023     instance.primary_node = target_node
6024     self.cfg.Update(instance, feedback_fn)
6025
6026     self.LogInfo("Removing the disks on the original node")
6027     _RemoveDisks(self, instance, target_node=source_node)
6028
6029     # Only start the instance if it's marked as up
6030     if instance.admin_up:
6031       self.LogInfo("Starting instance %s on node %s",
6032                    instance.name, target_node)
6033
6034       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6035                                            ignore_secondaries=True)
6036       if not disks_ok:
6037         _ShutdownInstanceDisks(self, instance)
6038         raise errors.OpExecError("Can't activate the instance's disks")
6039
6040       result = self.rpc.call_instance_start(target_node, instance, None, None)
6041       msg = result.fail_msg
6042       if msg:
6043         _ShutdownInstanceDisks(self, instance)
6044         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6045                                  (instance.name, target_node, msg))
6046
6047
6048 class LUNodeMigrate(LogicalUnit):
6049   """Migrate all instances from a node.
6050
6051   """
6052   HPATH = "node-migrate"
6053   HTYPE = constants.HTYPE_NODE
6054   REQ_BGL = False
6055
6056   def ExpandNames(self):
6057     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6058
6059     self.needed_locks = {
6060       locking.LEVEL_NODE: [self.op.node_name],
6061       }
6062
6063     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6064
6065     # Create tasklets for migrating instances for all instances on this node
6066     names = []
6067     tasklets = []
6068
6069     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6070       logging.debug("Migrating instance %s", inst.name)
6071       names.append(inst.name)
6072
6073       tasklets.append(TLMigrateInstance(self, inst.name, False))
6074
6075     self.tasklets = tasklets
6076
6077     # Declare instance locks
6078     self.needed_locks[locking.LEVEL_INSTANCE] = names
6079
6080   def DeclareLocks(self, level):
6081     if level == locking.LEVEL_NODE:
6082       self._LockInstancesNodes()
6083
6084   def BuildHooksEnv(self):
6085     """Build hooks env.
6086
6087     This runs on the master, the primary and all the secondaries.
6088
6089     """
6090     env = {
6091       "NODE_NAME": self.op.node_name,
6092       }
6093
6094     nl = [self.cfg.GetMasterNode()]
6095
6096     return (env, nl, nl)
6097
6098
6099 class TLMigrateInstance(Tasklet):
6100   """Tasklet class for instance migration.
6101
6102   @type live: boolean
6103   @ivar live: whether the migration will be done live or non-live;
6104       this variable is initalized only after CheckPrereq has run
6105
6106   """
6107   def __init__(self, lu, instance_name, cleanup):
6108     """Initializes this class.
6109
6110     """
6111     Tasklet.__init__(self, lu)
6112
6113     # Parameters
6114     self.instance_name = instance_name
6115     self.cleanup = cleanup
6116     self.live = False # will be overridden later
6117
6118   def CheckPrereq(self):
6119     """Check prerequisites.
6120
6121     This checks that the instance is in the cluster.
6122
6123     """
6124     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6125     instance = self.cfg.GetInstanceInfo(instance_name)
6126     assert instance is not None
6127
6128     if instance.disk_template != constants.DT_DRBD8:
6129       raise errors.OpPrereqError("Instance's disk layout is not"
6130                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6131
6132     secondary_nodes = instance.secondary_nodes
6133     if not secondary_nodes:
6134       raise errors.ConfigurationError("No secondary node but using"
6135                                       " drbd8 disk template")
6136
6137     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6138
6139     target_node = secondary_nodes[0]
6140     # check memory requirements on the secondary node
6141     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6142                          instance.name, i_be[constants.BE_MEMORY],
6143                          instance.hypervisor)
6144
6145     # check bridge existance
6146     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6147
6148     if not self.cleanup:
6149       _CheckNodeNotDrained(self.lu, target_node)
6150       result = self.rpc.call_instance_migratable(instance.primary_node,
6151                                                  instance)
6152       result.Raise("Can't migrate, please use failover",
6153                    prereq=True, ecode=errors.ECODE_STATE)
6154
6155     self.instance = instance
6156
6157     if self.lu.op.live is not None and self.lu.op.mode is not None:
6158       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6159                                  " parameters are accepted",
6160                                  errors.ECODE_INVAL)
6161     if self.lu.op.live is not None:
6162       if self.lu.op.live:
6163         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6164       else:
6165         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6166       # reset the 'live' parameter to None so that repeated
6167       # invocations of CheckPrereq do not raise an exception
6168       self.lu.op.live = None
6169     elif self.lu.op.mode is None:
6170       # read the default value from the hypervisor
6171       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6172       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6173
6174     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6175
6176   def _WaitUntilSync(self):
6177     """Poll with custom rpc for disk sync.
6178
6179     This uses our own step-based rpc call.
6180
6181     """
6182     self.feedback_fn("* wait until resync is done")
6183     all_done = False
6184     while not all_done:
6185       all_done = True
6186       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6187                                             self.nodes_ip,
6188                                             self.instance.disks)
6189       min_percent = 100
6190       for node, nres in result.items():
6191         nres.Raise("Cannot resync disks on node %s" % node)
6192         node_done, node_percent = nres.payload
6193         all_done = all_done and node_done
6194         if node_percent is not None:
6195           min_percent = min(min_percent, node_percent)
6196       if not all_done:
6197         if min_percent < 100:
6198           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6199         time.sleep(2)
6200
6201   def _EnsureSecondary(self, node):
6202     """Demote a node to secondary.
6203
6204     """
6205     self.feedback_fn("* switching node %s to secondary mode" % node)
6206
6207     for dev in self.instance.disks:
6208       self.cfg.SetDiskID(dev, node)
6209
6210     result = self.rpc.call_blockdev_close(node, self.instance.name,
6211                                           self.instance.disks)
6212     result.Raise("Cannot change disk to secondary on node %s" % node)
6213
6214   def _GoStandalone(self):
6215     """Disconnect from the network.
6216
6217     """
6218     self.feedback_fn("* changing into standalone mode")
6219     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6220                                                self.instance.disks)
6221     for node, nres in result.items():
6222       nres.Raise("Cannot disconnect disks node %s" % node)
6223
6224   def _GoReconnect(self, multimaster):
6225     """Reconnect to the network.
6226
6227     """
6228     if multimaster:
6229       msg = "dual-master"
6230     else:
6231       msg = "single-master"
6232     self.feedback_fn("* changing disks into %s mode" % msg)
6233     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6234                                            self.instance.disks,
6235                                            self.instance.name, multimaster)
6236     for node, nres in result.items():
6237       nres.Raise("Cannot change disks config on node %s" % node)
6238
6239   def _ExecCleanup(self):
6240     """Try to cleanup after a failed migration.
6241
6242     The cleanup is done by:
6243       - check that the instance is running only on one node
6244         (and update the config if needed)
6245       - change disks on its secondary node to secondary
6246       - wait until disks are fully synchronized
6247       - disconnect from the network
6248       - change disks into single-master mode
6249       - wait again until disks are fully synchronized
6250
6251     """
6252     instance = self.instance
6253     target_node = self.target_node
6254     source_node = self.source_node
6255
6256     # check running on only one node
6257     self.feedback_fn("* checking where the instance actually runs"
6258                      " (if this hangs, the hypervisor might be in"
6259                      " a bad state)")
6260     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6261     for node, result in ins_l.items():
6262       result.Raise("Can't contact node %s" % node)
6263
6264     runningon_source = instance.name in ins_l[source_node].payload
6265     runningon_target = instance.name in ins_l[target_node].payload
6266
6267     if runningon_source and runningon_target:
6268       raise errors.OpExecError("Instance seems to be running on two nodes,"
6269                                " or the hypervisor is confused. You will have"
6270                                " to ensure manually that it runs only on one"
6271                                " and restart this operation.")
6272
6273     if not (runningon_source or runningon_target):
6274       raise errors.OpExecError("Instance does not seem to be running at all."
6275                                " In this case, it's safer to repair by"
6276                                " running 'gnt-instance stop' to ensure disk"
6277                                " shutdown, and then restarting it.")
6278
6279     if runningon_target:
6280       # the migration has actually succeeded, we need to update the config
6281       self.feedback_fn("* instance running on secondary node (%s),"
6282                        " updating config" % target_node)
6283       instance.primary_node = target_node
6284       self.cfg.Update(instance, self.feedback_fn)
6285       demoted_node = source_node
6286     else:
6287       self.feedback_fn("* instance confirmed to be running on its"
6288                        " primary node (%s)" % source_node)
6289       demoted_node = target_node
6290
6291     self._EnsureSecondary(demoted_node)
6292     try:
6293       self._WaitUntilSync()
6294     except errors.OpExecError:
6295       # we ignore here errors, since if the device is standalone, it
6296       # won't be able to sync
6297       pass
6298     self._GoStandalone()
6299     self._GoReconnect(False)
6300     self._WaitUntilSync()
6301
6302     self.feedback_fn("* done")
6303
6304   def _RevertDiskStatus(self):
6305     """Try to revert the disk status after a failed migration.
6306
6307     """
6308     target_node = self.target_node
6309     try:
6310       self._EnsureSecondary(target_node)
6311       self._GoStandalone()
6312       self._GoReconnect(False)
6313       self._WaitUntilSync()
6314     except errors.OpExecError, err:
6315       self.lu.LogWarning("Migration failed and I can't reconnect the"
6316                          " drives: error '%s'\n"
6317                          "Please look and recover the instance status" %
6318                          str(err))
6319
6320   def _AbortMigration(self):
6321     """Call the hypervisor code to abort a started migration.
6322
6323     """
6324     instance = self.instance
6325     target_node = self.target_node
6326     migration_info = self.migration_info
6327
6328     abort_result = self.rpc.call_finalize_migration(target_node,
6329                                                     instance,
6330                                                     migration_info,
6331                                                     False)
6332     abort_msg = abort_result.fail_msg
6333     if abort_msg:
6334       logging.error("Aborting migration failed on target node %s: %s",
6335                     target_node, abort_msg)
6336       # Don't raise an exception here, as we stil have to try to revert the
6337       # disk status, even if this step failed.
6338
6339   def _ExecMigration(self):
6340     """Migrate an instance.
6341
6342     The migrate is done by:
6343       - change the disks into dual-master mode
6344       - wait until disks are fully synchronized again
6345       - migrate the instance
6346       - change disks on the new secondary node (the old primary) to secondary
6347       - wait until disks are fully synchronized
6348       - change disks into single-master mode
6349
6350     """
6351     instance = self.instance
6352     target_node = self.target_node
6353     source_node = self.source_node
6354
6355     self.feedback_fn("* checking disk consistency between source and target")
6356     for dev in instance.disks:
6357       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6358         raise errors.OpExecError("Disk %s is degraded or not fully"
6359                                  " synchronized on target node,"
6360                                  " aborting migrate." % dev.iv_name)
6361
6362     # First get the migration information from the remote node
6363     result = self.rpc.call_migration_info(source_node, instance)
6364     msg = result.fail_msg
6365     if msg:
6366       log_err = ("Failed fetching source migration information from %s: %s" %
6367                  (source_node, msg))
6368       logging.error(log_err)
6369       raise errors.OpExecError(log_err)
6370
6371     self.migration_info = migration_info = result.payload
6372
6373     # Then switch the disks to master/master mode
6374     self._EnsureSecondary(target_node)
6375     self._GoStandalone()
6376     self._GoReconnect(True)
6377     self._WaitUntilSync()
6378
6379     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6380     result = self.rpc.call_accept_instance(target_node,
6381                                            instance,
6382                                            migration_info,
6383                                            self.nodes_ip[target_node])
6384
6385     msg = result.fail_msg
6386     if msg:
6387       logging.error("Instance pre-migration failed, trying to revert"
6388                     " disk status: %s", msg)
6389       self.feedback_fn("Pre-migration failed, aborting")
6390       self._AbortMigration()
6391       self._RevertDiskStatus()
6392       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6393                                (instance.name, msg))
6394
6395     self.feedback_fn("* migrating instance to %s" % target_node)
6396     time.sleep(10)
6397     result = self.rpc.call_instance_migrate(source_node, instance,
6398                                             self.nodes_ip[target_node],
6399                                             self.live)
6400     msg = result.fail_msg
6401     if msg:
6402       logging.error("Instance migration failed, trying to revert"
6403                     " disk status: %s", msg)
6404       self.feedback_fn("Migration failed, aborting")
6405       self._AbortMigration()
6406       self._RevertDiskStatus()
6407       raise errors.OpExecError("Could not migrate instance %s: %s" %
6408                                (instance.name, msg))
6409     time.sleep(10)
6410
6411     instance.primary_node = target_node
6412     # distribute new instance config to the other nodes
6413     self.cfg.Update(instance, self.feedback_fn)
6414
6415     result = self.rpc.call_finalize_migration(target_node,
6416                                               instance,
6417                                               migration_info,
6418                                               True)
6419     msg = result.fail_msg
6420     if msg:
6421       logging.error("Instance migration succeeded, but finalization failed:"
6422                     " %s", msg)
6423       raise errors.OpExecError("Could not finalize instance migration: %s" %
6424                                msg)
6425
6426     self._EnsureSecondary(source_node)
6427     self._WaitUntilSync()
6428     self._GoStandalone()
6429     self._GoReconnect(False)
6430     self._WaitUntilSync()
6431
6432     self.feedback_fn("* done")
6433
6434   def Exec(self, feedback_fn):
6435     """Perform the migration.
6436
6437     """
6438     feedback_fn("Migrating instance %s" % self.instance.name)
6439
6440     self.feedback_fn = feedback_fn
6441
6442     self.source_node = self.instance.primary_node
6443     self.target_node = self.instance.secondary_nodes[0]
6444     self.all_nodes = [self.source_node, self.target_node]
6445     self.nodes_ip = {
6446       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6447       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6448       }
6449
6450     if self.cleanup:
6451       return self._ExecCleanup()
6452     else:
6453       return self._ExecMigration()
6454
6455
6456 def _CreateBlockDev(lu, node, instance, device, force_create,
6457                     info, force_open):
6458   """Create a tree of block devices on a given node.
6459
6460   If this device type has to be created on secondaries, create it and
6461   all its children.
6462
6463   If not, just recurse to children keeping the same 'force' value.
6464
6465   @param lu: the lu on whose behalf we execute
6466   @param node: the node on which to create the device
6467   @type instance: L{objects.Instance}
6468   @param instance: the instance which owns the device
6469   @type device: L{objects.Disk}
6470   @param device: the device to create
6471   @type force_create: boolean
6472   @param force_create: whether to force creation of this device; this
6473       will be change to True whenever we find a device which has
6474       CreateOnSecondary() attribute
6475   @param info: the extra 'metadata' we should attach to the device
6476       (this will be represented as a LVM tag)
6477   @type force_open: boolean
6478   @param force_open: this parameter will be passes to the
6479       L{backend.BlockdevCreate} function where it specifies
6480       whether we run on primary or not, and it affects both
6481       the child assembly and the device own Open() execution
6482
6483   """
6484   if device.CreateOnSecondary():
6485     force_create = True
6486
6487   if device.children:
6488     for child in device.children:
6489       _CreateBlockDev(lu, node, instance, child, force_create,
6490                       info, force_open)
6491
6492   if not force_create:
6493     return
6494
6495   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6496
6497
6498 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6499   """Create a single block device on a given node.
6500
6501   This will not recurse over children of the device, so they must be
6502   created in advance.
6503
6504   @param lu: the lu on whose behalf we execute
6505   @param node: the node on which to create the device
6506   @type instance: L{objects.Instance}
6507   @param instance: the instance which owns the device
6508   @type device: L{objects.Disk}
6509   @param device: the device to create
6510   @param info: the extra 'metadata' we should attach to the device
6511       (this will be represented as a LVM tag)
6512   @type force_open: boolean
6513   @param force_open: this parameter will be passes to the
6514       L{backend.BlockdevCreate} function where it specifies
6515       whether we run on primary or not, and it affects both
6516       the child assembly and the device own Open() execution
6517
6518   """
6519   lu.cfg.SetDiskID(device, node)
6520   result = lu.rpc.call_blockdev_create(node, device, device.size,
6521                                        instance.name, force_open, info)
6522   result.Raise("Can't create block device %s on"
6523                " node %s for instance %s" % (device, node, instance.name))
6524   if device.physical_id is None:
6525     device.physical_id = result.payload
6526
6527
6528 def _GenerateUniqueNames(lu, exts):
6529   """Generate a suitable LV name.
6530
6531   This will generate a logical volume name for the given instance.
6532
6533   """
6534   results = []
6535   for val in exts:
6536     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6537     results.append("%s%s" % (new_id, val))
6538   return results
6539
6540
6541 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6542                          p_minor, s_minor):
6543   """Generate a drbd8 device complete with its children.
6544
6545   """
6546   port = lu.cfg.AllocatePort()
6547   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6548   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6549                           logical_id=(vgname, names[0]))
6550   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6551                           logical_id=(vgname, names[1]))
6552   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6553                           logical_id=(primary, secondary, port,
6554                                       p_minor, s_minor,
6555                                       shared_secret),
6556                           children=[dev_data, dev_meta],
6557                           iv_name=iv_name)
6558   return drbd_dev
6559
6560
6561 def _GenerateDiskTemplate(lu, template_name,
6562                           instance_name, primary_node,
6563                           secondary_nodes, disk_info,
6564                           file_storage_dir, file_driver,
6565                           base_index, feedback_fn):
6566   """Generate the entire disk layout for a given template type.
6567
6568   """
6569   #TODO: compute space requirements
6570
6571   vgname = lu.cfg.GetVGName()
6572   disk_count = len(disk_info)
6573   disks = []
6574   if template_name == constants.DT_DISKLESS:
6575     pass
6576   elif template_name == constants.DT_PLAIN:
6577     if len(secondary_nodes) != 0:
6578       raise errors.ProgrammerError("Wrong template configuration")
6579
6580     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6581                                       for i in range(disk_count)])
6582     for idx, disk in enumerate(disk_info):
6583       disk_index = idx + base_index
6584       vg = disk.get("vg", vgname)
6585       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6586       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6587                               logical_id=(vg, names[idx]),
6588                               iv_name="disk/%d" % disk_index,
6589                               mode=disk["mode"])
6590       disks.append(disk_dev)
6591   elif template_name == constants.DT_DRBD8:
6592     if len(secondary_nodes) != 1:
6593       raise errors.ProgrammerError("Wrong template configuration")
6594     remote_node = secondary_nodes[0]
6595     minors = lu.cfg.AllocateDRBDMinor(
6596       [primary_node, remote_node] * len(disk_info), instance_name)
6597
6598     names = []
6599     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6600                                                for i in range(disk_count)]):
6601       names.append(lv_prefix + "_data")
6602       names.append(lv_prefix + "_meta")
6603     for idx, disk in enumerate(disk_info):
6604       disk_index = idx + base_index
6605       vg = disk.get("vg", vgname)
6606       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6607                                       disk["size"], vg, names[idx*2:idx*2+2],
6608                                       "disk/%d" % disk_index,
6609                                       minors[idx*2], minors[idx*2+1])
6610       disk_dev.mode = disk["mode"]
6611       disks.append(disk_dev)
6612   elif template_name == constants.DT_FILE:
6613     if len(secondary_nodes) != 0:
6614       raise errors.ProgrammerError("Wrong template configuration")
6615
6616     opcodes.RequireFileStorage()
6617
6618     for idx, disk in enumerate(disk_info):
6619       disk_index = idx + base_index
6620       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6621                               iv_name="disk/%d" % disk_index,
6622                               logical_id=(file_driver,
6623                                           "%s/disk%d" % (file_storage_dir,
6624                                                          disk_index)),
6625                               mode=disk["mode"])
6626       disks.append(disk_dev)
6627   else:
6628     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6629   return disks
6630
6631
6632 def _GetInstanceInfoText(instance):
6633   """Compute that text that should be added to the disk's metadata.
6634
6635   """
6636   return "originstname+%s" % instance.name
6637
6638
6639 def _CalcEta(time_taken, written, total_size):
6640   """Calculates the ETA based on size written and total size.
6641
6642   @param time_taken: The time taken so far
6643   @param written: amount written so far
6644   @param total_size: The total size of data to be written
6645   @return: The remaining time in seconds
6646
6647   """
6648   avg_time = time_taken / float(written)
6649   return (total_size - written) * avg_time
6650
6651
6652 def _WipeDisks(lu, instance):
6653   """Wipes instance disks.
6654
6655   @type lu: L{LogicalUnit}
6656   @param lu: the logical unit on whose behalf we execute
6657   @type instance: L{objects.Instance}
6658   @param instance: the instance whose disks we should create
6659   @return: the success of the wipe
6660
6661   """
6662   node = instance.primary_node
6663   logging.info("Pause sync of instance %s disks", instance.name)
6664   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6665
6666   for idx, success in enumerate(result.payload):
6667     if not success:
6668       logging.warn("pause-sync of instance %s for disks %d failed",
6669                    instance.name, idx)
6670
6671   try:
6672     for idx, device in enumerate(instance.disks):
6673       lu.LogInfo("* Wiping disk %d", idx)
6674       logging.info("Wiping disk %d for instance %s", idx, instance.name)
6675
6676       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6677       # MAX_WIPE_CHUNK at max
6678       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6679                             constants.MIN_WIPE_CHUNK_PERCENT)
6680
6681       offset = 0
6682       size = device.size
6683       last_output = 0
6684       start_time = time.time()
6685
6686       while offset < size:
6687         wipe_size = min(wipe_chunk_size, size - offset)
6688         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6689         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6690                      (idx, offset, wipe_size))
6691         now = time.time()
6692         offset += wipe_size
6693         if now - last_output >= 60:
6694           eta = _CalcEta(now - start_time, offset, size)
6695           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6696                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6697           last_output = now
6698   finally:
6699     logging.info("Resume sync of instance %s disks", instance.name)
6700
6701     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6702
6703     for idx, success in enumerate(result.payload):
6704       if not success:
6705         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6706                       " look at the status and troubleshoot the issue.", idx)
6707         logging.warn("resume-sync of instance %s for disks %d failed",
6708                      instance.name, idx)
6709
6710
6711 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6712   """Create all disks for an instance.
6713
6714   This abstracts away some work from AddInstance.
6715
6716   @type lu: L{LogicalUnit}
6717   @param lu: the logical unit on whose behalf we execute
6718   @type instance: L{objects.Instance}
6719   @param instance: the instance whose disks we should create
6720   @type to_skip: list
6721   @param to_skip: list of indices to skip
6722   @type target_node: string
6723   @param target_node: if passed, overrides the target node for creation
6724   @rtype: boolean
6725   @return: the success of the creation
6726
6727   """
6728   info = _GetInstanceInfoText(instance)
6729   if target_node is None:
6730     pnode = instance.primary_node
6731     all_nodes = instance.all_nodes
6732   else:
6733     pnode = target_node
6734     all_nodes = [pnode]
6735
6736   if instance.disk_template == constants.DT_FILE:
6737     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6738     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6739
6740     result.Raise("Failed to create directory '%s' on"
6741                  " node %s" % (file_storage_dir, pnode))
6742
6743   # Note: this needs to be kept in sync with adding of disks in
6744   # LUInstanceSetParams
6745   for idx, device in enumerate(instance.disks):
6746     if to_skip and idx in to_skip:
6747       continue
6748     logging.info("Creating volume %s for instance %s",
6749                  device.iv_name, instance.name)
6750     #HARDCODE
6751     for node in all_nodes:
6752       f_create = node == pnode
6753       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6754
6755
6756 def _RemoveDisks(lu, instance, target_node=None):
6757   """Remove all disks for an instance.
6758
6759   This abstracts away some work from `AddInstance()` and
6760   `RemoveInstance()`. Note that in case some of the devices couldn't
6761   be removed, the removal will continue with the other ones (compare
6762   with `_CreateDisks()`).
6763
6764   @type lu: L{LogicalUnit}
6765   @param lu: the logical unit on whose behalf we execute
6766   @type instance: L{objects.Instance}
6767   @param instance: the instance whose disks we should remove
6768   @type target_node: string
6769   @param target_node: used to override the node on which to remove the disks
6770   @rtype: boolean
6771   @return: the success of the removal
6772
6773   """
6774   logging.info("Removing block devices for instance %s", instance.name)
6775
6776   all_result = True
6777   for device in instance.disks:
6778     if target_node:
6779       edata = [(target_node, device)]
6780     else:
6781       edata = device.ComputeNodeTree(instance.primary_node)
6782     for node, disk in edata:
6783       lu.cfg.SetDiskID(disk, node)
6784       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6785       if msg:
6786         lu.LogWarning("Could not remove block device %s on node %s,"
6787                       " continuing anyway: %s", device.iv_name, node, msg)
6788         all_result = False
6789
6790   if instance.disk_template == constants.DT_FILE:
6791     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6792     if target_node:
6793       tgt = target_node
6794     else:
6795       tgt = instance.primary_node
6796     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6797     if result.fail_msg:
6798       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6799                     file_storage_dir, instance.primary_node, result.fail_msg)
6800       all_result = False
6801
6802   return all_result
6803
6804
6805 def _ComputeDiskSizePerVG(disk_template, disks):
6806   """Compute disk size requirements in the volume group
6807
6808   """
6809   def _compute(disks, payload):
6810     """Universal algorithm
6811
6812     """
6813     vgs = {}
6814     for disk in disks:
6815       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6816
6817     return vgs
6818
6819   # Required free disk space as a function of disk and swap space
6820   req_size_dict = {
6821     constants.DT_DISKLESS: {},
6822     constants.DT_PLAIN: _compute(disks, 0),
6823     # 128 MB are added for drbd metadata for each disk
6824     constants.DT_DRBD8: _compute(disks, 128),
6825     constants.DT_FILE: {},
6826   }
6827
6828   if disk_template not in req_size_dict:
6829     raise errors.ProgrammerError("Disk template '%s' size requirement"
6830                                  " is unknown" %  disk_template)
6831
6832   return req_size_dict[disk_template]
6833
6834
6835 def _ComputeDiskSize(disk_template, disks):
6836   """Compute disk size requirements in the volume group
6837
6838   """
6839   # Required free disk space as a function of disk and swap space
6840   req_size_dict = {
6841     constants.DT_DISKLESS: None,
6842     constants.DT_PLAIN: sum(d["size"] for d in disks),
6843     # 128 MB are added for drbd metadata for each disk
6844     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6845     constants.DT_FILE: None,
6846   }
6847
6848   if disk_template not in req_size_dict:
6849     raise errors.ProgrammerError("Disk template '%s' size requirement"
6850                                  " is unknown" %  disk_template)
6851
6852   return req_size_dict[disk_template]
6853
6854
6855 def _FilterVmNodes(lu, nodenames):
6856   """Filters out non-vm_capable nodes from a list.
6857
6858   @type lu: L{LogicalUnit}
6859   @param lu: the logical unit for which we check
6860   @type nodenames: list
6861   @param nodenames: the list of nodes on which we should check
6862   @rtype: list
6863   @return: the list of vm-capable nodes
6864
6865   """
6866   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6867   return [name for name in nodenames if name not in vm_nodes]
6868
6869
6870 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6871   """Hypervisor parameter validation.
6872
6873   This function abstract the hypervisor parameter validation to be
6874   used in both instance create and instance modify.
6875
6876   @type lu: L{LogicalUnit}
6877   @param lu: the logical unit for which we check
6878   @type nodenames: list
6879   @param nodenames: the list of nodes on which we should check
6880   @type hvname: string
6881   @param hvname: the name of the hypervisor we should use
6882   @type hvparams: dict
6883   @param hvparams: the parameters which we need to check
6884   @raise errors.OpPrereqError: if the parameters are not valid
6885
6886   """
6887   nodenames = _FilterVmNodes(lu, nodenames)
6888   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6889                                                   hvname,
6890                                                   hvparams)
6891   for node in nodenames:
6892     info = hvinfo[node]
6893     if info.offline:
6894       continue
6895     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6896
6897
6898 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6899   """OS parameters validation.
6900
6901   @type lu: L{LogicalUnit}
6902   @param lu: the logical unit for which we check
6903   @type required: boolean
6904   @param required: whether the validation should fail if the OS is not
6905       found
6906   @type nodenames: list
6907   @param nodenames: the list of nodes on which we should check
6908   @type osname: string
6909   @param osname: the name of the hypervisor we should use
6910   @type osparams: dict
6911   @param osparams: the parameters which we need to check
6912   @raise errors.OpPrereqError: if the parameters are not valid
6913
6914   """
6915   nodenames = _FilterVmNodes(lu, nodenames)
6916   result = lu.rpc.call_os_validate(required, nodenames, osname,
6917                                    [constants.OS_VALIDATE_PARAMETERS],
6918                                    osparams)
6919   for node, nres in result.items():
6920     # we don't check for offline cases since this should be run only
6921     # against the master node and/or an instance's nodes
6922     nres.Raise("OS Parameters validation failed on node %s" % node)
6923     if not nres.payload:
6924       lu.LogInfo("OS %s not found on node %s, validation skipped",
6925                  osname, node)
6926
6927
6928 class LUInstanceCreate(LogicalUnit):
6929   """Create an instance.
6930
6931   """
6932   HPATH = "instance-add"
6933   HTYPE = constants.HTYPE_INSTANCE
6934   REQ_BGL = False
6935
6936   def CheckArguments(self):
6937     """Check arguments.
6938
6939     """
6940     # do not require name_check to ease forward/backward compatibility
6941     # for tools
6942     if self.op.no_install and self.op.start:
6943       self.LogInfo("No-installation mode selected, disabling startup")
6944       self.op.start = False
6945     # validate/normalize the instance name
6946     self.op.instance_name = \
6947       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6948
6949     if self.op.ip_check and not self.op.name_check:
6950       # TODO: make the ip check more flexible and not depend on the name check
6951       raise errors.OpPrereqError("Cannot do ip check without a name check",
6952                                  errors.ECODE_INVAL)
6953
6954     # check nics' parameter names
6955     for nic in self.op.nics:
6956       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6957
6958     # check disks. parameter names and consistent adopt/no-adopt strategy
6959     has_adopt = has_no_adopt = False
6960     for disk in self.op.disks:
6961       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6962       if "adopt" in disk:
6963         has_adopt = True
6964       else:
6965         has_no_adopt = True
6966     if has_adopt and has_no_adopt:
6967       raise errors.OpPrereqError("Either all disks are adopted or none is",
6968                                  errors.ECODE_INVAL)
6969     if has_adopt:
6970       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6971         raise errors.OpPrereqError("Disk adoption is not supported for the"
6972                                    " '%s' disk template" %
6973                                    self.op.disk_template,
6974                                    errors.ECODE_INVAL)
6975       if self.op.iallocator is not None:
6976         raise errors.OpPrereqError("Disk adoption not allowed with an"
6977                                    " iallocator script", errors.ECODE_INVAL)
6978       if self.op.mode == constants.INSTANCE_IMPORT:
6979         raise errors.OpPrereqError("Disk adoption not allowed for"
6980                                    " instance import", errors.ECODE_INVAL)
6981
6982     self.adopt_disks = has_adopt
6983
6984     # instance name verification
6985     if self.op.name_check:
6986       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6987       self.op.instance_name = self.hostname1.name
6988       # used in CheckPrereq for ip ping check
6989       self.check_ip = self.hostname1.ip
6990     else:
6991       self.check_ip = None
6992
6993     # file storage checks
6994     if (self.op.file_driver and
6995         not self.op.file_driver in constants.FILE_DRIVER):
6996       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6997                                  self.op.file_driver, errors.ECODE_INVAL)
6998
6999     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7000       raise errors.OpPrereqError("File storage directory path not absolute",
7001                                  errors.ECODE_INVAL)
7002
7003     ### Node/iallocator related checks
7004     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7005
7006     if self.op.pnode is not None:
7007       if self.op.disk_template in constants.DTS_NET_MIRROR:
7008         if self.op.snode is None:
7009           raise errors.OpPrereqError("The networked disk templates need"
7010                                      " a mirror node", errors.ECODE_INVAL)
7011       elif self.op.snode:
7012         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7013                         " template")
7014         self.op.snode = None
7015
7016     self._cds = _GetClusterDomainSecret()
7017
7018     if self.op.mode == constants.INSTANCE_IMPORT:
7019       # On import force_variant must be True, because if we forced it at
7020       # initial install, our only chance when importing it back is that it
7021       # works again!
7022       self.op.force_variant = True
7023
7024       if self.op.no_install:
7025         self.LogInfo("No-installation mode has no effect during import")
7026
7027     elif self.op.mode == constants.INSTANCE_CREATE:
7028       if self.op.os_type is None:
7029         raise errors.OpPrereqError("No guest OS specified",
7030                                    errors.ECODE_INVAL)
7031       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7032         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7033                                    " installation" % self.op.os_type,
7034                                    errors.ECODE_STATE)
7035       if self.op.disk_template is None:
7036         raise errors.OpPrereqError("No disk template specified",
7037                                    errors.ECODE_INVAL)
7038
7039     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7040       # Check handshake to ensure both clusters have the same domain secret
7041       src_handshake = self.op.source_handshake
7042       if not src_handshake:
7043         raise errors.OpPrereqError("Missing source handshake",
7044                                    errors.ECODE_INVAL)
7045
7046       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7047                                                            src_handshake)
7048       if errmsg:
7049         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7050                                    errors.ECODE_INVAL)
7051
7052       # Load and check source CA
7053       self.source_x509_ca_pem = self.op.source_x509_ca
7054       if not self.source_x509_ca_pem:
7055         raise errors.OpPrereqError("Missing source X509 CA",
7056                                    errors.ECODE_INVAL)
7057
7058       try:
7059         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7060                                                     self._cds)
7061       except OpenSSL.crypto.Error, err:
7062         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7063                                    (err, ), errors.ECODE_INVAL)
7064
7065       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7066       if errcode is not None:
7067         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7068                                    errors.ECODE_INVAL)
7069
7070       self.source_x509_ca = cert
7071
7072       src_instance_name = self.op.source_instance_name
7073       if not src_instance_name:
7074         raise errors.OpPrereqError("Missing source instance name",
7075                                    errors.ECODE_INVAL)
7076
7077       self.source_instance_name = \
7078           netutils.GetHostname(name=src_instance_name).name
7079
7080     else:
7081       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7082                                  self.op.mode, errors.ECODE_INVAL)
7083
7084   def ExpandNames(self):
7085     """ExpandNames for CreateInstance.
7086
7087     Figure out the right locks for instance creation.
7088
7089     """
7090     self.needed_locks = {}
7091
7092     instance_name = self.op.instance_name
7093     # this is just a preventive check, but someone might still add this
7094     # instance in the meantime, and creation will fail at lock-add time
7095     if instance_name in self.cfg.GetInstanceList():
7096       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7097                                  instance_name, errors.ECODE_EXISTS)
7098
7099     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7100
7101     if self.op.iallocator:
7102       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7103     else:
7104       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7105       nodelist = [self.op.pnode]
7106       if self.op.snode is not None:
7107         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7108         nodelist.append(self.op.snode)
7109       self.needed_locks[locking.LEVEL_NODE] = nodelist
7110
7111     # in case of import lock the source node too
7112     if self.op.mode == constants.INSTANCE_IMPORT:
7113       src_node = self.op.src_node
7114       src_path = self.op.src_path
7115
7116       if src_path is None:
7117         self.op.src_path = src_path = self.op.instance_name
7118
7119       if src_node is None:
7120         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7121         self.op.src_node = None
7122         if os.path.isabs(src_path):
7123           raise errors.OpPrereqError("Importing an instance from an absolute"
7124                                      " path requires a source node option.",
7125                                      errors.ECODE_INVAL)
7126       else:
7127         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7128         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7129           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7130         if not os.path.isabs(src_path):
7131           self.op.src_path = src_path = \
7132             utils.PathJoin(constants.EXPORT_DIR, src_path)
7133
7134   def _RunAllocator(self):
7135     """Run the allocator based on input opcode.
7136
7137     """
7138     nics = [n.ToDict() for n in self.nics]
7139     ial = IAllocator(self.cfg, self.rpc,
7140                      mode=constants.IALLOCATOR_MODE_ALLOC,
7141                      name=self.op.instance_name,
7142                      disk_template=self.op.disk_template,
7143                      tags=[],
7144                      os=self.op.os_type,
7145                      vcpus=self.be_full[constants.BE_VCPUS],
7146                      mem_size=self.be_full[constants.BE_MEMORY],
7147                      disks=self.disks,
7148                      nics=nics,
7149                      hypervisor=self.op.hypervisor,
7150                      )
7151
7152     ial.Run(self.op.iallocator)
7153
7154     if not ial.success:
7155       raise errors.OpPrereqError("Can't compute nodes using"
7156                                  " iallocator '%s': %s" %
7157                                  (self.op.iallocator, ial.info),
7158                                  errors.ECODE_NORES)
7159     if len(ial.result) != ial.required_nodes:
7160       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7161                                  " of nodes (%s), required %s" %
7162                                  (self.op.iallocator, len(ial.result),
7163                                   ial.required_nodes), errors.ECODE_FAULT)
7164     self.op.pnode = ial.result[0]
7165     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7166                  self.op.instance_name, self.op.iallocator,
7167                  utils.CommaJoin(ial.result))
7168     if ial.required_nodes == 2:
7169       self.op.snode = ial.result[1]
7170
7171   def BuildHooksEnv(self):
7172     """Build hooks env.
7173
7174     This runs on master, primary and secondary nodes of the instance.
7175
7176     """
7177     env = {
7178       "ADD_MODE": self.op.mode,
7179       }
7180     if self.op.mode == constants.INSTANCE_IMPORT:
7181       env["SRC_NODE"] = self.op.src_node
7182       env["SRC_PATH"] = self.op.src_path
7183       env["SRC_IMAGES"] = self.src_images
7184
7185     env.update(_BuildInstanceHookEnv(
7186       name=self.op.instance_name,
7187       primary_node=self.op.pnode,
7188       secondary_nodes=self.secondaries,
7189       status=self.op.start,
7190       os_type=self.op.os_type,
7191       memory=self.be_full[constants.BE_MEMORY],
7192       vcpus=self.be_full[constants.BE_VCPUS],
7193       nics=_NICListToTuple(self, self.nics),
7194       disk_template=self.op.disk_template,
7195       disks=[(d["size"], d["mode"]) for d in self.disks],
7196       bep=self.be_full,
7197       hvp=self.hv_full,
7198       hypervisor_name=self.op.hypervisor,
7199     ))
7200
7201     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7202           self.secondaries)
7203     return env, nl, nl
7204
7205   def _ReadExportInfo(self):
7206     """Reads the export information from disk.
7207
7208     It will override the opcode source node and path with the actual
7209     information, if these two were not specified before.
7210
7211     @return: the export information
7212
7213     """
7214     assert self.op.mode == constants.INSTANCE_IMPORT
7215
7216     src_node = self.op.src_node
7217     src_path = self.op.src_path
7218
7219     if src_node is None:
7220       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7221       exp_list = self.rpc.call_export_list(locked_nodes)
7222       found = False
7223       for node in exp_list:
7224         if exp_list[node].fail_msg:
7225           continue
7226         if src_path in exp_list[node].payload:
7227           found = True
7228           self.op.src_node = src_node = node
7229           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7230                                                        src_path)
7231           break
7232       if not found:
7233         raise errors.OpPrereqError("No export found for relative path %s" %
7234                                     src_path, errors.ECODE_INVAL)
7235
7236     _CheckNodeOnline(self, src_node)
7237     result = self.rpc.call_export_info(src_node, src_path)
7238     result.Raise("No export or invalid export found in dir %s" % src_path)
7239
7240     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7241     if not export_info.has_section(constants.INISECT_EXP):
7242       raise errors.ProgrammerError("Corrupted export config",
7243                                    errors.ECODE_ENVIRON)
7244
7245     ei_version = export_info.get(constants.INISECT_EXP, "version")
7246     if (int(ei_version) != constants.EXPORT_VERSION):
7247       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7248                                  (ei_version, constants.EXPORT_VERSION),
7249                                  errors.ECODE_ENVIRON)
7250     return export_info
7251
7252   def _ReadExportParams(self, einfo):
7253     """Use export parameters as defaults.
7254
7255     In case the opcode doesn't specify (as in override) some instance
7256     parameters, then try to use them from the export information, if
7257     that declares them.
7258
7259     """
7260     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7261
7262     if self.op.disk_template is None:
7263       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7264         self.op.disk_template = einfo.get(constants.INISECT_INS,
7265                                           "disk_template")
7266       else:
7267         raise errors.OpPrereqError("No disk template specified and the export"
7268                                    " is missing the disk_template information",
7269                                    errors.ECODE_INVAL)
7270
7271     if not self.op.disks:
7272       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7273         disks = []
7274         # TODO: import the disk iv_name too
7275         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7276           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7277           disks.append({"size": disk_sz})
7278         self.op.disks = disks
7279       else:
7280         raise errors.OpPrereqError("No disk info specified and the export"
7281                                    " is missing the disk information",
7282                                    errors.ECODE_INVAL)
7283
7284     if (not self.op.nics and
7285         einfo.has_option(constants.INISECT_INS, "nic_count")):
7286       nics = []
7287       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7288         ndict = {}
7289         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7290           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7291           ndict[name] = v
7292         nics.append(ndict)
7293       self.op.nics = nics
7294
7295     if (self.op.hypervisor is None and
7296         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7297       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7298     if einfo.has_section(constants.INISECT_HYP):
7299       # use the export parameters but do not override the ones
7300       # specified by the user
7301       for name, value in einfo.items(constants.INISECT_HYP):
7302         if name not in self.op.hvparams:
7303           self.op.hvparams[name] = value
7304
7305     if einfo.has_section(constants.INISECT_BEP):
7306       # use the parameters, without overriding
7307       for name, value in einfo.items(constants.INISECT_BEP):
7308         if name not in self.op.beparams:
7309           self.op.beparams[name] = value
7310     else:
7311       # try to read the parameters old style, from the main section
7312       for name in constants.BES_PARAMETERS:
7313         if (name not in self.op.beparams and
7314             einfo.has_option(constants.INISECT_INS, name)):
7315           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7316
7317     if einfo.has_section(constants.INISECT_OSP):
7318       # use the parameters, without overriding
7319       for name, value in einfo.items(constants.INISECT_OSP):
7320         if name not in self.op.osparams:
7321           self.op.osparams[name] = value
7322
7323   def _RevertToDefaults(self, cluster):
7324     """Revert the instance parameters to the default values.
7325
7326     """
7327     # hvparams
7328     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7329     for name in self.op.hvparams.keys():
7330       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7331         del self.op.hvparams[name]
7332     # beparams
7333     be_defs = cluster.SimpleFillBE({})
7334     for name in self.op.beparams.keys():
7335       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7336         del self.op.beparams[name]
7337     # nic params
7338     nic_defs = cluster.SimpleFillNIC({})
7339     for nic in self.op.nics:
7340       for name in constants.NICS_PARAMETERS:
7341         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7342           del nic[name]
7343     # osparams
7344     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7345     for name in self.op.osparams.keys():
7346       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7347         del self.op.osparams[name]
7348
7349   def CheckPrereq(self):
7350     """Check prerequisites.
7351
7352     """
7353     if self.op.mode == constants.INSTANCE_IMPORT:
7354       export_info = self._ReadExportInfo()
7355       self._ReadExportParams(export_info)
7356
7357     if (not self.cfg.GetVGName() and
7358         self.op.disk_template not in constants.DTS_NOT_LVM):
7359       raise errors.OpPrereqError("Cluster does not support lvm-based"
7360                                  " instances", errors.ECODE_STATE)
7361
7362     if self.op.hypervisor is None:
7363       self.op.hypervisor = self.cfg.GetHypervisorType()
7364
7365     cluster = self.cfg.GetClusterInfo()
7366     enabled_hvs = cluster.enabled_hypervisors
7367     if self.op.hypervisor not in enabled_hvs:
7368       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7369                                  " cluster (%s)" % (self.op.hypervisor,
7370                                   ",".join(enabled_hvs)),
7371                                  errors.ECODE_STATE)
7372
7373     # check hypervisor parameter syntax (locally)
7374     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7375     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7376                                       self.op.hvparams)
7377     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7378     hv_type.CheckParameterSyntax(filled_hvp)
7379     self.hv_full = filled_hvp
7380     # check that we don't specify global parameters on an instance
7381     _CheckGlobalHvParams(self.op.hvparams)
7382
7383     # fill and remember the beparams dict
7384     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7385     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7386
7387     # build os parameters
7388     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7389
7390     # now that hvp/bep are in final format, let's reset to defaults,
7391     # if told to do so
7392     if self.op.identify_defaults:
7393       self._RevertToDefaults(cluster)
7394
7395     # NIC buildup
7396     self.nics = []
7397     for idx, nic in enumerate(self.op.nics):
7398       nic_mode_req = nic.get("mode", None)
7399       nic_mode = nic_mode_req
7400       if nic_mode is None:
7401         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7402
7403       # in routed mode, for the first nic, the default ip is 'auto'
7404       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7405         default_ip_mode = constants.VALUE_AUTO
7406       else:
7407         default_ip_mode = constants.VALUE_NONE
7408
7409       # ip validity checks
7410       ip = nic.get("ip", default_ip_mode)
7411       if ip is None or ip.lower() == constants.VALUE_NONE:
7412         nic_ip = None
7413       elif ip.lower() == constants.VALUE_AUTO:
7414         if not self.op.name_check:
7415           raise errors.OpPrereqError("IP address set to auto but name checks"
7416                                      " have been skipped",
7417                                      errors.ECODE_INVAL)
7418         nic_ip = self.hostname1.ip
7419       else:
7420         if not netutils.IPAddress.IsValid(ip):
7421           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7422                                      errors.ECODE_INVAL)
7423         nic_ip = ip
7424
7425       # TODO: check the ip address for uniqueness
7426       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7427         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7428                                    errors.ECODE_INVAL)
7429
7430       # MAC address verification
7431       mac = nic.get("mac", constants.VALUE_AUTO)
7432       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7433         mac = utils.NormalizeAndValidateMac(mac)
7434
7435         try:
7436           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7437         except errors.ReservationError:
7438           raise errors.OpPrereqError("MAC address %s already in use"
7439                                      " in cluster" % mac,
7440                                      errors.ECODE_NOTUNIQUE)
7441
7442       # bridge verification
7443       bridge = nic.get("bridge", None)
7444       link = nic.get("link", None)
7445       if bridge and link:
7446         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7447                                    " at the same time", errors.ECODE_INVAL)
7448       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7449         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7450                                    errors.ECODE_INVAL)
7451       elif bridge:
7452         link = bridge
7453
7454       nicparams = {}
7455       if nic_mode_req:
7456         nicparams[constants.NIC_MODE] = nic_mode_req
7457       if link:
7458         nicparams[constants.NIC_LINK] = link
7459
7460       check_params = cluster.SimpleFillNIC(nicparams)
7461       objects.NIC.CheckParameterSyntax(check_params)
7462       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7463
7464     # disk checks/pre-build
7465     self.disks = []
7466     for disk in self.op.disks:
7467       mode = disk.get("mode", constants.DISK_RDWR)
7468       if mode not in constants.DISK_ACCESS_SET:
7469         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7470                                    mode, errors.ECODE_INVAL)
7471       size = disk.get("size", None)
7472       if size is None:
7473         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7474       try:
7475         size = int(size)
7476       except (TypeError, ValueError):
7477         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7478                                    errors.ECODE_INVAL)
7479       vg = disk.get("vg", self.cfg.GetVGName())
7480       new_disk = {"size": size, "mode": mode, "vg": vg}
7481       if "adopt" in disk:
7482         new_disk["adopt"] = disk["adopt"]
7483       self.disks.append(new_disk)
7484
7485     if self.op.mode == constants.INSTANCE_IMPORT:
7486
7487       # Check that the new instance doesn't have less disks than the export
7488       instance_disks = len(self.disks)
7489       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7490       if instance_disks < export_disks:
7491         raise errors.OpPrereqError("Not enough disks to import."
7492                                    " (instance: %d, export: %d)" %
7493                                    (instance_disks, export_disks),
7494                                    errors.ECODE_INVAL)
7495
7496       disk_images = []
7497       for idx in range(export_disks):
7498         option = 'disk%d_dump' % idx
7499         if export_info.has_option(constants.INISECT_INS, option):
7500           # FIXME: are the old os-es, disk sizes, etc. useful?
7501           export_name = export_info.get(constants.INISECT_INS, option)
7502           image = utils.PathJoin(self.op.src_path, export_name)
7503           disk_images.append(image)
7504         else:
7505           disk_images.append(False)
7506
7507       self.src_images = disk_images
7508
7509       old_name = export_info.get(constants.INISECT_INS, 'name')
7510       try:
7511         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7512       except (TypeError, ValueError), err:
7513         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7514                                    " an integer: %s" % str(err),
7515                                    errors.ECODE_STATE)
7516       if self.op.instance_name == old_name:
7517         for idx, nic in enumerate(self.nics):
7518           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7519             nic_mac_ini = 'nic%d_mac' % idx
7520             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7521
7522     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7523
7524     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7525     if self.op.ip_check:
7526       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7527         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7528                                    (self.check_ip, self.op.instance_name),
7529                                    errors.ECODE_NOTUNIQUE)
7530
7531     #### mac address generation
7532     # By generating here the mac address both the allocator and the hooks get
7533     # the real final mac address rather than the 'auto' or 'generate' value.
7534     # There is a race condition between the generation and the instance object
7535     # creation, which means that we know the mac is valid now, but we're not
7536     # sure it will be when we actually add the instance. If things go bad
7537     # adding the instance will abort because of a duplicate mac, and the
7538     # creation job will fail.
7539     for nic in self.nics:
7540       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7541         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7542
7543     #### allocator run
7544
7545     if self.op.iallocator is not None:
7546       self._RunAllocator()
7547
7548     #### node related checks
7549
7550     # check primary node
7551     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7552     assert self.pnode is not None, \
7553       "Cannot retrieve locked node %s" % self.op.pnode
7554     if pnode.offline:
7555       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7556                                  pnode.name, errors.ECODE_STATE)
7557     if pnode.drained:
7558       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7559                                  pnode.name, errors.ECODE_STATE)
7560     if not pnode.vm_capable:
7561       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7562                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7563
7564     self.secondaries = []
7565
7566     # mirror node verification
7567     if self.op.disk_template in constants.DTS_NET_MIRROR:
7568       if self.op.snode == pnode.name:
7569         raise errors.OpPrereqError("The secondary node cannot be the"
7570                                    " primary node.", errors.ECODE_INVAL)
7571       _CheckNodeOnline(self, self.op.snode)
7572       _CheckNodeNotDrained(self, self.op.snode)
7573       _CheckNodeVmCapable(self, self.op.snode)
7574       self.secondaries.append(self.op.snode)
7575
7576     nodenames = [pnode.name] + self.secondaries
7577
7578     if not self.adopt_disks:
7579       # Check lv size requirements, if not adopting
7580       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7581       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7582
7583     else: # instead, we must check the adoption data
7584       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7585       if len(all_lvs) != len(self.disks):
7586         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7587                                    errors.ECODE_INVAL)
7588       for lv_name in all_lvs:
7589         try:
7590           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7591           # to ReserveLV uses the same syntax
7592           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7593         except errors.ReservationError:
7594           raise errors.OpPrereqError("LV named %s used by another instance" %
7595                                      lv_name, errors.ECODE_NOTUNIQUE)
7596
7597       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7598       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7599
7600       node_lvs = self.rpc.call_lv_list([pnode.name],
7601                                        vg_names.payload.keys())[pnode.name]
7602       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7603       node_lvs = node_lvs.payload
7604
7605       delta = all_lvs.difference(node_lvs.keys())
7606       if delta:
7607         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7608                                    utils.CommaJoin(delta),
7609                                    errors.ECODE_INVAL)
7610       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7611       if online_lvs:
7612         raise errors.OpPrereqError("Online logical volumes found, cannot"
7613                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7614                                    errors.ECODE_STATE)
7615       # update the size of disk based on what is found
7616       for dsk in self.disks:
7617         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7618
7619     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7620
7621     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7622     # check OS parameters (remotely)
7623     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7624
7625     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7626
7627     # memory check on primary node
7628     if self.op.start:
7629       _CheckNodeFreeMemory(self, self.pnode.name,
7630                            "creating instance %s" % self.op.instance_name,
7631                            self.be_full[constants.BE_MEMORY],
7632                            self.op.hypervisor)
7633
7634     self.dry_run_result = list(nodenames)
7635
7636   def Exec(self, feedback_fn):
7637     """Create and add the instance to the cluster.
7638
7639     """
7640     instance = self.op.instance_name
7641     pnode_name = self.pnode.name
7642
7643     ht_kind = self.op.hypervisor
7644     if ht_kind in constants.HTS_REQ_PORT:
7645       network_port = self.cfg.AllocatePort()
7646     else:
7647       network_port = None
7648
7649     if constants.ENABLE_FILE_STORAGE:
7650       # this is needed because os.path.join does not accept None arguments
7651       if self.op.file_storage_dir is None:
7652         string_file_storage_dir = ""
7653       else:
7654         string_file_storage_dir = self.op.file_storage_dir
7655
7656       # build the full file storage dir path
7657       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7658                                         string_file_storage_dir, instance)
7659     else:
7660       file_storage_dir = ""
7661
7662     disks = _GenerateDiskTemplate(self,
7663                                   self.op.disk_template,
7664                                   instance, pnode_name,
7665                                   self.secondaries,
7666                                   self.disks,
7667                                   file_storage_dir,
7668                                   self.op.file_driver,
7669                                   0,
7670                                   feedback_fn)
7671
7672     iobj = objects.Instance(name=instance, os=self.op.os_type,
7673                             primary_node=pnode_name,
7674                             nics=self.nics, disks=disks,
7675                             disk_template=self.op.disk_template,
7676                             admin_up=False,
7677                             network_port=network_port,
7678                             beparams=self.op.beparams,
7679                             hvparams=self.op.hvparams,
7680                             hypervisor=self.op.hypervisor,
7681                             osparams=self.op.osparams,
7682                             )
7683
7684     if self.adopt_disks:
7685       # rename LVs to the newly-generated names; we need to construct
7686       # 'fake' LV disks with the old data, plus the new unique_id
7687       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7688       rename_to = []
7689       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7690         rename_to.append(t_dsk.logical_id)
7691         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7692         self.cfg.SetDiskID(t_dsk, pnode_name)
7693       result = self.rpc.call_blockdev_rename(pnode_name,
7694                                              zip(tmp_disks, rename_to))
7695       result.Raise("Failed to rename adoped LVs")
7696     else:
7697       feedback_fn("* creating instance disks...")
7698       try:
7699         _CreateDisks(self, iobj)
7700       except errors.OpExecError:
7701         self.LogWarning("Device creation failed, reverting...")
7702         try:
7703           _RemoveDisks(self, iobj)
7704         finally:
7705           self.cfg.ReleaseDRBDMinors(instance)
7706           raise
7707
7708       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7709         feedback_fn("* wiping instance disks...")
7710         try:
7711           _WipeDisks(self, iobj)
7712         except errors.OpExecError:
7713           self.LogWarning("Device wiping failed, reverting...")
7714           try:
7715             _RemoveDisks(self, iobj)
7716           finally:
7717             self.cfg.ReleaseDRBDMinors(instance)
7718             raise
7719
7720     feedback_fn("adding instance %s to cluster config" % instance)
7721
7722     self.cfg.AddInstance(iobj, self.proc.GetECId())
7723
7724     # Declare that we don't want to remove the instance lock anymore, as we've
7725     # added the instance to the config
7726     del self.remove_locks[locking.LEVEL_INSTANCE]
7727     # Unlock all the nodes
7728     if self.op.mode == constants.INSTANCE_IMPORT:
7729       nodes_keep = [self.op.src_node]
7730       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7731                        if node != self.op.src_node]
7732       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7733       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7734     else:
7735       self.context.glm.release(locking.LEVEL_NODE)
7736       del self.acquired_locks[locking.LEVEL_NODE]
7737
7738     if self.op.wait_for_sync:
7739       disk_abort = not _WaitForSync(self, iobj)
7740     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7741       # make sure the disks are not degraded (still sync-ing is ok)
7742       time.sleep(15)
7743       feedback_fn("* checking mirrors status")
7744       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7745     else:
7746       disk_abort = False
7747
7748     if disk_abort:
7749       _RemoveDisks(self, iobj)
7750       self.cfg.RemoveInstance(iobj.name)
7751       # Make sure the instance lock gets removed
7752       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7753       raise errors.OpExecError("There are some degraded disks for"
7754                                " this instance")
7755
7756     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7757       if self.op.mode == constants.INSTANCE_CREATE:
7758         if not self.op.no_install:
7759           feedback_fn("* running the instance OS create scripts...")
7760           # FIXME: pass debug option from opcode to backend
7761           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7762                                                  self.op.debug_level)
7763           result.Raise("Could not add os for instance %s"
7764                        " on node %s" % (instance, pnode_name))
7765
7766       elif self.op.mode == constants.INSTANCE_IMPORT:
7767         feedback_fn("* running the instance OS import scripts...")
7768
7769         transfers = []
7770
7771         for idx, image in enumerate(self.src_images):
7772           if not image:
7773             continue
7774
7775           # FIXME: pass debug option from opcode to backend
7776           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7777                                              constants.IEIO_FILE, (image, ),
7778                                              constants.IEIO_SCRIPT,
7779                                              (iobj.disks[idx], idx),
7780                                              None)
7781           transfers.append(dt)
7782
7783         import_result = \
7784           masterd.instance.TransferInstanceData(self, feedback_fn,
7785                                                 self.op.src_node, pnode_name,
7786                                                 self.pnode.secondary_ip,
7787                                                 iobj, transfers)
7788         if not compat.all(import_result):
7789           self.LogWarning("Some disks for instance %s on node %s were not"
7790                           " imported successfully" % (instance, pnode_name))
7791
7792       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7793         feedback_fn("* preparing remote import...")
7794         # The source cluster will stop the instance before attempting to make a
7795         # connection. In some cases stopping an instance can take a long time,
7796         # hence the shutdown timeout is added to the connection timeout.
7797         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7798                            self.op.source_shutdown_timeout)
7799         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7800
7801         assert iobj.primary_node == self.pnode.name
7802         disk_results = \
7803           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7804                                         self.source_x509_ca,
7805                                         self._cds, timeouts)
7806         if not compat.all(disk_results):
7807           # TODO: Should the instance still be started, even if some disks
7808           # failed to import (valid for local imports, too)?
7809           self.LogWarning("Some disks for instance %s on node %s were not"
7810                           " imported successfully" % (instance, pnode_name))
7811
7812         # Run rename script on newly imported instance
7813         assert iobj.name == instance
7814         feedback_fn("Running rename script for %s" % instance)
7815         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7816                                                    self.source_instance_name,
7817                                                    self.op.debug_level)
7818         if result.fail_msg:
7819           self.LogWarning("Failed to run rename script for %s on node"
7820                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7821
7822       else:
7823         # also checked in the prereq part
7824         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7825                                      % self.op.mode)
7826
7827     if self.op.start:
7828       iobj.admin_up = True
7829       self.cfg.Update(iobj, feedback_fn)
7830       logging.info("Starting instance %s on node %s", instance, pnode_name)
7831       feedback_fn("* starting instance...")
7832       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7833       result.Raise("Could not start instance")
7834
7835     return list(iobj.all_nodes)
7836
7837
7838 class LUInstanceConsole(NoHooksLU):
7839   """Connect to an instance's console.
7840
7841   This is somewhat special in that it returns the command line that
7842   you need to run on the master node in order to connect to the
7843   console.
7844
7845   """
7846   REQ_BGL = False
7847
7848   def ExpandNames(self):
7849     self._ExpandAndLockInstance()
7850
7851   def CheckPrereq(self):
7852     """Check prerequisites.
7853
7854     This checks that the instance is in the cluster.
7855
7856     """
7857     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7858     assert self.instance is not None, \
7859       "Cannot retrieve locked instance %s" % self.op.instance_name
7860     _CheckNodeOnline(self, self.instance.primary_node)
7861
7862   def Exec(self, feedback_fn):
7863     """Connect to the console of an instance
7864
7865     """
7866     instance = self.instance
7867     node = instance.primary_node
7868
7869     node_insts = self.rpc.call_instance_list([node],
7870                                              [instance.hypervisor])[node]
7871     node_insts.Raise("Can't get node information from %s" % node)
7872
7873     if instance.name not in node_insts.payload:
7874       if instance.admin_up:
7875         state = "ERROR_down"
7876       else:
7877         state = "ADMIN_down"
7878       raise errors.OpExecError("Instance %s is not running (state %s)" %
7879                                (instance.name, state))
7880
7881     logging.debug("Connecting to console of %s on %s", instance.name, node)
7882
7883     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7884
7885
7886 def _GetInstanceConsole(cluster, instance):
7887   """Returns console information for an instance.
7888
7889   @type cluster: L{objects.Cluster}
7890   @type instance: L{objects.Instance}
7891   @rtype: dict
7892
7893   """
7894   hyper = hypervisor.GetHypervisor(instance.hypervisor)
7895   # beparams and hvparams are passed separately, to avoid editing the
7896   # instance and then saving the defaults in the instance itself.
7897   hvparams = cluster.FillHV(instance)
7898   beparams = cluster.FillBE(instance)
7899   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7900
7901   assert console.instance == instance.name
7902   assert console.Validate()
7903
7904   return console.ToDict()
7905
7906
7907 class LUInstanceReplaceDisks(LogicalUnit):
7908   """Replace the disks of an instance.
7909
7910   """
7911   HPATH = "mirrors-replace"
7912   HTYPE = constants.HTYPE_INSTANCE
7913   REQ_BGL = False
7914
7915   def CheckArguments(self):
7916     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7917                                   self.op.iallocator)
7918
7919   def ExpandNames(self):
7920     self._ExpandAndLockInstance()
7921
7922     if self.op.iallocator is not None:
7923       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7924
7925     elif self.op.remote_node is not None:
7926       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7927       self.op.remote_node = remote_node
7928
7929       # Warning: do not remove the locking of the new secondary here
7930       # unless DRBD8.AddChildren is changed to work in parallel;
7931       # currently it doesn't since parallel invocations of
7932       # FindUnusedMinor will conflict
7933       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7934       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7935
7936     else:
7937       self.needed_locks[locking.LEVEL_NODE] = []
7938       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7939
7940     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7941                                    self.op.iallocator, self.op.remote_node,
7942                                    self.op.disks, False, self.op.early_release)
7943
7944     self.tasklets = [self.replacer]
7945
7946   def DeclareLocks(self, level):
7947     # If we're not already locking all nodes in the set we have to declare the
7948     # instance's primary/secondary nodes.
7949     if (level == locking.LEVEL_NODE and
7950         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7951       self._LockInstancesNodes()
7952
7953   def BuildHooksEnv(self):
7954     """Build hooks env.
7955
7956     This runs on the master, the primary and all the secondaries.
7957
7958     """
7959     instance = self.replacer.instance
7960     env = {
7961       "MODE": self.op.mode,
7962       "NEW_SECONDARY": self.op.remote_node,
7963       "OLD_SECONDARY": instance.secondary_nodes[0],
7964       }
7965     env.update(_BuildInstanceHookEnvByObject(self, instance))
7966     nl = [
7967       self.cfg.GetMasterNode(),
7968       instance.primary_node,
7969       ]
7970     if self.op.remote_node is not None:
7971       nl.append(self.op.remote_node)
7972     return env, nl, nl
7973
7974
7975 class TLReplaceDisks(Tasklet):
7976   """Replaces disks for an instance.
7977
7978   Note: Locking is not within the scope of this class.
7979
7980   """
7981   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7982                disks, delay_iallocator, early_release):
7983     """Initializes this class.
7984
7985     """
7986     Tasklet.__init__(self, lu)
7987
7988     # Parameters
7989     self.instance_name = instance_name
7990     self.mode = mode
7991     self.iallocator_name = iallocator_name
7992     self.remote_node = remote_node
7993     self.disks = disks
7994     self.delay_iallocator = delay_iallocator
7995     self.early_release = early_release
7996
7997     # Runtime data
7998     self.instance = None
7999     self.new_node = None
8000     self.target_node = None
8001     self.other_node = None
8002     self.remote_node_info = None
8003     self.node_secondary_ip = None
8004
8005   @staticmethod
8006   def CheckArguments(mode, remote_node, iallocator):
8007     """Helper function for users of this class.
8008
8009     """
8010     # check for valid parameter combination
8011     if mode == constants.REPLACE_DISK_CHG:
8012       if remote_node is None and iallocator is None:
8013         raise errors.OpPrereqError("When changing the secondary either an"
8014                                    " iallocator script must be used or the"
8015                                    " new node given", errors.ECODE_INVAL)
8016
8017       if remote_node is not None and iallocator is not None:
8018         raise errors.OpPrereqError("Give either the iallocator or the new"
8019                                    " secondary, not both", errors.ECODE_INVAL)
8020
8021     elif remote_node is not None or iallocator is not None:
8022       # Not replacing the secondary
8023       raise errors.OpPrereqError("The iallocator and new node options can"
8024                                  " only be used when changing the"
8025                                  " secondary node", errors.ECODE_INVAL)
8026
8027   @staticmethod
8028   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8029     """Compute a new secondary node using an IAllocator.
8030
8031     """
8032     ial = IAllocator(lu.cfg, lu.rpc,
8033                      mode=constants.IALLOCATOR_MODE_RELOC,
8034                      name=instance_name,
8035                      relocate_from=relocate_from)
8036
8037     ial.Run(iallocator_name)
8038
8039     if not ial.success:
8040       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8041                                  " %s" % (iallocator_name, ial.info),
8042                                  errors.ECODE_NORES)
8043
8044     if len(ial.result) != ial.required_nodes:
8045       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8046                                  " of nodes (%s), required %s" %
8047                                  (iallocator_name,
8048                                   len(ial.result), ial.required_nodes),
8049                                  errors.ECODE_FAULT)
8050
8051     remote_node_name = ial.result[0]
8052
8053     lu.LogInfo("Selected new secondary for instance '%s': %s",
8054                instance_name, remote_node_name)
8055
8056     return remote_node_name
8057
8058   def _FindFaultyDisks(self, node_name):
8059     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8060                                     node_name, True)
8061
8062   def CheckPrereq(self):
8063     """Check prerequisites.
8064
8065     This checks that the instance is in the cluster.
8066
8067     """
8068     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8069     assert instance is not None, \
8070       "Cannot retrieve locked instance %s" % self.instance_name
8071
8072     if instance.disk_template != constants.DT_DRBD8:
8073       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8074                                  " instances", errors.ECODE_INVAL)
8075
8076     if len(instance.secondary_nodes) != 1:
8077       raise errors.OpPrereqError("The instance has a strange layout,"
8078                                  " expected one secondary but found %d" %
8079                                  len(instance.secondary_nodes),
8080                                  errors.ECODE_FAULT)
8081
8082     if not self.delay_iallocator:
8083       self._CheckPrereq2()
8084
8085   def _CheckPrereq2(self):
8086     """Check prerequisites, second part.
8087
8088     This function should always be part of CheckPrereq. It was separated and is
8089     now called from Exec because during node evacuation iallocator was only
8090     called with an unmodified cluster model, not taking planned changes into
8091     account.
8092
8093     """
8094     instance = self.instance
8095     secondary_node = instance.secondary_nodes[0]
8096
8097     if self.iallocator_name is None:
8098       remote_node = self.remote_node
8099     else:
8100       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8101                                        instance.name, instance.secondary_nodes)
8102
8103     if remote_node is not None:
8104       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8105       assert self.remote_node_info is not None, \
8106         "Cannot retrieve locked node %s" % remote_node
8107     else:
8108       self.remote_node_info = None
8109
8110     if remote_node == self.instance.primary_node:
8111       raise errors.OpPrereqError("The specified node is the primary node of"
8112                                  " the instance.", errors.ECODE_INVAL)
8113
8114     if remote_node == secondary_node:
8115       raise errors.OpPrereqError("The specified node is already the"
8116                                  " secondary node of the instance.",
8117                                  errors.ECODE_INVAL)
8118
8119     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8120                                     constants.REPLACE_DISK_CHG):
8121       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8122                                  errors.ECODE_INVAL)
8123
8124     if self.mode == constants.REPLACE_DISK_AUTO:
8125       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8126       faulty_secondary = self._FindFaultyDisks(secondary_node)
8127
8128       if faulty_primary and faulty_secondary:
8129         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8130                                    " one node and can not be repaired"
8131                                    " automatically" % self.instance_name,
8132                                    errors.ECODE_STATE)
8133
8134       if faulty_primary:
8135         self.disks = faulty_primary
8136         self.target_node = instance.primary_node
8137         self.other_node = secondary_node
8138         check_nodes = [self.target_node, self.other_node]
8139       elif faulty_secondary:
8140         self.disks = faulty_secondary
8141         self.target_node = secondary_node
8142         self.other_node = instance.primary_node
8143         check_nodes = [self.target_node, self.other_node]
8144       else:
8145         self.disks = []
8146         check_nodes = []
8147
8148     else:
8149       # Non-automatic modes
8150       if self.mode == constants.REPLACE_DISK_PRI:
8151         self.target_node = instance.primary_node
8152         self.other_node = secondary_node
8153         check_nodes = [self.target_node, self.other_node]
8154
8155       elif self.mode == constants.REPLACE_DISK_SEC:
8156         self.target_node = secondary_node
8157         self.other_node = instance.primary_node
8158         check_nodes = [self.target_node, self.other_node]
8159
8160       elif self.mode == constants.REPLACE_DISK_CHG:
8161         self.new_node = remote_node
8162         self.other_node = instance.primary_node
8163         self.target_node = secondary_node
8164         check_nodes = [self.new_node, self.other_node]
8165
8166         _CheckNodeNotDrained(self.lu, remote_node)
8167         _CheckNodeVmCapable(self.lu, remote_node)
8168
8169         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8170         assert old_node_info is not None
8171         if old_node_info.offline and not self.early_release:
8172           # doesn't make sense to delay the release
8173           self.early_release = True
8174           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8175                           " early-release mode", secondary_node)
8176
8177       else:
8178         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8179                                      self.mode)
8180
8181       # If not specified all disks should be replaced
8182       if not self.disks:
8183         self.disks = range(len(self.instance.disks))
8184
8185     for node in check_nodes:
8186       _CheckNodeOnline(self.lu, node)
8187
8188     # Check whether disks are valid
8189     for disk_idx in self.disks:
8190       instance.FindDisk(disk_idx)
8191
8192     # Get secondary node IP addresses
8193     node_2nd_ip = {}
8194
8195     for node_name in [self.target_node, self.other_node, self.new_node]:
8196       if node_name is not None:
8197         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8198
8199     self.node_secondary_ip = node_2nd_ip
8200
8201   def Exec(self, feedback_fn):
8202     """Execute disk replacement.
8203
8204     This dispatches the disk replacement to the appropriate handler.
8205
8206     """
8207     if self.delay_iallocator:
8208       self._CheckPrereq2()
8209
8210     if not self.disks:
8211       feedback_fn("No disks need replacement")
8212       return
8213
8214     feedback_fn("Replacing disk(s) %s for %s" %
8215                 (utils.CommaJoin(self.disks), self.instance.name))
8216
8217     activate_disks = (not self.instance.admin_up)
8218
8219     # Activate the instance disks if we're replacing them on a down instance
8220     if activate_disks:
8221       _StartInstanceDisks(self.lu, self.instance, True)
8222
8223     try:
8224       # Should we replace the secondary node?
8225       if self.new_node is not None:
8226         fn = self._ExecDrbd8Secondary
8227       else:
8228         fn = self._ExecDrbd8DiskOnly
8229
8230       return fn(feedback_fn)
8231
8232     finally:
8233       # Deactivate the instance disks if we're replacing them on a
8234       # down instance
8235       if activate_disks:
8236         _SafeShutdownInstanceDisks(self.lu, self.instance)
8237
8238   def _CheckVolumeGroup(self, nodes):
8239     self.lu.LogInfo("Checking volume groups")
8240
8241     vgname = self.cfg.GetVGName()
8242
8243     # Make sure volume group exists on all involved nodes
8244     results = self.rpc.call_vg_list(nodes)
8245     if not results:
8246       raise errors.OpExecError("Can't list volume groups on the nodes")
8247
8248     for node in nodes:
8249       res = results[node]
8250       res.Raise("Error checking node %s" % node)
8251       if vgname not in res.payload:
8252         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8253                                  (vgname, node))
8254
8255   def _CheckDisksExistence(self, nodes):
8256     # Check disk existence
8257     for idx, dev in enumerate(self.instance.disks):
8258       if idx not in self.disks:
8259         continue
8260
8261       for node in nodes:
8262         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8263         self.cfg.SetDiskID(dev, node)
8264
8265         result = self.rpc.call_blockdev_find(node, dev)
8266
8267         msg = result.fail_msg
8268         if msg or not result.payload:
8269           if not msg:
8270             msg = "disk not found"
8271           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8272                                    (idx, node, msg))
8273
8274   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8275     for idx, dev in enumerate(self.instance.disks):
8276       if idx not in self.disks:
8277         continue
8278
8279       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8280                       (idx, node_name))
8281
8282       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8283                                    ldisk=ldisk):
8284         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8285                                  " replace disks for instance %s" %
8286                                  (node_name, self.instance.name))
8287
8288   def _CreateNewStorage(self, node_name):
8289     vgname = self.cfg.GetVGName()
8290     iv_names = {}
8291
8292     for idx, dev in enumerate(self.instance.disks):
8293       if idx not in self.disks:
8294         continue
8295
8296       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8297
8298       self.cfg.SetDiskID(dev, node_name)
8299
8300       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8301       names = _GenerateUniqueNames(self.lu, lv_names)
8302
8303       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8304                              logical_id=(vgname, names[0]))
8305       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8306                              logical_id=(vgname, names[1]))
8307
8308       new_lvs = [lv_data, lv_meta]
8309       old_lvs = dev.children
8310       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8311
8312       # we pass force_create=True to force the LVM creation
8313       for new_lv in new_lvs:
8314         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8315                         _GetInstanceInfoText(self.instance), False)
8316
8317     return iv_names
8318
8319   def _CheckDevices(self, node_name, iv_names):
8320     for name, (dev, _, _) in iv_names.iteritems():
8321       self.cfg.SetDiskID(dev, node_name)
8322
8323       result = self.rpc.call_blockdev_find(node_name, dev)
8324
8325       msg = result.fail_msg
8326       if msg or not result.payload:
8327         if not msg:
8328           msg = "disk not found"
8329         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8330                                  (name, msg))
8331
8332       if result.payload.is_degraded:
8333         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8334
8335   def _RemoveOldStorage(self, node_name, iv_names):
8336     for name, (_, old_lvs, _) in iv_names.iteritems():
8337       self.lu.LogInfo("Remove logical volumes for %s" % name)
8338
8339       for lv in old_lvs:
8340         self.cfg.SetDiskID(lv, node_name)
8341
8342         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8343         if msg:
8344           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8345                              hint="remove unused LVs manually")
8346
8347   def _ReleaseNodeLock(self, node_name):
8348     """Releases the lock for a given node."""
8349     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8350
8351   def _ExecDrbd8DiskOnly(self, feedback_fn):
8352     """Replace a disk on the primary or secondary for DRBD 8.
8353
8354     The algorithm for replace is quite complicated:
8355
8356       1. for each disk to be replaced:
8357
8358         1. create new LVs on the target node with unique names
8359         1. detach old LVs from the drbd device
8360         1. rename old LVs to name_replaced.<time_t>
8361         1. rename new LVs to old LVs
8362         1. attach the new LVs (with the old names now) to the drbd device
8363
8364       1. wait for sync across all devices
8365
8366       1. for each modified disk:
8367
8368         1. remove old LVs (which have the name name_replaces.<time_t>)
8369
8370     Failures are not very well handled.
8371
8372     """
8373     steps_total = 6
8374
8375     # Step: check device activation
8376     self.lu.LogStep(1, steps_total, "Check device existence")
8377     self._CheckDisksExistence([self.other_node, self.target_node])
8378     self._CheckVolumeGroup([self.target_node, self.other_node])
8379
8380     # Step: check other node consistency
8381     self.lu.LogStep(2, steps_total, "Check peer consistency")
8382     self._CheckDisksConsistency(self.other_node,
8383                                 self.other_node == self.instance.primary_node,
8384                                 False)
8385
8386     # Step: create new storage
8387     self.lu.LogStep(3, steps_total, "Allocate new storage")
8388     iv_names = self._CreateNewStorage(self.target_node)
8389
8390     # Step: for each lv, detach+rename*2+attach
8391     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8392     for dev, old_lvs, new_lvs in iv_names.itervalues():
8393       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8394
8395       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8396                                                      old_lvs)
8397       result.Raise("Can't detach drbd from local storage on node"
8398                    " %s for device %s" % (self.target_node, dev.iv_name))
8399       #dev.children = []
8400       #cfg.Update(instance)
8401
8402       # ok, we created the new LVs, so now we know we have the needed
8403       # storage; as such, we proceed on the target node to rename
8404       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8405       # using the assumption that logical_id == physical_id (which in
8406       # turn is the unique_id on that node)
8407
8408       # FIXME(iustin): use a better name for the replaced LVs
8409       temp_suffix = int(time.time())
8410       ren_fn = lambda d, suff: (d.physical_id[0],
8411                                 d.physical_id[1] + "_replaced-%s" % suff)
8412
8413       # Build the rename list based on what LVs exist on the node
8414       rename_old_to_new = []
8415       for to_ren in old_lvs:
8416         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8417         if not result.fail_msg and result.payload:
8418           # device exists
8419           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8420
8421       self.lu.LogInfo("Renaming the old LVs on the target node")
8422       result = self.rpc.call_blockdev_rename(self.target_node,
8423                                              rename_old_to_new)
8424       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8425
8426       # Now we rename the new LVs to the old LVs
8427       self.lu.LogInfo("Renaming the new LVs on the target node")
8428       rename_new_to_old = [(new, old.physical_id)
8429                            for old, new in zip(old_lvs, new_lvs)]
8430       result = self.rpc.call_blockdev_rename(self.target_node,
8431                                              rename_new_to_old)
8432       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8433
8434       for old, new in zip(old_lvs, new_lvs):
8435         new.logical_id = old.logical_id
8436         self.cfg.SetDiskID(new, self.target_node)
8437
8438       for disk in old_lvs:
8439         disk.logical_id = ren_fn(disk, temp_suffix)
8440         self.cfg.SetDiskID(disk, self.target_node)
8441
8442       # Now that the new lvs have the old name, we can add them to the device
8443       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8444       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8445                                                   new_lvs)
8446       msg = result.fail_msg
8447       if msg:
8448         for new_lv in new_lvs:
8449           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8450                                                new_lv).fail_msg
8451           if msg2:
8452             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8453                                hint=("cleanup manually the unused logical"
8454                                      "volumes"))
8455         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8456
8457       dev.children = new_lvs
8458
8459       self.cfg.Update(self.instance, feedback_fn)
8460
8461     cstep = 5
8462     if self.early_release:
8463       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8464       cstep += 1
8465       self._RemoveOldStorage(self.target_node, iv_names)
8466       # WARNING: we release both node locks here, do not do other RPCs
8467       # than WaitForSync to the primary node
8468       self._ReleaseNodeLock([self.target_node, self.other_node])
8469
8470     # Wait for sync
8471     # This can fail as the old devices are degraded and _WaitForSync
8472     # does a combined result over all disks, so we don't check its return value
8473     self.lu.LogStep(cstep, steps_total, "Sync devices")
8474     cstep += 1
8475     _WaitForSync(self.lu, self.instance)
8476
8477     # Check all devices manually
8478     self._CheckDevices(self.instance.primary_node, iv_names)
8479
8480     # Step: remove old storage
8481     if not self.early_release:
8482       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8483       cstep += 1
8484       self._RemoveOldStorage(self.target_node, iv_names)
8485
8486   def _ExecDrbd8Secondary(self, feedback_fn):
8487     """Replace the secondary node for DRBD 8.
8488
8489     The algorithm for replace is quite complicated:
8490       - for all disks of the instance:
8491         - create new LVs on the new node with same names
8492         - shutdown the drbd device on the old secondary
8493         - disconnect the drbd network on the primary
8494         - create the drbd device on the new secondary
8495         - network attach the drbd on the primary, using an artifice:
8496           the drbd code for Attach() will connect to the network if it
8497           finds a device which is connected to the good local disks but
8498           not network enabled
8499       - wait for sync across all devices
8500       - remove all disks from the old secondary
8501
8502     Failures are not very well handled.
8503
8504     """
8505     steps_total = 6
8506
8507     # Step: check device activation
8508     self.lu.LogStep(1, steps_total, "Check device existence")
8509     self._CheckDisksExistence([self.instance.primary_node])
8510     self._CheckVolumeGroup([self.instance.primary_node])
8511
8512     # Step: check other node consistency
8513     self.lu.LogStep(2, steps_total, "Check peer consistency")
8514     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8515
8516     # Step: create new storage
8517     self.lu.LogStep(3, steps_total, "Allocate new storage")
8518     for idx, dev in enumerate(self.instance.disks):
8519       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8520                       (self.new_node, idx))
8521       # we pass force_create=True to force LVM creation
8522       for new_lv in dev.children:
8523         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8524                         _GetInstanceInfoText(self.instance), False)
8525
8526     # Step 4: dbrd minors and drbd setups changes
8527     # after this, we must manually remove the drbd minors on both the
8528     # error and the success paths
8529     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8530     minors = self.cfg.AllocateDRBDMinor([self.new_node
8531                                          for dev in self.instance.disks],
8532                                         self.instance.name)
8533     logging.debug("Allocated minors %r", minors)
8534
8535     iv_names = {}
8536     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8537       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8538                       (self.new_node, idx))
8539       # create new devices on new_node; note that we create two IDs:
8540       # one without port, so the drbd will be activated without
8541       # networking information on the new node at this stage, and one
8542       # with network, for the latter activation in step 4
8543       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8544       if self.instance.primary_node == o_node1:
8545         p_minor = o_minor1
8546       else:
8547         assert self.instance.primary_node == o_node2, "Three-node instance?"
8548         p_minor = o_minor2
8549
8550       new_alone_id = (self.instance.primary_node, self.new_node, None,
8551                       p_minor, new_minor, o_secret)
8552       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8553                     p_minor, new_minor, o_secret)
8554
8555       iv_names[idx] = (dev, dev.children, new_net_id)
8556       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8557                     new_net_id)
8558       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8559                               logical_id=new_alone_id,
8560                               children=dev.children,
8561                               size=dev.size)
8562       try:
8563         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8564                               _GetInstanceInfoText(self.instance), False)
8565       except errors.GenericError:
8566         self.cfg.ReleaseDRBDMinors(self.instance.name)
8567         raise
8568
8569     # We have new devices, shutdown the drbd on the old secondary
8570     for idx, dev in enumerate(self.instance.disks):
8571       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8572       self.cfg.SetDiskID(dev, self.target_node)
8573       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8574       if msg:
8575         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8576                            "node: %s" % (idx, msg),
8577                            hint=("Please cleanup this device manually as"
8578                                  " soon as possible"))
8579
8580     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8581     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8582                                                self.node_secondary_ip,
8583                                                self.instance.disks)\
8584                                               [self.instance.primary_node]
8585
8586     msg = result.fail_msg
8587     if msg:
8588       # detaches didn't succeed (unlikely)
8589       self.cfg.ReleaseDRBDMinors(self.instance.name)
8590       raise errors.OpExecError("Can't detach the disks from the network on"
8591                                " old node: %s" % (msg,))
8592
8593     # if we managed to detach at least one, we update all the disks of
8594     # the instance to point to the new secondary
8595     self.lu.LogInfo("Updating instance configuration")
8596     for dev, _, new_logical_id in iv_names.itervalues():
8597       dev.logical_id = new_logical_id
8598       self.cfg.SetDiskID(dev, self.instance.primary_node)
8599
8600     self.cfg.Update(self.instance, feedback_fn)
8601
8602     # and now perform the drbd attach
8603     self.lu.LogInfo("Attaching primary drbds to new secondary"
8604                     " (standalone => connected)")
8605     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8606                                             self.new_node],
8607                                            self.node_secondary_ip,
8608                                            self.instance.disks,
8609                                            self.instance.name,
8610                                            False)
8611     for to_node, to_result in result.items():
8612       msg = to_result.fail_msg
8613       if msg:
8614         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8615                            to_node, msg,
8616                            hint=("please do a gnt-instance info to see the"
8617                                  " status of disks"))
8618     cstep = 5
8619     if self.early_release:
8620       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8621       cstep += 1
8622       self._RemoveOldStorage(self.target_node, iv_names)
8623       # WARNING: we release all node locks here, do not do other RPCs
8624       # than WaitForSync to the primary node
8625       self._ReleaseNodeLock([self.instance.primary_node,
8626                              self.target_node,
8627                              self.new_node])
8628
8629     # Wait for sync
8630     # This can fail as the old devices are degraded and _WaitForSync
8631     # does a combined result over all disks, so we don't check its return value
8632     self.lu.LogStep(cstep, steps_total, "Sync devices")
8633     cstep += 1
8634     _WaitForSync(self.lu, self.instance)
8635
8636     # Check all devices manually
8637     self._CheckDevices(self.instance.primary_node, iv_names)
8638
8639     # Step: remove old storage
8640     if not self.early_release:
8641       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8642       self._RemoveOldStorage(self.target_node, iv_names)
8643
8644
8645 class LURepairNodeStorage(NoHooksLU):
8646   """Repairs the volume group on a node.
8647
8648   """
8649   REQ_BGL = False
8650
8651   def CheckArguments(self):
8652     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8653
8654     storage_type = self.op.storage_type
8655
8656     if (constants.SO_FIX_CONSISTENCY not in
8657         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8658       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8659                                  " repaired" % storage_type,
8660                                  errors.ECODE_INVAL)
8661
8662   def ExpandNames(self):
8663     self.needed_locks = {
8664       locking.LEVEL_NODE: [self.op.node_name],
8665       }
8666
8667   def _CheckFaultyDisks(self, instance, node_name):
8668     """Ensure faulty disks abort the opcode or at least warn."""
8669     try:
8670       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8671                                   node_name, True):
8672         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8673                                    " node '%s'" % (instance.name, node_name),
8674                                    errors.ECODE_STATE)
8675     except errors.OpPrereqError, err:
8676       if self.op.ignore_consistency:
8677         self.proc.LogWarning(str(err.args[0]))
8678       else:
8679         raise
8680
8681   def CheckPrereq(self):
8682     """Check prerequisites.
8683
8684     """
8685     # Check whether any instance on this node has faulty disks
8686     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8687       if not inst.admin_up:
8688         continue
8689       check_nodes = set(inst.all_nodes)
8690       check_nodes.discard(self.op.node_name)
8691       for inst_node_name in check_nodes:
8692         self._CheckFaultyDisks(inst, inst_node_name)
8693
8694   def Exec(self, feedback_fn):
8695     feedback_fn("Repairing storage unit '%s' on %s ..." %
8696                 (self.op.name, self.op.node_name))
8697
8698     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8699     result = self.rpc.call_storage_execute(self.op.node_name,
8700                                            self.op.storage_type, st_args,
8701                                            self.op.name,
8702                                            constants.SO_FIX_CONSISTENCY)
8703     result.Raise("Failed to repair storage unit '%s' on %s" %
8704                  (self.op.name, self.op.node_name))
8705
8706
8707 class LUNodeEvacStrategy(NoHooksLU):
8708   """Computes the node evacuation strategy.
8709
8710   """
8711   REQ_BGL = False
8712
8713   def CheckArguments(self):
8714     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8715
8716   def ExpandNames(self):
8717     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8718     self.needed_locks = locks = {}
8719     if self.op.remote_node is None:
8720       locks[locking.LEVEL_NODE] = locking.ALL_SET
8721     else:
8722       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8723       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8724
8725   def Exec(self, feedback_fn):
8726     if self.op.remote_node is not None:
8727       instances = []
8728       for node in self.op.nodes:
8729         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8730       result = []
8731       for i in instances:
8732         if i.primary_node == self.op.remote_node:
8733           raise errors.OpPrereqError("Node %s is the primary node of"
8734                                      " instance %s, cannot use it as"
8735                                      " secondary" %
8736                                      (self.op.remote_node, i.name),
8737                                      errors.ECODE_INVAL)
8738         result.append([i.name, self.op.remote_node])
8739     else:
8740       ial = IAllocator(self.cfg, self.rpc,
8741                        mode=constants.IALLOCATOR_MODE_MEVAC,
8742                        evac_nodes=self.op.nodes)
8743       ial.Run(self.op.iallocator, validate=True)
8744       if not ial.success:
8745         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8746                                  errors.ECODE_NORES)
8747       result = ial.result
8748     return result
8749
8750
8751 class LUInstanceGrowDisk(LogicalUnit):
8752   """Grow a disk of an instance.
8753
8754   """
8755   HPATH = "disk-grow"
8756   HTYPE = constants.HTYPE_INSTANCE
8757   REQ_BGL = False
8758
8759   def ExpandNames(self):
8760     self._ExpandAndLockInstance()
8761     self.needed_locks[locking.LEVEL_NODE] = []
8762     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8763
8764   def DeclareLocks(self, level):
8765     if level == locking.LEVEL_NODE:
8766       self._LockInstancesNodes()
8767
8768   def BuildHooksEnv(self):
8769     """Build hooks env.
8770
8771     This runs on the master, the primary and all the secondaries.
8772
8773     """
8774     env = {
8775       "DISK": self.op.disk,
8776       "AMOUNT": self.op.amount,
8777       }
8778     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8779     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8780     return env, nl, nl
8781
8782   def CheckPrereq(self):
8783     """Check prerequisites.
8784
8785     This checks that the instance is in the cluster.
8786
8787     """
8788     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8789     assert instance is not None, \
8790       "Cannot retrieve locked instance %s" % self.op.instance_name
8791     nodenames = list(instance.all_nodes)
8792     for node in nodenames:
8793       _CheckNodeOnline(self, node)
8794
8795     self.instance = instance
8796
8797     if instance.disk_template not in constants.DTS_GROWABLE:
8798       raise errors.OpPrereqError("Instance's disk layout does not support"
8799                                  " growing.", errors.ECODE_INVAL)
8800
8801     self.disk = instance.FindDisk(self.op.disk)
8802
8803     if instance.disk_template != constants.DT_FILE:
8804       # TODO: check the free disk space for file, when that feature
8805       # will be supported
8806       _CheckNodesFreeDiskPerVG(self, nodenames,
8807                                self.disk.ComputeGrowth(self.op.amount))
8808
8809   def Exec(self, feedback_fn):
8810     """Execute disk grow.
8811
8812     """
8813     instance = self.instance
8814     disk = self.disk
8815
8816     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8817     if not disks_ok:
8818       raise errors.OpExecError("Cannot activate block device to grow")
8819
8820     for node in instance.all_nodes:
8821       self.cfg.SetDiskID(disk, node)
8822       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8823       result.Raise("Grow request failed to node %s" % node)
8824
8825       # TODO: Rewrite code to work properly
8826       # DRBD goes into sync mode for a short amount of time after executing the
8827       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8828       # calling "resize" in sync mode fails. Sleeping for a short amount of
8829       # time is a work-around.
8830       time.sleep(5)
8831
8832     disk.RecordGrow(self.op.amount)
8833     self.cfg.Update(instance, feedback_fn)
8834     if self.op.wait_for_sync:
8835       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8836       if disk_abort:
8837         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8838                              " status.\nPlease check the instance.")
8839       if not instance.admin_up:
8840         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8841     elif not instance.admin_up:
8842       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8843                            " not supposed to be running because no wait for"
8844                            " sync mode was requested.")
8845
8846
8847 class LUInstanceQueryData(NoHooksLU):
8848   """Query runtime instance data.
8849
8850   """
8851   REQ_BGL = False
8852
8853   def ExpandNames(self):
8854     self.needed_locks = {}
8855     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8856
8857     if self.op.instances:
8858       self.wanted_names = []
8859       for name in self.op.instances:
8860         full_name = _ExpandInstanceName(self.cfg, name)
8861         self.wanted_names.append(full_name)
8862       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8863     else:
8864       self.wanted_names = None
8865       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8866
8867     self.needed_locks[locking.LEVEL_NODE] = []
8868     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8869
8870   def DeclareLocks(self, level):
8871     if level == locking.LEVEL_NODE:
8872       self._LockInstancesNodes()
8873
8874   def CheckPrereq(self):
8875     """Check prerequisites.
8876
8877     This only checks the optional instance list against the existing names.
8878
8879     """
8880     if self.wanted_names is None:
8881       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8882
8883     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8884                              in self.wanted_names]
8885
8886   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8887     """Returns the status of a block device
8888
8889     """
8890     if self.op.static or not node:
8891       return None
8892
8893     self.cfg.SetDiskID(dev, node)
8894
8895     result = self.rpc.call_blockdev_find(node, dev)
8896     if result.offline:
8897       return None
8898
8899     result.Raise("Can't compute disk status for %s" % instance_name)
8900
8901     status = result.payload
8902     if status is None:
8903       return None
8904
8905     return (status.dev_path, status.major, status.minor,
8906             status.sync_percent, status.estimated_time,
8907             status.is_degraded, status.ldisk_status)
8908
8909   def _ComputeDiskStatus(self, instance, snode, dev):
8910     """Compute block device status.
8911
8912     """
8913     if dev.dev_type in constants.LDS_DRBD:
8914       # we change the snode then (otherwise we use the one passed in)
8915       if dev.logical_id[0] == instance.primary_node:
8916         snode = dev.logical_id[1]
8917       else:
8918         snode = dev.logical_id[0]
8919
8920     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8921                                               instance.name, dev)
8922     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8923
8924     if dev.children:
8925       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8926                       for child in dev.children]
8927     else:
8928       dev_children = []
8929
8930     data = {
8931       "iv_name": dev.iv_name,
8932       "dev_type": dev.dev_type,
8933       "logical_id": dev.logical_id,
8934       "physical_id": dev.physical_id,
8935       "pstatus": dev_pstatus,
8936       "sstatus": dev_sstatus,
8937       "children": dev_children,
8938       "mode": dev.mode,
8939       "size": dev.size,
8940       }
8941
8942     return data
8943
8944   def Exec(self, feedback_fn):
8945     """Gather and return data"""
8946     result = {}
8947
8948     cluster = self.cfg.GetClusterInfo()
8949
8950     for instance in self.wanted_instances:
8951       if not self.op.static:
8952         remote_info = self.rpc.call_instance_info(instance.primary_node,
8953                                                   instance.name,
8954                                                   instance.hypervisor)
8955         remote_info.Raise("Error checking node %s" % instance.primary_node)
8956         remote_info = remote_info.payload
8957         if remote_info and "state" in remote_info:
8958           remote_state = "up"
8959         else:
8960           remote_state = "down"
8961       else:
8962         remote_state = None
8963       if instance.admin_up:
8964         config_state = "up"
8965       else:
8966         config_state = "down"
8967
8968       disks = [self._ComputeDiskStatus(instance, None, device)
8969                for device in instance.disks]
8970
8971       idict = {
8972         "name": instance.name,
8973         "config_state": config_state,
8974         "run_state": remote_state,
8975         "pnode": instance.primary_node,
8976         "snodes": instance.secondary_nodes,
8977         "os": instance.os,
8978         # this happens to be the same format used for hooks
8979         "nics": _NICListToTuple(self, instance.nics),
8980         "disk_template": instance.disk_template,
8981         "disks": disks,
8982         "hypervisor": instance.hypervisor,
8983         "network_port": instance.network_port,
8984         "hv_instance": instance.hvparams,
8985         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8986         "be_instance": instance.beparams,
8987         "be_actual": cluster.FillBE(instance),
8988         "os_instance": instance.osparams,
8989         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8990         "serial_no": instance.serial_no,
8991         "mtime": instance.mtime,
8992         "ctime": instance.ctime,
8993         "uuid": instance.uuid,
8994         }
8995
8996       result[instance.name] = idict
8997
8998     return result
8999
9000
9001 class LUInstanceSetParams(LogicalUnit):
9002   """Modifies an instances's parameters.
9003
9004   """
9005   HPATH = "instance-modify"
9006   HTYPE = constants.HTYPE_INSTANCE
9007   REQ_BGL = False
9008
9009   def CheckArguments(self):
9010     if not (self.op.nics or self.op.disks or self.op.disk_template or
9011             self.op.hvparams or self.op.beparams or self.op.os_name):
9012       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9013
9014     if self.op.hvparams:
9015       _CheckGlobalHvParams(self.op.hvparams)
9016
9017     # Disk validation
9018     disk_addremove = 0
9019     for disk_op, disk_dict in self.op.disks:
9020       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9021       if disk_op == constants.DDM_REMOVE:
9022         disk_addremove += 1
9023         continue
9024       elif disk_op == constants.DDM_ADD:
9025         disk_addremove += 1
9026       else:
9027         if not isinstance(disk_op, int):
9028           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9029         if not isinstance(disk_dict, dict):
9030           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9031           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9032
9033       if disk_op == constants.DDM_ADD:
9034         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9035         if mode not in constants.DISK_ACCESS_SET:
9036           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9037                                      errors.ECODE_INVAL)
9038         size = disk_dict.get('size', None)
9039         if size is None:
9040           raise errors.OpPrereqError("Required disk parameter size missing",
9041                                      errors.ECODE_INVAL)
9042         try:
9043           size = int(size)
9044         except (TypeError, ValueError), err:
9045           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9046                                      str(err), errors.ECODE_INVAL)
9047         disk_dict['size'] = size
9048       else:
9049         # modification of disk
9050         if 'size' in disk_dict:
9051           raise errors.OpPrereqError("Disk size change not possible, use"
9052                                      " grow-disk", errors.ECODE_INVAL)
9053
9054     if disk_addremove > 1:
9055       raise errors.OpPrereqError("Only one disk add or remove operation"
9056                                  " supported at a time", errors.ECODE_INVAL)
9057
9058     if self.op.disks and self.op.disk_template is not None:
9059       raise errors.OpPrereqError("Disk template conversion and other disk"
9060                                  " changes not supported at the same time",
9061                                  errors.ECODE_INVAL)
9062
9063     if (self.op.disk_template and
9064         self.op.disk_template in constants.DTS_NET_MIRROR and
9065         self.op.remote_node is None):
9066       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9067                                  " one requires specifying a secondary node",
9068                                  errors.ECODE_INVAL)
9069
9070     # NIC validation
9071     nic_addremove = 0
9072     for nic_op, nic_dict in self.op.nics:
9073       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9074       if nic_op == constants.DDM_REMOVE:
9075         nic_addremove += 1
9076         continue
9077       elif nic_op == constants.DDM_ADD:
9078         nic_addremove += 1
9079       else:
9080         if not isinstance(nic_op, int):
9081           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9082         if not isinstance(nic_dict, dict):
9083           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9084           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9085
9086       # nic_dict should be a dict
9087       nic_ip = nic_dict.get('ip', None)
9088       if nic_ip is not None:
9089         if nic_ip.lower() == constants.VALUE_NONE:
9090           nic_dict['ip'] = None
9091         else:
9092           if not netutils.IPAddress.IsValid(nic_ip):
9093             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9094                                        errors.ECODE_INVAL)
9095
9096       nic_bridge = nic_dict.get('bridge', None)
9097       nic_link = nic_dict.get('link', None)
9098       if nic_bridge and nic_link:
9099         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9100                                    " at the same time", errors.ECODE_INVAL)
9101       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9102         nic_dict['bridge'] = None
9103       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9104         nic_dict['link'] = None
9105
9106       if nic_op == constants.DDM_ADD:
9107         nic_mac = nic_dict.get('mac', None)
9108         if nic_mac is None:
9109           nic_dict['mac'] = constants.VALUE_AUTO
9110
9111       if 'mac' in nic_dict:
9112         nic_mac = nic_dict['mac']
9113         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9114           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9115
9116         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9117           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9118                                      " modifying an existing nic",
9119                                      errors.ECODE_INVAL)
9120
9121     if nic_addremove > 1:
9122       raise errors.OpPrereqError("Only one NIC add or remove operation"
9123                                  " supported at a time", errors.ECODE_INVAL)
9124
9125   def ExpandNames(self):
9126     self._ExpandAndLockInstance()
9127     self.needed_locks[locking.LEVEL_NODE] = []
9128     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9129
9130   def DeclareLocks(self, level):
9131     if level == locking.LEVEL_NODE:
9132       self._LockInstancesNodes()
9133       if self.op.disk_template and self.op.remote_node:
9134         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9135         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9136
9137   def BuildHooksEnv(self):
9138     """Build hooks env.
9139
9140     This runs on the master, primary and secondaries.
9141
9142     """
9143     args = dict()
9144     if constants.BE_MEMORY in self.be_new:
9145       args['memory'] = self.be_new[constants.BE_MEMORY]
9146     if constants.BE_VCPUS in self.be_new:
9147       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9148     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9149     # information at all.
9150     if self.op.nics:
9151       args['nics'] = []
9152       nic_override = dict(self.op.nics)
9153       for idx, nic in enumerate(self.instance.nics):
9154         if idx in nic_override:
9155           this_nic_override = nic_override[idx]
9156         else:
9157           this_nic_override = {}
9158         if 'ip' in this_nic_override:
9159           ip = this_nic_override['ip']
9160         else:
9161           ip = nic.ip
9162         if 'mac' in this_nic_override:
9163           mac = this_nic_override['mac']
9164         else:
9165           mac = nic.mac
9166         if idx in self.nic_pnew:
9167           nicparams = self.nic_pnew[idx]
9168         else:
9169           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9170         mode = nicparams[constants.NIC_MODE]
9171         link = nicparams[constants.NIC_LINK]
9172         args['nics'].append((ip, mac, mode, link))
9173       if constants.DDM_ADD in nic_override:
9174         ip = nic_override[constants.DDM_ADD].get('ip', None)
9175         mac = nic_override[constants.DDM_ADD]['mac']
9176         nicparams = self.nic_pnew[constants.DDM_ADD]
9177         mode = nicparams[constants.NIC_MODE]
9178         link = nicparams[constants.NIC_LINK]
9179         args['nics'].append((ip, mac, mode, link))
9180       elif constants.DDM_REMOVE in nic_override:
9181         del args['nics'][-1]
9182
9183     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9184     if self.op.disk_template:
9185       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9186     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9187     return env, nl, nl
9188
9189   def CheckPrereq(self):
9190     """Check prerequisites.
9191
9192     This only checks the instance list against the existing names.
9193
9194     """
9195     # checking the new params on the primary/secondary nodes
9196
9197     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9198     cluster = self.cluster = self.cfg.GetClusterInfo()
9199     assert self.instance is not None, \
9200       "Cannot retrieve locked instance %s" % self.op.instance_name
9201     pnode = instance.primary_node
9202     nodelist = list(instance.all_nodes)
9203
9204     # OS change
9205     if self.op.os_name and not self.op.force:
9206       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9207                       self.op.force_variant)
9208       instance_os = self.op.os_name
9209     else:
9210       instance_os = instance.os
9211
9212     if self.op.disk_template:
9213       if instance.disk_template == self.op.disk_template:
9214         raise errors.OpPrereqError("Instance already has disk template %s" %
9215                                    instance.disk_template, errors.ECODE_INVAL)
9216
9217       if (instance.disk_template,
9218           self.op.disk_template) not in self._DISK_CONVERSIONS:
9219         raise errors.OpPrereqError("Unsupported disk template conversion from"
9220                                    " %s to %s" % (instance.disk_template,
9221                                                   self.op.disk_template),
9222                                    errors.ECODE_INVAL)
9223       _CheckInstanceDown(self, instance, "cannot change disk template")
9224       if self.op.disk_template in constants.DTS_NET_MIRROR:
9225         if self.op.remote_node == pnode:
9226           raise errors.OpPrereqError("Given new secondary node %s is the same"
9227                                      " as the primary node of the instance" %
9228                                      self.op.remote_node, errors.ECODE_STATE)
9229         _CheckNodeOnline(self, self.op.remote_node)
9230         _CheckNodeNotDrained(self, self.op.remote_node)
9231         # FIXME: here we assume that the old instance type is DT_PLAIN
9232         assert instance.disk_template == constants.DT_PLAIN
9233         disks = [{"size": d.size, "vg": d.logical_id[0]}
9234                  for d in instance.disks]
9235         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9236         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9237
9238     # hvparams processing
9239     if self.op.hvparams:
9240       hv_type = instance.hypervisor
9241       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9242       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9243       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9244
9245       # local check
9246       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9247       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9248       self.hv_new = hv_new # the new actual values
9249       self.hv_inst = i_hvdict # the new dict (without defaults)
9250     else:
9251       self.hv_new = self.hv_inst = {}
9252
9253     # beparams processing
9254     if self.op.beparams:
9255       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9256                                    use_none=True)
9257       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9258       be_new = cluster.SimpleFillBE(i_bedict)
9259       self.be_new = be_new # the new actual values
9260       self.be_inst = i_bedict # the new dict (without defaults)
9261     else:
9262       self.be_new = self.be_inst = {}
9263
9264     # osparams processing
9265     if self.op.osparams:
9266       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9267       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9268       self.os_inst = i_osdict # the new dict (without defaults)
9269     else:
9270       self.os_inst = {}
9271
9272     self.warn = []
9273
9274     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9275       mem_check_list = [pnode]
9276       if be_new[constants.BE_AUTO_BALANCE]:
9277         # either we changed auto_balance to yes or it was from before
9278         mem_check_list.extend(instance.secondary_nodes)
9279       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9280                                                   instance.hypervisor)
9281       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9282                                          instance.hypervisor)
9283       pninfo = nodeinfo[pnode]
9284       msg = pninfo.fail_msg
9285       if msg:
9286         # Assume the primary node is unreachable and go ahead
9287         self.warn.append("Can't get info from primary node %s: %s" %
9288                          (pnode,  msg))
9289       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9290         self.warn.append("Node data from primary node %s doesn't contain"
9291                          " free memory information" % pnode)
9292       elif instance_info.fail_msg:
9293         self.warn.append("Can't get instance runtime information: %s" %
9294                         instance_info.fail_msg)
9295       else:
9296         if instance_info.payload:
9297           current_mem = int(instance_info.payload['memory'])
9298         else:
9299           # Assume instance not running
9300           # (there is a slight race condition here, but it's not very probable,
9301           # and we have no other way to check)
9302           current_mem = 0
9303         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9304                     pninfo.payload['memory_free'])
9305         if miss_mem > 0:
9306           raise errors.OpPrereqError("This change will prevent the instance"
9307                                      " from starting, due to %d MB of memory"
9308                                      " missing on its primary node" % miss_mem,
9309                                      errors.ECODE_NORES)
9310
9311       if be_new[constants.BE_AUTO_BALANCE]:
9312         for node, nres in nodeinfo.items():
9313           if node not in instance.secondary_nodes:
9314             continue
9315           msg = nres.fail_msg
9316           if msg:
9317             self.warn.append("Can't get info from secondary node %s: %s" %
9318                              (node, msg))
9319           elif not isinstance(nres.payload.get('memory_free', None), int):
9320             self.warn.append("Secondary node %s didn't return free"
9321                              " memory information" % node)
9322           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9323             self.warn.append("Not enough memory to failover instance to"
9324                              " secondary node %s" % node)
9325
9326     # NIC processing
9327     self.nic_pnew = {}
9328     self.nic_pinst = {}
9329     for nic_op, nic_dict in self.op.nics:
9330       if nic_op == constants.DDM_REMOVE:
9331         if not instance.nics:
9332           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9333                                      errors.ECODE_INVAL)
9334         continue
9335       if nic_op != constants.DDM_ADD:
9336         # an existing nic
9337         if not instance.nics:
9338           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9339                                      " no NICs" % nic_op,
9340                                      errors.ECODE_INVAL)
9341         if nic_op < 0 or nic_op >= len(instance.nics):
9342           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9343                                      " are 0 to %d" %
9344                                      (nic_op, len(instance.nics) - 1),
9345                                      errors.ECODE_INVAL)
9346         old_nic_params = instance.nics[nic_op].nicparams
9347         old_nic_ip = instance.nics[nic_op].ip
9348       else:
9349         old_nic_params = {}
9350         old_nic_ip = None
9351
9352       update_params_dict = dict([(key, nic_dict[key])
9353                                  for key in constants.NICS_PARAMETERS
9354                                  if key in nic_dict])
9355
9356       if 'bridge' in nic_dict:
9357         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9358
9359       new_nic_params = _GetUpdatedParams(old_nic_params,
9360                                          update_params_dict)
9361       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9362       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9363       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9364       self.nic_pinst[nic_op] = new_nic_params
9365       self.nic_pnew[nic_op] = new_filled_nic_params
9366       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9367
9368       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9369         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9370         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9371         if msg:
9372           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9373           if self.op.force:
9374             self.warn.append(msg)
9375           else:
9376             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9377       if new_nic_mode == constants.NIC_MODE_ROUTED:
9378         if 'ip' in nic_dict:
9379           nic_ip = nic_dict['ip']
9380         else:
9381           nic_ip = old_nic_ip
9382         if nic_ip is None:
9383           raise errors.OpPrereqError('Cannot set the nic ip to None'
9384                                      ' on a routed nic', errors.ECODE_INVAL)
9385       if 'mac' in nic_dict:
9386         nic_mac = nic_dict['mac']
9387         if nic_mac is None:
9388           raise errors.OpPrereqError('Cannot set the nic mac to None',
9389                                      errors.ECODE_INVAL)
9390         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9391           # otherwise generate the mac
9392           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9393         else:
9394           # or validate/reserve the current one
9395           try:
9396             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9397           except errors.ReservationError:
9398             raise errors.OpPrereqError("MAC address %s already in use"
9399                                        " in cluster" % nic_mac,
9400                                        errors.ECODE_NOTUNIQUE)
9401
9402     # DISK processing
9403     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9404       raise errors.OpPrereqError("Disk operations not supported for"
9405                                  " diskless instances",
9406                                  errors.ECODE_INVAL)
9407     for disk_op, _ in self.op.disks:
9408       if disk_op == constants.DDM_REMOVE:
9409         if len(instance.disks) == 1:
9410           raise errors.OpPrereqError("Cannot remove the last disk of"
9411                                      " an instance", errors.ECODE_INVAL)
9412         _CheckInstanceDown(self, instance, "cannot remove disks")
9413
9414       if (disk_op == constants.DDM_ADD and
9415           len(instance.disks) >= constants.MAX_DISKS):
9416         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9417                                    " add more" % constants.MAX_DISKS,
9418                                    errors.ECODE_STATE)
9419       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9420         # an existing disk
9421         if disk_op < 0 or disk_op >= len(instance.disks):
9422           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9423                                      " are 0 to %d" %
9424                                      (disk_op, len(instance.disks)),
9425                                      errors.ECODE_INVAL)
9426
9427     return
9428
9429   def _ConvertPlainToDrbd(self, feedback_fn):
9430     """Converts an instance from plain to drbd.
9431
9432     """
9433     feedback_fn("Converting template to drbd")
9434     instance = self.instance
9435     pnode = instance.primary_node
9436     snode = self.op.remote_node
9437
9438     # create a fake disk info for _GenerateDiskTemplate
9439     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9440     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9441                                       instance.name, pnode, [snode],
9442                                       disk_info, None, None, 0, feedback_fn)
9443     info = _GetInstanceInfoText(instance)
9444     feedback_fn("Creating aditional volumes...")
9445     # first, create the missing data and meta devices
9446     for disk in new_disks:
9447       # unfortunately this is... not too nice
9448       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9449                             info, True)
9450       for child in disk.children:
9451         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9452     # at this stage, all new LVs have been created, we can rename the
9453     # old ones
9454     feedback_fn("Renaming original volumes...")
9455     rename_list = [(o, n.children[0].logical_id)
9456                    for (o, n) in zip(instance.disks, new_disks)]
9457     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9458     result.Raise("Failed to rename original LVs")
9459
9460     feedback_fn("Initializing DRBD devices...")
9461     # all child devices are in place, we can now create the DRBD devices
9462     for disk in new_disks:
9463       for node in [pnode, snode]:
9464         f_create = node == pnode
9465         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9466
9467     # at this point, the instance has been modified
9468     instance.disk_template = constants.DT_DRBD8
9469     instance.disks = new_disks
9470     self.cfg.Update(instance, feedback_fn)
9471
9472     # disks are created, waiting for sync
9473     disk_abort = not _WaitForSync(self, instance)
9474     if disk_abort:
9475       raise errors.OpExecError("There are some degraded disks for"
9476                                " this instance, please cleanup manually")
9477
9478   def _ConvertDrbdToPlain(self, feedback_fn):
9479     """Converts an instance from drbd to plain.
9480
9481     """
9482     instance = self.instance
9483     assert len(instance.secondary_nodes) == 1
9484     pnode = instance.primary_node
9485     snode = instance.secondary_nodes[0]
9486     feedback_fn("Converting template to plain")
9487
9488     old_disks = instance.disks
9489     new_disks = [d.children[0] for d in old_disks]
9490
9491     # copy over size and mode
9492     for parent, child in zip(old_disks, new_disks):
9493       child.size = parent.size
9494       child.mode = parent.mode
9495
9496     # update instance structure
9497     instance.disks = new_disks
9498     instance.disk_template = constants.DT_PLAIN
9499     self.cfg.Update(instance, feedback_fn)
9500
9501     feedback_fn("Removing volumes on the secondary node...")
9502     for disk in old_disks:
9503       self.cfg.SetDiskID(disk, snode)
9504       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9505       if msg:
9506         self.LogWarning("Could not remove block device %s on node %s,"
9507                         " continuing anyway: %s", disk.iv_name, snode, msg)
9508
9509     feedback_fn("Removing unneeded volumes on the primary node...")
9510     for idx, disk in enumerate(old_disks):
9511       meta = disk.children[1]
9512       self.cfg.SetDiskID(meta, pnode)
9513       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9514       if msg:
9515         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9516                         " continuing anyway: %s", idx, pnode, msg)
9517
9518   def Exec(self, feedback_fn):
9519     """Modifies an instance.
9520
9521     All parameters take effect only at the next restart of the instance.
9522
9523     """
9524     # Process here the warnings from CheckPrereq, as we don't have a
9525     # feedback_fn there.
9526     for warn in self.warn:
9527       feedback_fn("WARNING: %s" % warn)
9528
9529     result = []
9530     instance = self.instance
9531     # disk changes
9532     for disk_op, disk_dict in self.op.disks:
9533       if disk_op == constants.DDM_REMOVE:
9534         # remove the last disk
9535         device = instance.disks.pop()
9536         device_idx = len(instance.disks)
9537         for node, disk in device.ComputeNodeTree(instance.primary_node):
9538           self.cfg.SetDiskID(disk, node)
9539           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9540           if msg:
9541             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9542                             " continuing anyway", device_idx, node, msg)
9543         result.append(("disk/%d" % device_idx, "remove"))
9544       elif disk_op == constants.DDM_ADD:
9545         # add a new disk
9546         if instance.disk_template == constants.DT_FILE:
9547           file_driver, file_path = instance.disks[0].logical_id
9548           file_path = os.path.dirname(file_path)
9549         else:
9550           file_driver = file_path = None
9551         disk_idx_base = len(instance.disks)
9552         new_disk = _GenerateDiskTemplate(self,
9553                                          instance.disk_template,
9554                                          instance.name, instance.primary_node,
9555                                          instance.secondary_nodes,
9556                                          [disk_dict],
9557                                          file_path,
9558                                          file_driver,
9559                                          disk_idx_base, feedback_fn)[0]
9560         instance.disks.append(new_disk)
9561         info = _GetInstanceInfoText(instance)
9562
9563         logging.info("Creating volume %s for instance %s",
9564                      new_disk.iv_name, instance.name)
9565         # Note: this needs to be kept in sync with _CreateDisks
9566         #HARDCODE
9567         for node in instance.all_nodes:
9568           f_create = node == instance.primary_node
9569           try:
9570             _CreateBlockDev(self, node, instance, new_disk,
9571                             f_create, info, f_create)
9572           except errors.OpExecError, err:
9573             self.LogWarning("Failed to create volume %s (%s) on"
9574                             " node %s: %s",
9575                             new_disk.iv_name, new_disk, node, err)
9576         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9577                        (new_disk.size, new_disk.mode)))
9578       else:
9579         # change a given disk
9580         instance.disks[disk_op].mode = disk_dict['mode']
9581         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9582
9583     if self.op.disk_template:
9584       r_shut = _ShutdownInstanceDisks(self, instance)
9585       if not r_shut:
9586         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9587                                  " proceed with disk template conversion")
9588       mode = (instance.disk_template, self.op.disk_template)
9589       try:
9590         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9591       except:
9592         self.cfg.ReleaseDRBDMinors(instance.name)
9593         raise
9594       result.append(("disk_template", self.op.disk_template))
9595
9596     # NIC changes
9597     for nic_op, nic_dict in self.op.nics:
9598       if nic_op == constants.DDM_REMOVE:
9599         # remove the last nic
9600         del instance.nics[-1]
9601         result.append(("nic.%d" % len(instance.nics), "remove"))
9602       elif nic_op == constants.DDM_ADD:
9603         # mac and bridge should be set, by now
9604         mac = nic_dict['mac']
9605         ip = nic_dict.get('ip', None)
9606         nicparams = self.nic_pinst[constants.DDM_ADD]
9607         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9608         instance.nics.append(new_nic)
9609         result.append(("nic.%d" % (len(instance.nics) - 1),
9610                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9611                        (new_nic.mac, new_nic.ip,
9612                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9613                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9614                        )))
9615       else:
9616         for key in 'mac', 'ip':
9617           if key in nic_dict:
9618             setattr(instance.nics[nic_op], key, nic_dict[key])
9619         if nic_op in self.nic_pinst:
9620           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9621         for key, val in nic_dict.iteritems():
9622           result.append(("nic.%s/%d" % (key, nic_op), val))
9623
9624     # hvparams changes
9625     if self.op.hvparams:
9626       instance.hvparams = self.hv_inst
9627       for key, val in self.op.hvparams.iteritems():
9628         result.append(("hv/%s" % key, val))
9629
9630     # beparams changes
9631     if self.op.beparams:
9632       instance.beparams = self.be_inst
9633       for key, val in self.op.beparams.iteritems():
9634         result.append(("be/%s" % key, val))
9635
9636     # OS change
9637     if self.op.os_name:
9638       instance.os = self.op.os_name
9639
9640     # osparams changes
9641     if self.op.osparams:
9642       instance.osparams = self.os_inst
9643       for key, val in self.op.osparams.iteritems():
9644         result.append(("os/%s" % key, val))
9645
9646     self.cfg.Update(instance, feedback_fn)
9647
9648     return result
9649
9650   _DISK_CONVERSIONS = {
9651     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9652     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9653     }
9654
9655
9656 class LUBackupQuery(NoHooksLU):
9657   """Query the exports list
9658
9659   """
9660   REQ_BGL = False
9661
9662   def ExpandNames(self):
9663     self.needed_locks = {}
9664     self.share_locks[locking.LEVEL_NODE] = 1
9665     if not self.op.nodes:
9666       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9667     else:
9668       self.needed_locks[locking.LEVEL_NODE] = \
9669         _GetWantedNodes(self, self.op.nodes)
9670
9671   def Exec(self, feedback_fn):
9672     """Compute the list of all the exported system images.
9673
9674     @rtype: dict
9675     @return: a dictionary with the structure node->(export-list)
9676         where export-list is a list of the instances exported on
9677         that node.
9678
9679     """
9680     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9681     rpcresult = self.rpc.call_export_list(self.nodes)
9682     result = {}
9683     for node in rpcresult:
9684       if rpcresult[node].fail_msg:
9685         result[node] = False
9686       else:
9687         result[node] = rpcresult[node].payload
9688
9689     return result
9690
9691
9692 class LUBackupPrepare(NoHooksLU):
9693   """Prepares an instance for an export and returns useful information.
9694
9695   """
9696   REQ_BGL = False
9697
9698   def ExpandNames(self):
9699     self._ExpandAndLockInstance()
9700
9701   def CheckPrereq(self):
9702     """Check prerequisites.
9703
9704     """
9705     instance_name = self.op.instance_name
9706
9707     self.instance = self.cfg.GetInstanceInfo(instance_name)
9708     assert self.instance is not None, \
9709           "Cannot retrieve locked instance %s" % self.op.instance_name
9710     _CheckNodeOnline(self, self.instance.primary_node)
9711
9712     self._cds = _GetClusterDomainSecret()
9713
9714   def Exec(self, feedback_fn):
9715     """Prepares an instance for an export.
9716
9717     """
9718     instance = self.instance
9719
9720     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9721       salt = utils.GenerateSecret(8)
9722
9723       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9724       result = self.rpc.call_x509_cert_create(instance.primary_node,
9725                                               constants.RIE_CERT_VALIDITY)
9726       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9727
9728       (name, cert_pem) = result.payload
9729
9730       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9731                                              cert_pem)
9732
9733       return {
9734         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9735         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9736                           salt),
9737         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9738         }
9739
9740     return None
9741
9742
9743 class LUBackupExport(LogicalUnit):
9744   """Export an instance to an image in the cluster.
9745
9746   """
9747   HPATH = "instance-export"
9748   HTYPE = constants.HTYPE_INSTANCE
9749   REQ_BGL = False
9750
9751   def CheckArguments(self):
9752     """Check the arguments.
9753
9754     """
9755     self.x509_key_name = self.op.x509_key_name
9756     self.dest_x509_ca_pem = self.op.destination_x509_ca
9757
9758     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9759       if not self.x509_key_name:
9760         raise errors.OpPrereqError("Missing X509 key name for encryption",
9761                                    errors.ECODE_INVAL)
9762
9763       if not self.dest_x509_ca_pem:
9764         raise errors.OpPrereqError("Missing destination X509 CA",
9765                                    errors.ECODE_INVAL)
9766
9767   def ExpandNames(self):
9768     self._ExpandAndLockInstance()
9769
9770     # Lock all nodes for local exports
9771     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9772       # FIXME: lock only instance primary and destination node
9773       #
9774       # Sad but true, for now we have do lock all nodes, as we don't know where
9775       # the previous export might be, and in this LU we search for it and
9776       # remove it from its current node. In the future we could fix this by:
9777       #  - making a tasklet to search (share-lock all), then create the
9778       #    new one, then one to remove, after
9779       #  - removing the removal operation altogether
9780       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9781
9782   def DeclareLocks(self, level):
9783     """Last minute lock declaration."""
9784     # All nodes are locked anyway, so nothing to do here.
9785
9786   def BuildHooksEnv(self):
9787     """Build hooks env.
9788
9789     This will run on the master, primary node and target node.
9790
9791     """
9792     env = {
9793       "EXPORT_MODE": self.op.mode,
9794       "EXPORT_NODE": self.op.target_node,
9795       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9796       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9797       # TODO: Generic function for boolean env variables
9798       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9799       }
9800
9801     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9802
9803     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9804
9805     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9806       nl.append(self.op.target_node)
9807
9808     return env, nl, nl
9809
9810   def CheckPrereq(self):
9811     """Check prerequisites.
9812
9813     This checks that the instance and node names are valid.
9814
9815     """
9816     instance_name = self.op.instance_name
9817
9818     self.instance = self.cfg.GetInstanceInfo(instance_name)
9819     assert self.instance is not None, \
9820           "Cannot retrieve locked instance %s" % self.op.instance_name
9821     _CheckNodeOnline(self, self.instance.primary_node)
9822
9823     if (self.op.remove_instance and self.instance.admin_up and
9824         not self.op.shutdown):
9825       raise errors.OpPrereqError("Can not remove instance without shutting it"
9826                                  " down before")
9827
9828     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9829       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9830       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9831       assert self.dst_node is not None
9832
9833       _CheckNodeOnline(self, self.dst_node.name)
9834       _CheckNodeNotDrained(self, self.dst_node.name)
9835
9836       self._cds = None
9837       self.dest_disk_info = None
9838       self.dest_x509_ca = None
9839
9840     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9841       self.dst_node = None
9842
9843       if len(self.op.target_node) != len(self.instance.disks):
9844         raise errors.OpPrereqError(("Received destination information for %s"
9845                                     " disks, but instance %s has %s disks") %
9846                                    (len(self.op.target_node), instance_name,
9847                                     len(self.instance.disks)),
9848                                    errors.ECODE_INVAL)
9849
9850       cds = _GetClusterDomainSecret()
9851
9852       # Check X509 key name
9853       try:
9854         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9855       except (TypeError, ValueError), err:
9856         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9857
9858       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9859         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9860                                    errors.ECODE_INVAL)
9861
9862       # Load and verify CA
9863       try:
9864         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9865       except OpenSSL.crypto.Error, err:
9866         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9867                                    (err, ), errors.ECODE_INVAL)
9868
9869       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9870       if errcode is not None:
9871         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9872                                    (msg, ), errors.ECODE_INVAL)
9873
9874       self.dest_x509_ca = cert
9875
9876       # Verify target information
9877       disk_info = []
9878       for idx, disk_data in enumerate(self.op.target_node):
9879         try:
9880           (host, port, magic) = \
9881             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9882         except errors.GenericError, err:
9883           raise errors.OpPrereqError("Target info for disk %s: %s" %
9884                                      (idx, err), errors.ECODE_INVAL)
9885
9886         disk_info.append((host, port, magic))
9887
9888       assert len(disk_info) == len(self.op.target_node)
9889       self.dest_disk_info = disk_info
9890
9891     else:
9892       raise errors.ProgrammerError("Unhandled export mode %r" %
9893                                    self.op.mode)
9894
9895     # instance disk type verification
9896     # TODO: Implement export support for file-based disks
9897     for disk in self.instance.disks:
9898       if disk.dev_type == constants.LD_FILE:
9899         raise errors.OpPrereqError("Export not supported for instances with"
9900                                    " file-based disks", errors.ECODE_INVAL)
9901
9902   def _CleanupExports(self, feedback_fn):
9903     """Removes exports of current instance from all other nodes.
9904
9905     If an instance in a cluster with nodes A..D was exported to node C, its
9906     exports will be removed from the nodes A, B and D.
9907
9908     """
9909     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9910
9911     nodelist = self.cfg.GetNodeList()
9912     nodelist.remove(self.dst_node.name)
9913
9914     # on one-node clusters nodelist will be empty after the removal
9915     # if we proceed the backup would be removed because OpBackupQuery
9916     # substitutes an empty list with the full cluster node list.
9917     iname = self.instance.name
9918     if nodelist:
9919       feedback_fn("Removing old exports for instance %s" % iname)
9920       exportlist = self.rpc.call_export_list(nodelist)
9921       for node in exportlist:
9922         if exportlist[node].fail_msg:
9923           continue
9924         if iname in exportlist[node].payload:
9925           msg = self.rpc.call_export_remove(node, iname).fail_msg
9926           if msg:
9927             self.LogWarning("Could not remove older export for instance %s"
9928                             " on node %s: %s", iname, node, msg)
9929
9930   def Exec(self, feedback_fn):
9931     """Export an instance to an image in the cluster.
9932
9933     """
9934     assert self.op.mode in constants.EXPORT_MODES
9935
9936     instance = self.instance
9937     src_node = instance.primary_node
9938
9939     if self.op.shutdown:
9940       # shutdown the instance, but not the disks
9941       feedback_fn("Shutting down instance %s" % instance.name)
9942       result = self.rpc.call_instance_shutdown(src_node, instance,
9943                                                self.op.shutdown_timeout)
9944       # TODO: Maybe ignore failures if ignore_remove_failures is set
9945       result.Raise("Could not shutdown instance %s on"
9946                    " node %s" % (instance.name, src_node))
9947
9948     # set the disks ID correctly since call_instance_start needs the
9949     # correct drbd minor to create the symlinks
9950     for disk in instance.disks:
9951       self.cfg.SetDiskID(disk, src_node)
9952
9953     activate_disks = (not instance.admin_up)
9954
9955     if activate_disks:
9956       # Activate the instance disks if we'exporting a stopped instance
9957       feedback_fn("Activating disks for %s" % instance.name)
9958       _StartInstanceDisks(self, instance, None)
9959
9960     try:
9961       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9962                                                      instance)
9963
9964       helper.CreateSnapshots()
9965       try:
9966         if (self.op.shutdown and instance.admin_up and
9967             not self.op.remove_instance):
9968           assert not activate_disks
9969           feedback_fn("Starting instance %s" % instance.name)
9970           result = self.rpc.call_instance_start(src_node, instance, None, None)
9971           msg = result.fail_msg
9972           if msg:
9973             feedback_fn("Failed to start instance: %s" % msg)
9974             _ShutdownInstanceDisks(self, instance)
9975             raise errors.OpExecError("Could not start instance: %s" % msg)
9976
9977         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9978           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9979         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9980           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9981           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9982
9983           (key_name, _, _) = self.x509_key_name
9984
9985           dest_ca_pem = \
9986             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9987                                             self.dest_x509_ca)
9988
9989           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9990                                                      key_name, dest_ca_pem,
9991                                                      timeouts)
9992       finally:
9993         helper.Cleanup()
9994
9995       # Check for backwards compatibility
9996       assert len(dresults) == len(instance.disks)
9997       assert compat.all(isinstance(i, bool) for i in dresults), \
9998              "Not all results are boolean: %r" % dresults
9999
10000     finally:
10001       if activate_disks:
10002         feedback_fn("Deactivating disks for %s" % instance.name)
10003         _ShutdownInstanceDisks(self, instance)
10004
10005     if not (compat.all(dresults) and fin_resu):
10006       failures = []
10007       if not fin_resu:
10008         failures.append("export finalization")
10009       if not compat.all(dresults):
10010         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10011                                if not dsk)
10012         failures.append("disk export: disk(s) %s" % fdsk)
10013
10014       raise errors.OpExecError("Export failed, errors in %s" %
10015                                utils.CommaJoin(failures))
10016
10017     # At this point, the export was successful, we can cleanup/finish
10018
10019     # Remove instance if requested
10020     if self.op.remove_instance:
10021       feedback_fn("Removing instance %s" % instance.name)
10022       _RemoveInstance(self, feedback_fn, instance,
10023                       self.op.ignore_remove_failures)
10024
10025     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10026       self._CleanupExports(feedback_fn)
10027
10028     return fin_resu, dresults
10029
10030
10031 class LUBackupRemove(NoHooksLU):
10032   """Remove exports related to the named instance.
10033
10034   """
10035   REQ_BGL = False
10036
10037   def ExpandNames(self):
10038     self.needed_locks = {}
10039     # We need all nodes to be locked in order for RemoveExport to work, but we
10040     # don't need to lock the instance itself, as nothing will happen to it (and
10041     # we can remove exports also for a removed instance)
10042     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10043
10044   def Exec(self, feedback_fn):
10045     """Remove any export.
10046
10047     """
10048     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10049     # If the instance was not found we'll try with the name that was passed in.
10050     # This will only work if it was an FQDN, though.
10051     fqdn_warn = False
10052     if not instance_name:
10053       fqdn_warn = True
10054       instance_name = self.op.instance_name
10055
10056     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10057     exportlist = self.rpc.call_export_list(locked_nodes)
10058     found = False
10059     for node in exportlist:
10060       msg = exportlist[node].fail_msg
10061       if msg:
10062         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10063         continue
10064       if instance_name in exportlist[node].payload:
10065         found = True
10066         result = self.rpc.call_export_remove(node, instance_name)
10067         msg = result.fail_msg
10068         if msg:
10069           logging.error("Could not remove export for instance %s"
10070                         " on node %s: %s", instance_name, node, msg)
10071
10072     if fqdn_warn and not found:
10073       feedback_fn("Export not found. If trying to remove an export belonging"
10074                   " to a deleted instance please use its Fully Qualified"
10075                   " Domain Name.")
10076
10077
10078 class LUGroupAdd(LogicalUnit):
10079   """Logical unit for creating node groups.
10080
10081   """
10082   HPATH = "group-add"
10083   HTYPE = constants.HTYPE_GROUP
10084   REQ_BGL = False
10085
10086   def ExpandNames(self):
10087     # We need the new group's UUID here so that we can create and acquire the
10088     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10089     # that it should not check whether the UUID exists in the configuration.
10090     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10091     self.needed_locks = {}
10092     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10093
10094   def CheckPrereq(self):
10095     """Check prerequisites.
10096
10097     This checks that the given group name is not an existing node group
10098     already.
10099
10100     """
10101     try:
10102       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10103     except errors.OpPrereqError:
10104       pass
10105     else:
10106       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10107                                  " node group (UUID: %s)" %
10108                                  (self.op.group_name, existing_uuid),
10109                                  errors.ECODE_EXISTS)
10110
10111     if self.op.ndparams:
10112       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10113
10114   def BuildHooksEnv(self):
10115     """Build hooks env.
10116
10117     """
10118     env = {
10119       "GROUP_NAME": self.op.group_name,
10120       }
10121     mn = self.cfg.GetMasterNode()
10122     return env, [mn], [mn]
10123
10124   def Exec(self, feedback_fn):
10125     """Add the node group to the cluster.
10126
10127     """
10128     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10129                                   uuid=self.group_uuid,
10130                                   alloc_policy=self.op.alloc_policy,
10131                                   ndparams=self.op.ndparams)
10132
10133     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10134     del self.remove_locks[locking.LEVEL_NODEGROUP]
10135
10136
10137 class LUGroupAssignNodes(NoHooksLU):
10138   """Logical unit for assigning nodes to groups.
10139
10140   """
10141   REQ_BGL = False
10142
10143   def ExpandNames(self):
10144     # These raise errors.OpPrereqError on their own:
10145     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10146     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10147
10148     # We want to lock all the affected nodes and groups. We have readily
10149     # available the list of nodes, and the *destination* group. To gather the
10150     # list of "source" groups, we need to fetch node information.
10151     self.node_data = self.cfg.GetAllNodesInfo()
10152     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10153     affected_groups.add(self.group_uuid)
10154
10155     self.needed_locks = {
10156       locking.LEVEL_NODEGROUP: list(affected_groups),
10157       locking.LEVEL_NODE: self.op.nodes,
10158       }
10159
10160   def CheckPrereq(self):
10161     """Check prerequisites.
10162
10163     """
10164     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10165     instance_data = self.cfg.GetAllInstancesInfo()
10166
10167     if self.group is None:
10168       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10169                                (self.op.group_name, self.group_uuid))
10170
10171     (new_splits, previous_splits) = \
10172       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10173                                              for node in self.op.nodes],
10174                                             self.node_data, instance_data)
10175
10176     if new_splits:
10177       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10178
10179       if not self.op.force:
10180         raise errors.OpExecError("The following instances get split by this"
10181                                  " change and --force was not given: %s" %
10182                                  fmt_new_splits)
10183       else:
10184         self.LogWarning("This operation will split the following instances: %s",
10185                         fmt_new_splits)
10186
10187         if previous_splits:
10188           self.LogWarning("In addition, these already-split instances continue"
10189                           " to be spit across groups: %s",
10190                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10191
10192   def Exec(self, feedback_fn):
10193     """Assign nodes to a new group.
10194
10195     """
10196     for node in self.op.nodes:
10197       self.node_data[node].group = self.group_uuid
10198
10199     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10200
10201   @staticmethod
10202   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10203     """Check for split instances after a node assignment.
10204
10205     This method considers a series of node assignments as an atomic operation,
10206     and returns information about split instances after applying the set of
10207     changes.
10208
10209     In particular, it returns information about newly split instances, and
10210     instances that were already split, and remain so after the change.
10211
10212     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10213     considered.
10214
10215     @type changes: list of (node_name, new_group_uuid) pairs.
10216     @param changes: list of node assignments to consider.
10217     @param node_data: a dict with data for all nodes
10218     @param instance_data: a dict with all instances to consider
10219     @rtype: a two-tuple
10220     @return: a list of instances that were previously okay and result split as a
10221       consequence of this change, and a list of instances that were previously
10222       split and this change does not fix.
10223
10224     """
10225     changed_nodes = dict((node, group) for node, group in changes
10226                          if node_data[node].group != group)
10227
10228     all_split_instances = set()
10229     previously_split_instances = set()
10230
10231     def InstanceNodes(instance):
10232       return [instance.primary_node] + list(instance.secondary_nodes)
10233
10234     for inst in instance_data.values():
10235       if inst.disk_template not in constants.DTS_NET_MIRROR:
10236         continue
10237
10238       instance_nodes = InstanceNodes(inst)
10239
10240       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10241         previously_split_instances.add(inst.name)
10242
10243       if len(set(changed_nodes.get(node, node_data[node].group)
10244                  for node in instance_nodes)) > 1:
10245         all_split_instances.add(inst.name)
10246
10247     return (list(all_split_instances - previously_split_instances),
10248             list(previously_split_instances & all_split_instances))
10249
10250
10251 class _GroupQuery(_QueryBase):
10252   FIELDS = query.GROUP_FIELDS
10253
10254   def ExpandNames(self, lu):
10255     lu.needed_locks = {}
10256
10257     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10258     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10259
10260     if not self.names:
10261       self.wanted = [name_to_uuid[name]
10262                      for name in utils.NiceSort(name_to_uuid.keys())]
10263     else:
10264       # Accept names to be either names or UUIDs.
10265       missing = []
10266       self.wanted = []
10267       all_uuid = frozenset(self._all_groups.keys())
10268
10269       for name in self.names:
10270         if name in all_uuid:
10271           self.wanted.append(name)
10272         elif name in name_to_uuid:
10273           self.wanted.append(name_to_uuid[name])
10274         else:
10275           missing.append(name)
10276
10277       if missing:
10278         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10279                                    errors.ECODE_NOENT)
10280
10281   def DeclareLocks(self, lu, level):
10282     pass
10283
10284   def _GetQueryData(self, lu):
10285     """Computes the list of node groups and their attributes.
10286
10287     """
10288     do_nodes = query.GQ_NODE in self.requested_data
10289     do_instances = query.GQ_INST in self.requested_data
10290
10291     group_to_nodes = None
10292     group_to_instances = None
10293
10294     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10295     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10296     # latter GetAllInstancesInfo() is not enough, for we have to go through
10297     # instance->node. Hence, we will need to process nodes even if we only need
10298     # instance information.
10299     if do_nodes or do_instances:
10300       all_nodes = lu.cfg.GetAllNodesInfo()
10301       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10302       node_to_group = {}
10303
10304       for node in all_nodes.values():
10305         if node.group in group_to_nodes:
10306           group_to_nodes[node.group].append(node.name)
10307           node_to_group[node.name] = node.group
10308
10309       if do_instances:
10310         all_instances = lu.cfg.GetAllInstancesInfo()
10311         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10312
10313         for instance in all_instances.values():
10314           node = instance.primary_node
10315           if node in node_to_group:
10316             group_to_instances[node_to_group[node]].append(instance.name)
10317
10318         if not do_nodes:
10319           # Do not pass on node information if it was not requested.
10320           group_to_nodes = None
10321
10322     return query.GroupQueryData([self._all_groups[uuid]
10323                                  for uuid in self.wanted],
10324                                 group_to_nodes, group_to_instances)
10325
10326
10327 class LUGroupQuery(NoHooksLU):
10328   """Logical unit for querying node groups.
10329
10330   """
10331   REQ_BGL = False
10332
10333   def CheckArguments(self):
10334     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10335                           self.op.output_fields, False)
10336
10337   def ExpandNames(self):
10338     self.gq.ExpandNames(self)
10339
10340   def Exec(self, feedback_fn):
10341     return self.gq.OldStyleQuery(self)
10342
10343
10344 class LUGroupSetParams(LogicalUnit):
10345   """Modifies the parameters of a node group.
10346
10347   """
10348   HPATH = "group-modify"
10349   HTYPE = constants.HTYPE_GROUP
10350   REQ_BGL = False
10351
10352   def CheckArguments(self):
10353     all_changes = [
10354       self.op.ndparams,
10355       self.op.alloc_policy,
10356       ]
10357
10358     if all_changes.count(None) == len(all_changes):
10359       raise errors.OpPrereqError("Please pass at least one modification",
10360                                  errors.ECODE_INVAL)
10361
10362   def ExpandNames(self):
10363     # This raises errors.OpPrereqError on its own:
10364     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10365
10366     self.needed_locks = {
10367       locking.LEVEL_NODEGROUP: [self.group_uuid],
10368       }
10369
10370   def CheckPrereq(self):
10371     """Check prerequisites.
10372
10373     """
10374     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10375
10376     if self.group is None:
10377       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10378                                (self.op.group_name, self.group_uuid))
10379
10380     if self.op.ndparams:
10381       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10382       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10383       self.new_ndparams = new_ndparams
10384
10385   def BuildHooksEnv(self):
10386     """Build hooks env.
10387
10388     """
10389     env = {
10390       "GROUP_NAME": self.op.group_name,
10391       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10392       }
10393     mn = self.cfg.GetMasterNode()
10394     return env, [mn], [mn]
10395
10396   def Exec(self, feedback_fn):
10397     """Modifies the node group.
10398
10399     """
10400     result = []
10401
10402     if self.op.ndparams:
10403       self.group.ndparams = self.new_ndparams
10404       result.append(("ndparams", str(self.group.ndparams)))
10405
10406     if self.op.alloc_policy:
10407       self.group.alloc_policy = self.op.alloc_policy
10408
10409     self.cfg.Update(self.group, feedback_fn)
10410     return result
10411
10412
10413
10414 class LUGroupRemove(LogicalUnit):
10415   HPATH = "group-remove"
10416   HTYPE = constants.HTYPE_GROUP
10417   REQ_BGL = False
10418
10419   def ExpandNames(self):
10420     # This will raises errors.OpPrereqError on its own:
10421     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10422     self.needed_locks = {
10423       locking.LEVEL_NODEGROUP: [self.group_uuid],
10424       }
10425
10426   def CheckPrereq(self):
10427     """Check prerequisites.
10428
10429     This checks that the given group name exists as a node group, that is
10430     empty (i.e., contains no nodes), and that is not the last group of the
10431     cluster.
10432
10433     """
10434     # Verify that the group is empty.
10435     group_nodes = [node.name
10436                    for node in self.cfg.GetAllNodesInfo().values()
10437                    if node.group == self.group_uuid]
10438
10439     if group_nodes:
10440       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10441                                  " nodes: %s" %
10442                                  (self.op.group_name,
10443                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10444                                  errors.ECODE_STATE)
10445
10446     # Verify the cluster would not be left group-less.
10447     if len(self.cfg.GetNodeGroupList()) == 1:
10448       raise errors.OpPrereqError("Group '%s' is the only group,"
10449                                  " cannot be removed" %
10450                                  self.op.group_name,
10451                                  errors.ECODE_STATE)
10452
10453   def BuildHooksEnv(self):
10454     """Build hooks env.
10455
10456     """
10457     env = {
10458       "GROUP_NAME": self.op.group_name,
10459       }
10460     mn = self.cfg.GetMasterNode()
10461     return env, [mn], [mn]
10462
10463   def Exec(self, feedback_fn):
10464     """Remove the node group.
10465
10466     """
10467     try:
10468       self.cfg.RemoveNodeGroup(self.group_uuid)
10469     except errors.ConfigurationError:
10470       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10471                                (self.op.group_name, self.group_uuid))
10472
10473     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10474
10475
10476 class LUGroupRename(LogicalUnit):
10477   HPATH = "group-rename"
10478   HTYPE = constants.HTYPE_GROUP
10479   REQ_BGL = False
10480
10481   def ExpandNames(self):
10482     # This raises errors.OpPrereqError on its own:
10483     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10484
10485     self.needed_locks = {
10486       locking.LEVEL_NODEGROUP: [self.group_uuid],
10487       }
10488
10489   def CheckPrereq(self):
10490     """Check prerequisites.
10491
10492     Ensures requested new name is not yet used.
10493
10494     """
10495     try:
10496       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10497     except errors.OpPrereqError:
10498       pass
10499     else:
10500       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10501                                  " node group (UUID: %s)" %
10502                                  (self.op.new_name, new_name_uuid),
10503                                  errors.ECODE_EXISTS)
10504
10505   def BuildHooksEnv(self):
10506     """Build hooks env.
10507
10508     """
10509     env = {
10510       "OLD_NAME": self.op.group_name,
10511       "NEW_NAME": self.op.new_name,
10512       }
10513
10514     mn = self.cfg.GetMasterNode()
10515     all_nodes = self.cfg.GetAllNodesInfo()
10516     run_nodes = [mn]
10517     all_nodes.pop(mn, None)
10518
10519     for node in all_nodes.values():
10520       if node.group == self.group_uuid:
10521         run_nodes.append(node.name)
10522
10523     return env, run_nodes, run_nodes
10524
10525   def Exec(self, feedback_fn):
10526     """Rename the node group.
10527
10528     """
10529     group = self.cfg.GetNodeGroup(self.group_uuid)
10530
10531     if group is None:
10532       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10533                                (self.op.group_name, self.group_uuid))
10534
10535     group.name = self.op.new_name
10536     self.cfg.Update(group, feedback_fn)
10537
10538     return self.op.new_name
10539
10540
10541 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10542   """Generic tags LU.
10543
10544   This is an abstract class which is the parent of all the other tags LUs.
10545
10546   """
10547
10548   def ExpandNames(self):
10549     self.needed_locks = {}
10550     if self.op.kind == constants.TAG_NODE:
10551       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10552       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10553     elif self.op.kind == constants.TAG_INSTANCE:
10554       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10555       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10556
10557     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10558     # not possible to acquire the BGL based on opcode parameters)
10559
10560   def CheckPrereq(self):
10561     """Check prerequisites.
10562
10563     """
10564     if self.op.kind == constants.TAG_CLUSTER:
10565       self.target = self.cfg.GetClusterInfo()
10566     elif self.op.kind == constants.TAG_NODE:
10567       self.target = self.cfg.GetNodeInfo(self.op.name)
10568     elif self.op.kind == constants.TAG_INSTANCE:
10569       self.target = self.cfg.GetInstanceInfo(self.op.name)
10570     else:
10571       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10572                                  str(self.op.kind), errors.ECODE_INVAL)
10573
10574
10575 class LUTagsGet(TagsLU):
10576   """Returns the tags of a given object.
10577
10578   """
10579   REQ_BGL = False
10580
10581   def ExpandNames(self):
10582     TagsLU.ExpandNames(self)
10583
10584     # Share locks as this is only a read operation
10585     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10586
10587   def Exec(self, feedback_fn):
10588     """Returns the tag list.
10589
10590     """
10591     return list(self.target.GetTags())
10592
10593
10594 class LUTagsSearch(NoHooksLU):
10595   """Searches the tags for a given pattern.
10596
10597   """
10598   REQ_BGL = False
10599
10600   def ExpandNames(self):
10601     self.needed_locks = {}
10602
10603   def CheckPrereq(self):
10604     """Check prerequisites.
10605
10606     This checks the pattern passed for validity by compiling it.
10607
10608     """
10609     try:
10610       self.re = re.compile(self.op.pattern)
10611     except re.error, err:
10612       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10613                                  (self.op.pattern, err), errors.ECODE_INVAL)
10614
10615   def Exec(self, feedback_fn):
10616     """Returns the tag list.
10617
10618     """
10619     cfg = self.cfg
10620     tgts = [("/cluster", cfg.GetClusterInfo())]
10621     ilist = cfg.GetAllInstancesInfo().values()
10622     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10623     nlist = cfg.GetAllNodesInfo().values()
10624     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10625     results = []
10626     for path, target in tgts:
10627       for tag in target.GetTags():
10628         if self.re.search(tag):
10629           results.append((path, tag))
10630     return results
10631
10632
10633 class LUTagsSet(TagsLU):
10634   """Sets a tag on a given object.
10635
10636   """
10637   REQ_BGL = False
10638
10639   def CheckPrereq(self):
10640     """Check prerequisites.
10641
10642     This checks the type and length of the tag name and value.
10643
10644     """
10645     TagsLU.CheckPrereq(self)
10646     for tag in self.op.tags:
10647       objects.TaggableObject.ValidateTag(tag)
10648
10649   def Exec(self, feedback_fn):
10650     """Sets the tag.
10651
10652     """
10653     try:
10654       for tag in self.op.tags:
10655         self.target.AddTag(tag)
10656     except errors.TagError, err:
10657       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10658     self.cfg.Update(self.target, feedback_fn)
10659
10660
10661 class LUTagsDel(TagsLU):
10662   """Delete a list of tags from a given object.
10663
10664   """
10665   REQ_BGL = False
10666
10667   def CheckPrereq(self):
10668     """Check prerequisites.
10669
10670     This checks that we have the given tag.
10671
10672     """
10673     TagsLU.CheckPrereq(self)
10674     for tag in self.op.tags:
10675       objects.TaggableObject.ValidateTag(tag)
10676     del_tags = frozenset(self.op.tags)
10677     cur_tags = self.target.GetTags()
10678
10679     diff_tags = del_tags - cur_tags
10680     if diff_tags:
10681       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10682       raise errors.OpPrereqError("Tag(s) %s not found" %
10683                                  (utils.CommaJoin(diff_names), ),
10684                                  errors.ECODE_NOENT)
10685
10686   def Exec(self, feedback_fn):
10687     """Remove the tag from the object.
10688
10689     """
10690     for tag in self.op.tags:
10691       self.target.RemoveTag(tag)
10692     self.cfg.Update(self.target, feedback_fn)
10693
10694
10695 class LUTestDelay(NoHooksLU):
10696   """Sleep for a specified amount of time.
10697
10698   This LU sleeps on the master and/or nodes for a specified amount of
10699   time.
10700
10701   """
10702   REQ_BGL = False
10703
10704   def ExpandNames(self):
10705     """Expand names and set required locks.
10706
10707     This expands the node list, if any.
10708
10709     """
10710     self.needed_locks = {}
10711     if self.op.on_nodes:
10712       # _GetWantedNodes can be used here, but is not always appropriate to use
10713       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10714       # more information.
10715       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10716       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10717
10718   def _TestDelay(self):
10719     """Do the actual sleep.
10720
10721     """
10722     if self.op.on_master:
10723       if not utils.TestDelay(self.op.duration):
10724         raise errors.OpExecError("Error during master delay test")
10725     if self.op.on_nodes:
10726       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10727       for node, node_result in result.items():
10728         node_result.Raise("Failure during rpc call to node %s" % node)
10729
10730   def Exec(self, feedback_fn):
10731     """Execute the test delay opcode, with the wanted repetitions.
10732
10733     """
10734     if self.op.repeat == 0:
10735       self._TestDelay()
10736     else:
10737       top_value = self.op.repeat - 1
10738       for i in range(self.op.repeat):
10739         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10740         self._TestDelay()
10741
10742
10743 class LUTestJqueue(NoHooksLU):
10744   """Utility LU to test some aspects of the job queue.
10745
10746   """
10747   REQ_BGL = False
10748
10749   # Must be lower than default timeout for WaitForJobChange to see whether it
10750   # notices changed jobs
10751   _CLIENT_CONNECT_TIMEOUT = 20.0
10752   _CLIENT_CONFIRM_TIMEOUT = 60.0
10753
10754   @classmethod
10755   def _NotifyUsingSocket(cls, cb, errcls):
10756     """Opens a Unix socket and waits for another program to connect.
10757
10758     @type cb: callable
10759     @param cb: Callback to send socket name to client
10760     @type errcls: class
10761     @param errcls: Exception class to use for errors
10762
10763     """
10764     # Using a temporary directory as there's no easy way to create temporary
10765     # sockets without writing a custom loop around tempfile.mktemp and
10766     # socket.bind
10767     tmpdir = tempfile.mkdtemp()
10768     try:
10769       tmpsock = utils.PathJoin(tmpdir, "sock")
10770
10771       logging.debug("Creating temporary socket at %s", tmpsock)
10772       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10773       try:
10774         sock.bind(tmpsock)
10775         sock.listen(1)
10776
10777         # Send details to client
10778         cb(tmpsock)
10779
10780         # Wait for client to connect before continuing
10781         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10782         try:
10783           (conn, _) = sock.accept()
10784         except socket.error, err:
10785           raise errcls("Client didn't connect in time (%s)" % err)
10786       finally:
10787         sock.close()
10788     finally:
10789       # Remove as soon as client is connected
10790       shutil.rmtree(tmpdir)
10791
10792     # Wait for client to close
10793     try:
10794       try:
10795         # pylint: disable-msg=E1101
10796         # Instance of '_socketobject' has no ... member
10797         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10798         conn.recv(1)
10799       except socket.error, err:
10800         raise errcls("Client failed to confirm notification (%s)" % err)
10801     finally:
10802       conn.close()
10803
10804   def _SendNotification(self, test, arg, sockname):
10805     """Sends a notification to the client.
10806
10807     @type test: string
10808     @param test: Test name
10809     @param arg: Test argument (depends on test)
10810     @type sockname: string
10811     @param sockname: Socket path
10812
10813     """
10814     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10815
10816   def _Notify(self, prereq, test, arg):
10817     """Notifies the client of a test.
10818
10819     @type prereq: bool
10820     @param prereq: Whether this is a prereq-phase test
10821     @type test: string
10822     @param test: Test name
10823     @param arg: Test argument (depends on test)
10824
10825     """
10826     if prereq:
10827       errcls = errors.OpPrereqError
10828     else:
10829       errcls = errors.OpExecError
10830
10831     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10832                                                   test, arg),
10833                                    errcls)
10834
10835   def CheckArguments(self):
10836     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10837     self.expandnames_calls = 0
10838
10839   def ExpandNames(self):
10840     checkargs_calls = getattr(self, "checkargs_calls", 0)
10841     if checkargs_calls < 1:
10842       raise errors.ProgrammerError("CheckArguments was not called")
10843
10844     self.expandnames_calls += 1
10845
10846     if self.op.notify_waitlock:
10847       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10848
10849     self.LogInfo("Expanding names")
10850
10851     # Get lock on master node (just to get a lock, not for a particular reason)
10852     self.needed_locks = {
10853       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10854       }
10855
10856   def Exec(self, feedback_fn):
10857     if self.expandnames_calls < 1:
10858       raise errors.ProgrammerError("ExpandNames was not called")
10859
10860     if self.op.notify_exec:
10861       self._Notify(False, constants.JQT_EXEC, None)
10862
10863     self.LogInfo("Executing")
10864
10865     if self.op.log_messages:
10866       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10867       for idx, msg in enumerate(self.op.log_messages):
10868         self.LogInfo("Sending log message %s", idx + 1)
10869         feedback_fn(constants.JQT_MSGPREFIX + msg)
10870         # Report how many test messages have been sent
10871         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10872
10873     if self.op.fail:
10874       raise errors.OpExecError("Opcode failure was requested")
10875
10876     return True
10877
10878
10879 class IAllocator(object):
10880   """IAllocator framework.
10881
10882   An IAllocator instance has three sets of attributes:
10883     - cfg that is needed to query the cluster
10884     - input data (all members of the _KEYS class attribute are required)
10885     - four buffer attributes (in|out_data|text), that represent the
10886       input (to the external script) in text and data structure format,
10887       and the output from it, again in two formats
10888     - the result variables from the script (success, info, nodes) for
10889       easy usage
10890
10891   """
10892   # pylint: disable-msg=R0902
10893   # lots of instance attributes
10894   _ALLO_KEYS = [
10895     "name", "mem_size", "disks", "disk_template",
10896     "os", "tags", "nics", "vcpus", "hypervisor",
10897     ]
10898   _RELO_KEYS = [
10899     "name", "relocate_from",
10900     ]
10901   _EVAC_KEYS = [
10902     "evac_nodes",
10903     ]
10904
10905   def __init__(self, cfg, rpc, mode, **kwargs):
10906     self.cfg = cfg
10907     self.rpc = rpc
10908     # init buffer variables
10909     self.in_text = self.out_text = self.in_data = self.out_data = None
10910     # init all input fields so that pylint is happy
10911     self.mode = mode
10912     self.mem_size = self.disks = self.disk_template = None
10913     self.os = self.tags = self.nics = self.vcpus = None
10914     self.hypervisor = None
10915     self.relocate_from = None
10916     self.name = None
10917     self.evac_nodes = None
10918     # computed fields
10919     self.required_nodes = None
10920     # init result fields
10921     self.success = self.info = self.result = None
10922     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10923       keyset = self._ALLO_KEYS
10924       fn = self._AddNewInstance
10925     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10926       keyset = self._RELO_KEYS
10927       fn = self._AddRelocateInstance
10928     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10929       keyset = self._EVAC_KEYS
10930       fn = self._AddEvacuateNodes
10931     else:
10932       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10933                                    " IAllocator" % self.mode)
10934     for key in kwargs:
10935       if key not in keyset:
10936         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10937                                      " IAllocator" % key)
10938       setattr(self, key, kwargs[key])
10939
10940     for key in keyset:
10941       if key not in kwargs:
10942         raise errors.ProgrammerError("Missing input parameter '%s' to"
10943                                      " IAllocator" % key)
10944     self._BuildInputData(fn)
10945
10946   def _ComputeClusterData(self):
10947     """Compute the generic allocator input data.
10948
10949     This is the data that is independent of the actual operation.
10950
10951     """
10952     cfg = self.cfg
10953     cluster_info = cfg.GetClusterInfo()
10954     # cluster data
10955     data = {
10956       "version": constants.IALLOCATOR_VERSION,
10957       "cluster_name": cfg.GetClusterName(),
10958       "cluster_tags": list(cluster_info.GetTags()),
10959       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10960       # we don't have job IDs
10961       }
10962     ninfo = cfg.GetAllNodesInfo()
10963     iinfo = cfg.GetAllInstancesInfo().values()
10964     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10965
10966     # node data
10967     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10968
10969     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10970       hypervisor_name = self.hypervisor
10971     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10972       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10973     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10974       hypervisor_name = cluster_info.enabled_hypervisors[0]
10975
10976     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10977                                         hypervisor_name)
10978     node_iinfo = \
10979       self.rpc.call_all_instances_info(node_list,
10980                                        cluster_info.enabled_hypervisors)
10981
10982     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10983
10984     config_ndata = self._ComputeBasicNodeData(ninfo)
10985     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10986                                                  i_list, config_ndata)
10987     assert len(data["nodes"]) == len(ninfo), \
10988         "Incomplete node data computed"
10989
10990     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10991
10992     self.in_data = data
10993
10994   @staticmethod
10995   def _ComputeNodeGroupData(cfg):
10996     """Compute node groups data.
10997
10998     """
10999     ng = {}
11000     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11001       ng[guuid] = {
11002         "name": gdata.name,
11003         "alloc_policy": gdata.alloc_policy,
11004         }
11005     return ng
11006
11007   @staticmethod
11008   def _ComputeBasicNodeData(node_cfg):
11009     """Compute global node data.
11010
11011     @rtype: dict
11012     @returns: a dict of name: (node dict, node config)
11013
11014     """
11015     node_results = {}
11016     for ninfo in node_cfg.values():
11017       # fill in static (config-based) values
11018       pnr = {
11019         "tags": list(ninfo.GetTags()),
11020         "primary_ip": ninfo.primary_ip,
11021         "secondary_ip": ninfo.secondary_ip,
11022         "offline": ninfo.offline,
11023         "drained": ninfo.drained,
11024         "master_candidate": ninfo.master_candidate,
11025         "group": ninfo.group,
11026         "master_capable": ninfo.master_capable,
11027         "vm_capable": ninfo.vm_capable,
11028         }
11029
11030       node_results[ninfo.name] = pnr
11031
11032     return node_results
11033
11034   @staticmethod
11035   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11036                               node_results):
11037     """Compute global node data.
11038
11039     @param node_results: the basic node structures as filled from the config
11040
11041     """
11042     # make a copy of the current dict
11043     node_results = dict(node_results)
11044     for nname, nresult in node_data.items():
11045       assert nname in node_results, "Missing basic data for node %s" % nname
11046       ninfo = node_cfg[nname]
11047
11048       if not (ninfo.offline or ninfo.drained):
11049         nresult.Raise("Can't get data for node %s" % nname)
11050         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11051                                 nname)
11052         remote_info = nresult.payload
11053
11054         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11055                      'vg_size', 'vg_free', 'cpu_total']:
11056           if attr not in remote_info:
11057             raise errors.OpExecError("Node '%s' didn't return attribute"
11058                                      " '%s'" % (nname, attr))
11059           if not isinstance(remote_info[attr], int):
11060             raise errors.OpExecError("Node '%s' returned invalid value"
11061                                      " for '%s': %s" %
11062                                      (nname, attr, remote_info[attr]))
11063         # compute memory used by primary instances
11064         i_p_mem = i_p_up_mem = 0
11065         for iinfo, beinfo in i_list:
11066           if iinfo.primary_node == nname:
11067             i_p_mem += beinfo[constants.BE_MEMORY]
11068             if iinfo.name not in node_iinfo[nname].payload:
11069               i_used_mem = 0
11070             else:
11071               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11072             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11073             remote_info['memory_free'] -= max(0, i_mem_diff)
11074
11075             if iinfo.admin_up:
11076               i_p_up_mem += beinfo[constants.BE_MEMORY]
11077
11078         # compute memory used by instances
11079         pnr_dyn = {
11080           "total_memory": remote_info['memory_total'],
11081           "reserved_memory": remote_info['memory_dom0'],
11082           "free_memory": remote_info['memory_free'],
11083           "total_disk": remote_info['vg_size'],
11084           "free_disk": remote_info['vg_free'],
11085           "total_cpus": remote_info['cpu_total'],
11086           "i_pri_memory": i_p_mem,
11087           "i_pri_up_memory": i_p_up_mem,
11088           }
11089         pnr_dyn.update(node_results[nname])
11090         node_results[nname] = pnr_dyn
11091
11092     return node_results
11093
11094   @staticmethod
11095   def _ComputeInstanceData(cluster_info, i_list):
11096     """Compute global instance data.
11097
11098     """
11099     instance_data = {}
11100     for iinfo, beinfo in i_list:
11101       nic_data = []
11102       for nic in iinfo.nics:
11103         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11104         nic_dict = {"mac": nic.mac,
11105                     "ip": nic.ip,
11106                     "mode": filled_params[constants.NIC_MODE],
11107                     "link": filled_params[constants.NIC_LINK],
11108                    }
11109         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11110           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11111         nic_data.append(nic_dict)
11112       pir = {
11113         "tags": list(iinfo.GetTags()),
11114         "admin_up": iinfo.admin_up,
11115         "vcpus": beinfo[constants.BE_VCPUS],
11116         "memory": beinfo[constants.BE_MEMORY],
11117         "os": iinfo.os,
11118         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11119         "nics": nic_data,
11120         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11121         "disk_template": iinfo.disk_template,
11122         "hypervisor": iinfo.hypervisor,
11123         }
11124       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11125                                                  pir["disks"])
11126       instance_data[iinfo.name] = pir
11127
11128     return instance_data
11129
11130   def _AddNewInstance(self):
11131     """Add new instance data to allocator structure.
11132
11133     This in combination with _AllocatorGetClusterData will create the
11134     correct structure needed as input for the allocator.
11135
11136     The checks for the completeness of the opcode must have already been
11137     done.
11138
11139     """
11140     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11141
11142     if self.disk_template in constants.DTS_NET_MIRROR:
11143       self.required_nodes = 2
11144     else:
11145       self.required_nodes = 1
11146     request = {
11147       "name": self.name,
11148       "disk_template": self.disk_template,
11149       "tags": self.tags,
11150       "os": self.os,
11151       "vcpus": self.vcpus,
11152       "memory": self.mem_size,
11153       "disks": self.disks,
11154       "disk_space_total": disk_space,
11155       "nics": self.nics,
11156       "required_nodes": self.required_nodes,
11157       }
11158     return request
11159
11160   def _AddRelocateInstance(self):
11161     """Add relocate instance data to allocator structure.
11162
11163     This in combination with _IAllocatorGetClusterData will create the
11164     correct structure needed as input for the allocator.
11165
11166     The checks for the completeness of the opcode must have already been
11167     done.
11168
11169     """
11170     instance = self.cfg.GetInstanceInfo(self.name)
11171     if instance is None:
11172       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11173                                    " IAllocator" % self.name)
11174
11175     if instance.disk_template not in constants.DTS_NET_MIRROR:
11176       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11177                                  errors.ECODE_INVAL)
11178
11179     if len(instance.secondary_nodes) != 1:
11180       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11181                                  errors.ECODE_STATE)
11182
11183     self.required_nodes = 1
11184     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11185     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11186
11187     request = {
11188       "name": self.name,
11189       "disk_space_total": disk_space,
11190       "required_nodes": self.required_nodes,
11191       "relocate_from": self.relocate_from,
11192       }
11193     return request
11194
11195   def _AddEvacuateNodes(self):
11196     """Add evacuate nodes data to allocator structure.
11197
11198     """
11199     request = {
11200       "evac_nodes": self.evac_nodes
11201       }
11202     return request
11203
11204   def _BuildInputData(self, fn):
11205     """Build input data structures.
11206
11207     """
11208     self._ComputeClusterData()
11209
11210     request = fn()
11211     request["type"] = self.mode
11212     self.in_data["request"] = request
11213
11214     self.in_text = serializer.Dump(self.in_data)
11215
11216   def Run(self, name, validate=True, call_fn=None):
11217     """Run an instance allocator and return the results.
11218
11219     """
11220     if call_fn is None:
11221       call_fn = self.rpc.call_iallocator_runner
11222
11223     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11224     result.Raise("Failure while running the iallocator script")
11225
11226     self.out_text = result.payload
11227     if validate:
11228       self._ValidateResult()
11229
11230   def _ValidateResult(self):
11231     """Process the allocator results.
11232
11233     This will process and if successful save the result in
11234     self.out_data and the other parameters.
11235
11236     """
11237     try:
11238       rdict = serializer.Load(self.out_text)
11239     except Exception, err:
11240       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11241
11242     if not isinstance(rdict, dict):
11243       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11244
11245     # TODO: remove backwards compatiblity in later versions
11246     if "nodes" in rdict and "result" not in rdict:
11247       rdict["result"] = rdict["nodes"]
11248       del rdict["nodes"]
11249
11250     for key in "success", "info", "result":
11251       if key not in rdict:
11252         raise errors.OpExecError("Can't parse iallocator results:"
11253                                  " missing key '%s'" % key)
11254       setattr(self, key, rdict[key])
11255
11256     if not isinstance(rdict["result"], list):
11257       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11258                                " is not a list")
11259     self.out_data = rdict
11260
11261
11262 class LUTestAllocator(NoHooksLU):
11263   """Run allocator tests.
11264
11265   This LU runs the allocator tests
11266
11267   """
11268   def CheckPrereq(self):
11269     """Check prerequisites.
11270
11271     This checks the opcode parameters depending on the director and mode test.
11272
11273     """
11274     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11275       for attr in ["mem_size", "disks", "disk_template",
11276                    "os", "tags", "nics", "vcpus"]:
11277         if not hasattr(self.op, attr):
11278           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11279                                      attr, errors.ECODE_INVAL)
11280       iname = self.cfg.ExpandInstanceName(self.op.name)
11281       if iname is not None:
11282         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11283                                    iname, errors.ECODE_EXISTS)
11284       if not isinstance(self.op.nics, list):
11285         raise errors.OpPrereqError("Invalid parameter 'nics'",
11286                                    errors.ECODE_INVAL)
11287       if not isinstance(self.op.disks, list):
11288         raise errors.OpPrereqError("Invalid parameter 'disks'",
11289                                    errors.ECODE_INVAL)
11290       for row in self.op.disks:
11291         if (not isinstance(row, dict) or
11292             "size" not in row or
11293             not isinstance(row["size"], int) or
11294             "mode" not in row or
11295             row["mode"] not in ['r', 'w']):
11296           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11297                                      " parameter", errors.ECODE_INVAL)
11298       if self.op.hypervisor is None:
11299         self.op.hypervisor = self.cfg.GetHypervisorType()
11300     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11301       fname = _ExpandInstanceName(self.cfg, self.op.name)
11302       self.op.name = fname
11303       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11304     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11305       if not hasattr(self.op, "evac_nodes"):
11306         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11307                                    " opcode input", errors.ECODE_INVAL)
11308     else:
11309       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11310                                  self.op.mode, errors.ECODE_INVAL)
11311
11312     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11313       if self.op.allocator is None:
11314         raise errors.OpPrereqError("Missing allocator name",
11315                                    errors.ECODE_INVAL)
11316     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11317       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11318                                  self.op.direction, errors.ECODE_INVAL)
11319
11320   def Exec(self, feedback_fn):
11321     """Run the allocator test.
11322
11323     """
11324     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11325       ial = IAllocator(self.cfg, self.rpc,
11326                        mode=self.op.mode,
11327                        name=self.op.name,
11328                        mem_size=self.op.mem_size,
11329                        disks=self.op.disks,
11330                        disk_template=self.op.disk_template,
11331                        os=self.op.os,
11332                        tags=self.op.tags,
11333                        nics=self.op.nics,
11334                        vcpus=self.op.vcpus,
11335                        hypervisor=self.op.hypervisor,
11336                        )
11337     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11338       ial = IAllocator(self.cfg, self.rpc,
11339                        mode=self.op.mode,
11340                        name=self.op.name,
11341                        relocate_from=list(self.relocate_from),
11342                        )
11343     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11344       ial = IAllocator(self.cfg, self.rpc,
11345                        mode=self.op.mode,
11346                        evac_nodes=self.op.evac_nodes)
11347     else:
11348       raise errors.ProgrammerError("Uncatched mode %s in"
11349                                    " LUTestAllocator.Exec", self.op.mode)
11350
11351     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11352       result = ial.in_text
11353     else:
11354       ial.Run(self.op.allocator, validate=False)
11355       result = ial.out_text
11356     return result
11357
11358
11359 #: Query type implementations
11360 _QUERY_IMPL = {
11361   constants.QR_INSTANCE: _InstanceQuery,
11362   constants.QR_NODE: _NodeQuery,
11363   constants.QR_GROUP: _GroupQuery,
11364   }
11365
11366
11367 def _GetQueryImplementation(name):
11368   """Returns the implemtnation for a query type.
11369
11370   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11371
11372   """
11373   try:
11374     return _QUERY_IMPL[name]
11375   except KeyError:
11376     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11377                                errors.ECODE_INVAL)