code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have way too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Hooks path and type; subclasses which run hooks are expected to
  # redefine both (BuildHooksEnv is not called while HPATH is None)
  HPATH = None
  HTYPE = None
  # By default an LU requires the Big Ganeti Lock, see ExpandNames
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    @param processor: the object driving this LU; its C{Log}, C{LogWarning},
        C{LogInfo} and C{LogStep} methods are re-exported on the LU
    @param op: the opcode to execute; it is validated (and its defaults are
        filled in) via C{op.Validate(True)}
    @param context: object whose C{cfg} attribute is the cluster configuration
    @param rpc: the RPC runner, stored as C{self.rpc}

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    # (needed_locks stays None until ExpandNames fills it in)
    self.needed_locks = None
    self.acquired_locks = {}
    # 0 means "not shared", i.e. exclusive, for every locking level
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # Created lazily by the "ssh" property
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    # (a missing or non-integer debug level is normalised to 0)
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    # Must come last: CheckArguments may rely on everything set up above
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    The runner is created on first access, using the cluster name from the
    configuration, and cached for later calls.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  # Read-only access to the lazily created SshRunner
  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL and has
        not overridden this method

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation runs C{CheckPrereq} of every tasklet, in
    order, and does nothing if no tasklets are defined.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs C{Exec} of every tasklet, in order.

    @param feedback_fn: function passed on unchanged to each tasklet's
        C{Exec}
    @raise NotImplementedError: if no tasklets are defined and the method
        has not been overridden

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    @raise NotImplementedError: always, unless overridden by the subclass

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      # Refuse to silently overwrite instance locks declared earlier
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    # Note: a single name (not a list) is stored for the instance level
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with the primary nodes (and, unless C{primary_only} is set, also the
    secondary nodes) of the instances listed in
    self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes,
    if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    # Any recalculation mode other than REPLACE or APPEND leaves the node
    # locks untouched
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    # The request has been served; a second call without re-arming it will
    # trip the assertion at the top
    del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param node_name: the node to look for in each instance's C{all_nodes}

  """
  def _UsesNode(inst):
    """Tell whether the instance lists the node among all its nodes."""
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1013
1014
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param node_name: the node compared against each instance's
      C{primary_node}

  """
  def _IsPrimary(inst):
    """Tell whether the node is the instance's primary node."""
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1021
1022
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param node_name: the node to look for in each instance's
      C{secondary_nodes}

  """
  def _IsSecondary(inst):
    """Tell whether the node is one of the instance's secondaries."""
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1029
1030
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object providing C{GetFileStorageDir}
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: a one-element list holding the list of file storage
      directories for file storage, an empty list for everything else

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  return [[cfg.GetFileStorageDir()]]
1041
1042
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Find the disks of an instance that a node reports as faulty.

  The disk IDs are first set for the given node, then the mirror status
  of all the instance's disks is requested from that node.

  @param cfg: configuration object providing C{SetDiskID}
  @param rpc: RPC runner providing C{call_blockdev_getmirrorstatus}
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed as C{prereq} to the RPC result's C{Raise}
  @rtype: list
  @return: indices (positions in the RPC payload) of the disks whose
      local disk status is C{constants.LDS_FAULTY}

  """
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # payload entries that evaluate to false carry no status and are skipped
  return [idx for (idx, bdev_status) in enumerate(result.payload)
          if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY]
1058
1059
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose opcode (C{lu.op}) is inspected and
      possibly updated
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is
      set and the cluster has no default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)

  if node is not None or iallocator is not None:
    # exactly one of the two was given, nothing to fill in
    return

  default_iallocator = lu.cfg.GetDefaultIAllocator()
  if not default_iallocator:
    # carry an error code, like the other prerequisite failure above
    raise errors.OpPrereqError("No iallocator or node given and no"
                               " cluster-wide default iallocator found."
                               " Please specify either an iallocator or a"
                               " node, or set a cluster-wide default"
                               " iallocator.", errors.ECODE_INVAL)

  setattr(lu.op, iallocator_slot, default_iallocator)
1090
1091
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The execution step itself does nothing; the unit only declares the
  "cluster-init" hooks path and the environment for it.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (only C{OP_TARGET}, set to the cluster
        name), an empty list and a list holding the master node

    """
    cluster_name = self.cfg.GetClusterName()
    master = self.cfg.GetMasterNode()
    return ({"OP_TARGET": cluster_name}, [], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
1112
1113
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (only C{OP_TARGET}, set to the cluster
        name) and two empty lists

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # the master must be the only node left in the configuration
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node (failures there are only
    reported, not fatal) and then asks the master node to stop its
    master role.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except Exception: # pylint: disable-msg=W0703
      # Hook failures must not abort the destruction, but interpreter
      # exits and interrupts are no longer swallowed, and the cause is
      # kept in the log instead of being dropped
      logging.exception("Errors occurred running hooks on %s", master)
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  Problems are reported through L{_Error} and L{_ErrorIf}, using the
  error codes defined below.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # item types; the first element of every error code below
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item type, code text) pairs; _Error unpacks them
  # in this order
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # ETYPE_FIELD is the name of the keyword argument through which callers
  # of _Error/_ErrorIf select the severity; the other two are the
  # severity values (ETYPE_ERROR is the default)
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  # matches at the start of every line (multi-line mode); presumably used
  # to indent hooks output - confirm at the place where it is used
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeNetwork(self, ninfo, nresult):
1484     """Check the node time.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489
1490     """
1491     node = ninfo.name
1492     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1493
1494     test = constants.NV_NODELIST not in nresult
1495     _ErrorIf(test, self.ENODESSH, node,
1496              "node hasn't returned node ssh connectivity data")
1497     if not test:
1498       if nresult[constants.NV_NODELIST]:
1499         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500           _ErrorIf(True, self.ENODESSH, node,
1501                    "ssh communication with node '%s': %s", a_node, a_msg)
1502
1503     test = constants.NV_NODENETTEST not in nresult
1504     _ErrorIf(test, self.ENODENET, node,
1505              "node hasn't returned node tcp connectivity data")
1506     if not test:
1507       if nresult[constants.NV_NODENETTEST]:
1508         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1509         for anode in nlist:
1510           _ErrorIf(True, self.ENODENET, node,
1511                    "tcp communication with node '%s': %s",
1512                    anode, nresult[constants.NV_NODENETTEST][anode])
1513
1514     test = constants.NV_MASTERIP not in nresult
1515     _ErrorIf(test, self.ENODENET, node,
1516              "node hasn't returned node master IP reachability data")
1517     if not test:
1518       if not nresult[constants.NV_MASTERIP]:
1519         if node == self.master_node:
1520           msg = "the master node cannot reach the master IP (not configured?)"
1521         else:
1522           msg = "cannot reach the master IP"
1523         _ErrorIf(True, self.ENODENET, node, msg)
1524
1525   def _VerifyInstance(self, instance, instanceconfig, node_image,
1526                       diskstatus):
1527     """Verify an instance.
1528
1529     This function checks to see if the required block devices are
1530     available on the instance's node.
1531
1532     """
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534     node_current = instanceconfig.primary_node
1535
1536     node_vol_should = {}
1537     instanceconfig.MapLVsByNode(node_vol_should)
1538
1539     for node in node_vol_should:
1540       n_img = node_image[node]
1541       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542         # ignore missing volumes on offline or broken nodes
1543         continue
1544       for volume in node_vol_should[node]:
1545         test = volume not in n_img.volumes
1546         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547                  "volume %s missing on node %s", volume, node)
1548
1549     if instanceconfig.admin_up:
1550       pri_img = node_image[node_current]
1551       test = instance not in pri_img.instances and not pri_img.offline
1552       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553                "instance not running on its primary node %s",
1554                node_current)
1555
1556     for node, n_img in node_image.items():
1557       if node != node_current:
1558         test = instance in n_img.instances
1559         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560                  "instance should not run on node %s", node)
1561
1562     diskdata = [(nname, success, status, idx)
1563                 for (nname, disks) in diskstatus.items()
1564                 for idx, (success, status) in enumerate(disks)]
1565
1566     for nname, success, bdev_status, idx in diskdata:
1567       # the 'ghost node' construction in Exec() ensures that we have a
1568       # node here
1569       snode = node_image[nname]
1570       bad_snode = snode.ghost or snode.offline
1571       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1572                self.EINSTANCEFAULTYDISK, instance,
1573                "couldn't retrieve status for disk/%s on %s: %s",
1574                idx, nname, bdev_status)
1575       _ErrorIf((instanceconfig.admin_up and success and
1576                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1577                self.EINSTANCEFAULTYDISK, instance,
1578                "disk/%s on %s is faulty", idx, nname)
1579
1580   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1581     """Verify if there are any unknown volumes in the cluster.
1582
1583     The .os, .swap and backup volumes are ignored. All other volumes are
1584     reported as unknown.
1585
1586     @type reserved: L{ganeti.utils.FieldSet}
1587     @param reserved: a FieldSet of reserved volume names
1588
1589     """
1590     for node, n_img in node_image.items():
1591       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1592         # skip non-healthy nodes
1593         continue
1594       for volume in n_img.volumes:
1595         test = ((node not in node_vol_should or
1596                 volume not in node_vol_should[node]) and
1597                 not reserved.Matches(volume))
1598         self._ErrorIf(test, self.ENODEORPHANLV, node,
1599                       "volume %s is unknown", volume)
1600
1601   def _VerifyOrphanInstances(self, instancelist, node_image):
1602     """Verify the list of running instances.
1603
1604     This checks what instances are running but unknown to the cluster.
1605
1606     """
1607     for node, n_img in node_image.items():
1608       for o_inst in n_img.instances:
1609         test = o_inst not in instancelist
1610         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1611                       "instance %s on node %s should not exist", o_inst, node)
1612
1613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1614     """Verify N+1 Memory Resilience.
1615
1616     Check that if one single node dies we can still start all the
1617     instances it was primary for.
1618
1619     """
1620     for node, n_img in node_image.items():
1621       # This code checks that every node which is now listed as
1622       # secondary has enough memory to host all instances it is
1623       # supposed to should a single other node in the cluster fail.
1624       # FIXME: not ready for failover to an arbitrary node
1625       # FIXME: does not support file-backed instances
1626       # WARNING: we currently take into account down instances as well
1627       # as up ones, considering that even if they're down someone
1628       # might want to start them even in the event of a node failure.
1629       if n_img.offline:
1630         # we're skipping offline nodes from the N+1 warning, since
1631         # most likely we don't have good memory infromation from them;
1632         # we already list instances living on such nodes, and that's
1633         # enough warning
1634         continue
1635       for prinode, instances in n_img.sbp.items():
1636         needed_mem = 0
1637         for instance in instances:
1638           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1639           if bep[constants.BE_AUTO_BALANCE]:
1640             needed_mem += bep[constants.BE_MEMORY]
1641         test = n_img.mfree < needed_mem
1642         self._ErrorIf(test, self.ENODEN1, node,
1643                       "not enough memory to accomodate instance failovers"
1644                       " should node %s fail", prinode)
1645
1646   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1647                        master_files):
1648     """Verifies and computes the node required file checksums.
1649
1650     @type ninfo: L{objects.Node}
1651     @param ninfo: the node to check
1652     @param nresult: the remote results for the node
1653     @param file_list: required list of files
1654     @param local_cksum: dictionary of local files and their checksums
1655     @param master_files: list of files that only masters should have
1656
1657     """
1658     node = ninfo.name
1659     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660
1661     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1662     test = not isinstance(remote_cksum, dict)
1663     _ErrorIf(test, self.ENODEFILECHECK, node,
1664              "node hasn't returned file checksum data")
1665     if test:
1666       return
1667
1668     for file_name in file_list:
1669       node_is_mc = ninfo.master_candidate
1670       must_have = (file_name not in master_files) or node_is_mc
1671       # missing
1672       test1 = file_name not in remote_cksum
1673       # invalid checksum
1674       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1675       # existing and good
1676       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1677       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1678                "file '%s' missing", file_name)
1679       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1680                "file '%s' has wrong checksum", file_name)
1681       # not candidate and this is not a must-have file
1682       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1683                "file '%s' should not exist on non master"
1684                " candidates (and the file is outdated)", file_name)
1685       # all good, except non-master/non-must have combination
1686       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1687                "file '%s' should not exist"
1688                " on non master candidates", file_name)
1689
1690   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1691                       drbd_map):
1692     """Verifies and the node DRBD status.
1693
1694     @type ninfo: L{objects.Node}
1695     @param ninfo: the node to check
1696     @param nresult: the remote results for the node
1697     @param instanceinfo: the dict of instances
1698     @param drbd_helper: the configured DRBD usermode helper
1699     @param drbd_map: the DRBD map as returned by
1700         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1701
1702     """
1703     node = ninfo.name
1704     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1705
1706     if drbd_helper:
1707       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1708       test = (helper_result == None)
1709       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1710                "no drbd usermode helper returned")
1711       if helper_result:
1712         status, payload = helper_result
1713         test = not status
1714         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1715                  "drbd usermode helper check unsuccessful: %s", payload)
1716         test = status and (payload != drbd_helper)
1717         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1718                  "wrong drbd usermode helper: %s", payload)
1719
1720     # compute the DRBD minors
1721     node_drbd = {}
1722     for minor, instance in drbd_map[node].items():
1723       test = instance not in instanceinfo
1724       _ErrorIf(test, self.ECLUSTERCFG, None,
1725                "ghost instance '%s' in temporary DRBD map", instance)
1726         # ghost instance should not be running, but otherwise we
1727         # don't give double warnings (both ghost instance and
1728         # unallocated minor in use)
1729       if test:
1730         node_drbd[minor] = (instance, False)
1731       else:
1732         instance = instanceinfo[instance]
1733         node_drbd[minor] = (instance.name, instance.admin_up)
1734
1735     # and now check them
1736     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1737     test = not isinstance(used_minors, (tuple, list))
1738     _ErrorIf(test, self.ENODEDRBD, node,
1739              "cannot parse drbd status file: %s", str(used_minors))
1740     if test:
1741       # we cannot check drbd status
1742       return
1743
1744     for minor, (iname, must_exist) in node_drbd.items():
1745       test = minor not in used_minors and must_exist
1746       _ErrorIf(test, self.ENODEDRBD, node,
1747                "drbd minor %d of instance %s is not active", minor, iname)
1748     for minor in used_minors:
1749       test = minor not in node_drbd
1750       _ErrorIf(test, self.ENODEDRBD, node,
1751                "unallocated drbd minor %d is in use", minor)
1752
1753   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1754     """Builds the node OS structures.
1755
1756     @type ninfo: L{objects.Node}
1757     @param ninfo: the node to check
1758     @param nresult: the remote results for the node
1759     @param nimg: the node image object
1760
1761     """
1762     node = ninfo.name
1763     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1764
1765     remote_os = nresult.get(constants.NV_OSLIST, None)
1766     test = (not isinstance(remote_os, list) or
1767             not compat.all(isinstance(v, list) and len(v) == 7
1768                            for v in remote_os))
1769
1770     _ErrorIf(test, self.ENODEOS, node,
1771              "node hasn't returned valid OS data")
1772
1773     nimg.os_fail = test
1774
1775     if test:
1776       return
1777
1778     os_dict = {}
1779
1780     for (name, os_path, status, diagnose,
1781          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1782
1783       if name not in os_dict:
1784         os_dict[name] = []
1785
1786       # parameters is a list of lists instead of list of tuples due to
1787       # JSON lacking a real tuple type, fix it:
1788       parameters = [tuple(v) for v in parameters]
1789       os_dict[name].append((os_path, status, diagnose,
1790                             set(variants), set(parameters), set(api_ver)))
1791
1792     nimg.oslist = os_dict
1793
1794   def _VerifyNodeOS(self, ninfo, nimg, base):
1795     """Verifies the node OS list.
1796
1797     @type ninfo: L{objects.Node}
1798     @param ninfo: the node to check
1799     @param nimg: the node image object
1800     @param base: the 'template' node we match against (e.g. from the master)
1801
1802     """
1803     node = ninfo.name
1804     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1805
1806     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1807
1808     for os_name, os_data in nimg.oslist.items():
1809       assert os_data, "Empty OS status for OS %s?!" % os_name
1810       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1811       _ErrorIf(not f_status, self.ENODEOS, node,
1812                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1813       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1814                "OS '%s' has multiple entries (first one shadows the rest): %s",
1815                os_name, utils.CommaJoin([v[0] for v in os_data]))
1816       # this will catched in backend too
1817       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1818                and not f_var, self.ENODEOS, node,
1819                "OS %s with API at least %d does not declare any variant",
1820                os_name, constants.OS_API_V15)
1821       # comparisons with the 'base' image
1822       test = os_name not in base.oslist
1823       _ErrorIf(test, self.ENODEOS, node,
1824                "Extra OS %s not present on reference node (%s)",
1825                os_name, base.name)
1826       if test:
1827         continue
1828       assert base.oslist[os_name], "Base node has empty OS status?"
1829       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1830       if not b_status:
1831         # base OS is invalid, skipping
1832         continue
1833       for kind, a, b in [("API version", f_api, b_api),
1834                          ("variants list", f_var, b_var),
1835                          ("parameters", f_param, b_param)]:
1836         _ErrorIf(a != b, self.ENODEOS, node,
1837                  "OS %s %s differs from reference node %s: %s vs. %s",
1838                  kind, os_name, base.name,
1839                  utils.CommaJoin(a), utils.CommaJoin(b))
1840
1841     # check any missing OSes
1842     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1843     _ErrorIf(missing, self.ENODEOS, node,
1844              "OSes present on reference node %s but missing on this node: %s",
1845              base.name, utils.CommaJoin(missing))
1846
1847   def _VerifyOob(self, ninfo, nresult):
1848     """Verifies out of band functionality of a node.
1849
1850     @type ninfo: L{objects.Node}
1851     @param ninfo: the node to check
1852     @param nresult: the remote results for the node
1853
1854     """
1855     node = ninfo.name
1856     # We just have to verify the paths on master and/or master candidates
1857     # as the oob helper is invoked on the master
1858     if ((ninfo.master_candidate or ninfo.master_capable) and
1859         constants.NV_OOB_PATHS in nresult):
1860       for path_result in nresult[constants.NV_OOB_PATHS]:
1861         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1862
1863   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1864     """Verifies and updates the node volume data.
1865
1866     This function will update a L{NodeImage}'s internal structures
1867     with data from the remote call.
1868
1869     @type ninfo: L{objects.Node}
1870     @param ninfo: the node to check
1871     @param nresult: the remote results for the node
1872     @param nimg: the node image object
1873     @param vg_name: the configured VG name
1874
1875     """
1876     node = ninfo.name
1877     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1878
1879     nimg.lvm_fail = True
1880     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1881     if vg_name is None:
1882       pass
1883     elif isinstance(lvdata, basestring):
1884       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1885                utils.SafeEncode(lvdata))
1886     elif not isinstance(lvdata, dict):
1887       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1888     else:
1889       nimg.volumes = lvdata
1890       nimg.lvm_fail = False
1891
1892   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1893     """Verifies and updates the node instance list.
1894
1895     If the listing was successful, then updates this node's instance
1896     list. Otherwise, it marks the RPC call as failed for the instance
1897     list key.
1898
1899     @type ninfo: L{objects.Node}
1900     @param ninfo: the node to check
1901     @param nresult: the remote results for the node
1902     @param nimg: the node image object
1903
1904     """
1905     idata = nresult.get(constants.NV_INSTANCELIST, None)
1906     test = not isinstance(idata, list)
1907     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1908                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1909     if test:
1910       nimg.hyp_fail = True
1911     else:
1912       nimg.instances = idata
1913
1914   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1915     """Verifies and computes a node information map
1916
1917     @type ninfo: L{objects.Node}
1918     @param ninfo: the node to check
1919     @param nresult: the remote results for the node
1920     @param nimg: the node image object
1921     @param vg_name: the configured VG name
1922
1923     """
1924     node = ninfo.name
1925     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1926
1927     # try to read free memory (from the hypervisor)
1928     hv_info = nresult.get(constants.NV_HVINFO, None)
1929     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1930     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1931     if not test:
1932       try:
1933         nimg.mfree = int(hv_info["memory_free"])
1934       except (ValueError, TypeError):
1935         _ErrorIf(True, self.ENODERPC, node,
1936                  "node returned invalid nodeinfo, check hypervisor")
1937
1938     # FIXME: devise a free space model for file based instances as well
1939     if vg_name is not None:
1940       test = (constants.NV_VGLIST not in nresult or
1941               vg_name not in nresult[constants.NV_VGLIST])
1942       _ErrorIf(test, self.ENODELVM, node,
1943                "node didn't return data for the volume group '%s'"
1944                " - it is either missing or broken", vg_name)
1945       if not test:
1946         try:
1947           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1948         except (ValueError, TypeError):
1949           _ErrorIf(True, self.ENODERPC, node,
1950                    "node returned invalid LVM info, check LVM status")
1951
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    For every node in C{nodelist} the disks of its primary and
    secondary instances are collected and their mirror status is
    requested from all such nodes with a single multi-node RPC call.
    Offline nodes, failed calls and malformed entries are turned into
    C{(False, message)} statuses.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node image objects; their C{pinst} and C{sinst}
        lists are used to find the instances of each node
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # node name -> list of (instance name, disk object)
    node_disks = {}
    # node name -> list of disk copies, in the same order as node_disks
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          # every payload entry must be a two-element sequence; anything
          # else is logged and replaced with a failure status
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      # statuses are matched to disks by position; zip() stops at the
      # shorter sequence, so surplus entries on either side are dropped
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    # sanity checks: one two-element status per disk on every node, no
    # more nodes than the instance has, and an entry for every instance
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
2048
2049   def _VerifyHVP(self, hvp_data):
2050     """Verifies locally the syntax of the hypervisor parameters.
2051
2052     """
2053     for item, hv_name, hv_params in hvp_data:
2054       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2055              (item, hv_name))
2056       try:
2057         hv_class = hypervisor.GetHypervisor(hv_name)
2058         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2059         hv_class.CheckParameterSyntax(hv_params)
2060       except errors.GenericError, err:
2061         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2062
2063
2064   def BuildHooksEnv(self):
2065     """Build hooks env.
2066
2067     Cluster-Verify hooks just ran in the post phase and their failure makes
2068     the output be logged in the verify output and the verification to fail.
2069
2070     """
2071     all_nodes = self.cfg.GetNodeList()
2072     env = {
2073       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2074       }
2075     for node in self.cfg.GetAllNodesInfo().values():
2076       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2077
2078     return env, [], all_nodes
2079
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    The method checks the global configuration and the cluster
    certificates, validates the hypervisor parameters locally, runs
    the node verify RPC call on all nodes and collects the disk status
    of all instances. The results are then evaluated per node and per
    instance; finally orphan volumes and instances are looked for and,
    unless skipped via C{self.op.skip_checks}, the N+1 memory
    redundancy is verified.

    @param feedback_fn: function used to send feedback back to the caller
    @return: C{not self.bad}; C{self.bad} is reset to False at the start
        of this method (presumably it is set by C{_ErrorIf} whenever an
        error is reported)

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # Compute the set of hypervisor parameters
    # each entry is a (source, hypervisor name, parameters) tuple
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # parameters of the node verify RPC call; offline nodes are left
    # out of the node list and of the network test
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS: hvp_data,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    # the LVM and DRBD list checks are requested only if a volume group
    # is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    # record, for every node, its primary and secondary instances
    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp: secondary instances of this node, grouped by primary node
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    # node image used as reference for the OS checks; it is the first
    # vm_capable node (in nodelist order) which returned valid OS data
    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      # note that a drained node is only counted if it is neither the
      # master nor a master candidate
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      # the remaining checks are only done for vm_capable nodes
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node %s", inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      # warn if the nodes of a network-mirrored instance are spread over
      # more than one node group
      if inst_config.disk_template in constants.DTS_NET_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # informational notices only, these don't go through _ErrorIf
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2389
2390   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2391     """Analyze the post-hooks' result
2392
2393     This method analyses the hook result, handles it, and sends some
2394     nicely-formatted feedback back to the user.
2395
2396     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2397         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2398     @param hooks_results: the results of the multi-node hooks rpc call
2399     @param feedback_fn: function used send feedback back to the caller
2400     @param lu_result: previous Exec result
2401     @return: the new Exec result, based on the previous result
2402         and hook results
2403
2404     """
2405     # We only really run POST phase hooks, and are only interested in
2406     # their results
2407     if phase == constants.HOOKS_PHASE_POST:
2408       # Used to change hooks' output to proper indentation
2409       feedback_fn("* Hooks Results")
2410       assert hooks_results, "invalid result from hooks"
2411
2412       for node_name in hooks_results:
2413         res = hooks_results[node_name]
2414         msg = res.fail_msg
2415         test = msg and not res.offline
2416         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2417                       "Communication failure in hooks execution: %s", msg)
2418         if res.offline or msg:
2419           # No need to investigate payload if node is offline or gave an error.
2420           # override manually lu_result here as _ErrorIf only
2421           # overrides self.bad
2422           lu_result = 1
2423           continue
2424         for script, hkr, output in res.payload:
2425           test = hkr == constants.HKR_FAIL
2426           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2427                         "Script %s failed, output:", script)
2428           if test:
2429             output = self._HOOKS_INDENT_RE.sub('      ', output)
2430             feedback_fn("%s" % output)
2431             lu_result = 0
2432
2433       return lu_result
2434
2435
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks: all nodes and all instances, shared.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
    }

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances marked as administratively up are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())

    # build the reverse mapping {(node, vol): instance} out of the
    # per-instance {node: [vol, ...]} maps
    nv_dict = {}
    for inst in self.cfg.GetAllInstancesInfo().values():
      if not inst.admin_up:
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      # nothing to verify, so no need to query the nodes
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every LV reported by the node is dropped from the map, so
        # that only the unreported (missing) ones remain at the end
        inst = nv_dict.pop((node, lv_name), None)
        if inst is None or lv_online:
          continue
        if inst.name not in res_instances:
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2502
2503
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the locks needed for the requested instances.

    If no instances were given in the opcode, all instances and all
    nodes are locked; otherwise only the named instances are locked
    and the node locks are computed later, in L{DeclareLocks}. All
    locks are acquired in shared mode.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if self.wanted_names is None:
      return
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This resolves the instance names (either the ones given in the
    opcode or, if none were given, all the locked ones) into instance
    objects, stored in C{self.wanted_instances}.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    get_info = self.cfg.GetInstanceInfo
    self.wanted_instances = [get_info(name) for name in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the size of the first child, at any nesting
        level, had to be increased

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    # group the disks by the primary node of their instance
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # the query is done on copies of the disk objects
      newl = [entry[2].Copy() for entry in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)

      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # scale down by 2^20 before comparing with the recorded size
        actual = size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
2619
2620
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, list containing only the
        master node, list of all nodes)

    """
    hook_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_node = self.cfg.GetMasterNode()
    return hook_env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and the operation is refused if neither the
    name nor the IP address would change, or if a changed IP address
    already answers on the node daemon port. On success the resolved
    IP is stored in C{self.ip} and C{self.op.name} is replaced by the
    resolved name.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated, and the master role is started again even if the
    update failed.

    @return: the new cluster name

    """
    clustername = self.op.name
    master = self.cfg.GetMasterNode()

    # shutdown the master IP
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = self.ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the master is excluded from the list of upload targets
      node_list = self.cfg.GetOnlineNodeList()
      if master in node_list:
        node_list.remove(master)
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      msg = start_result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
2697
2698
2699 class LUClusterSetParams(LogicalUnit):
2700   """Change the parameters of the cluster.
2701
2702   """
2703   HPATH = "cluster-modify"
2704   HTYPE = constants.HTYPE_CLUSTER
2705   REQ_BGL = False
2706
2707   def CheckArguments(self):
2708     """Check parameters
2709
2710     """
2711     if self.op.uid_pool:
2712       uidpool.CheckUidPool(self.op.uid_pool)
2713
2714     if self.op.add_uids:
2715       uidpool.CheckUidPool(self.op.add_uids)
2716
2717     if self.op.remove_uids:
2718       uidpool.CheckUidPool(self.op.remove_uids)
2719
2720   def ExpandNames(self):
2721     # FIXME: in the future maybe other cluster params won't require checking on
2722     # all nodes to be modified.
2723     self.needed_locks = {
2724       locking.LEVEL_NODE: locking.ALL_SET,
2725     }
2726     self.share_locks[locking.LEVEL_NODE] = 1
2727
2728   def BuildHooksEnv(self):
2729     """Build hooks env.
2730
2731     """
2732     env = {
2733       "OP_TARGET": self.cfg.GetClusterName(),
2734       "NEW_VG_NAME": self.op.vg_name,
2735       }
2736     mn = self.cfg.GetMasterNode()
2737     return env, [mn], [mn]
2738
2739   def CheckPrereq(self):
2740     """Check prerequisites.
2741
2742     This checks whether the given params don't conflict and
2743     if the given volume group is valid.
2744
2745     """
2746     if self.op.vg_name is not None and not self.op.vg_name:
2747       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2748         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2749                                    " instances exist", errors.ECODE_INVAL)
2750
2751     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2752       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2753         raise errors.OpPrereqError("Cannot disable drbd helper while"
2754                                    " drbd-based instances exist",
2755                                    errors.ECODE_INVAL)
2756
2757     node_list = self.acquired_locks[locking.LEVEL_NODE]
2758
2759     # if vg_name not None, checks given volume group on all nodes
2760     if self.op.vg_name:
2761       vglist = self.rpc.call_vg_list(node_list)
2762       for node in node_list:
2763         msg = vglist[node].fail_msg
2764         if msg:
2765           # ignoring down node
2766           self.LogWarning("Error while gathering data on node %s"
2767                           " (ignoring node): %s", node, msg)
2768           continue
2769         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2770                                               self.op.vg_name,
2771                                               constants.MIN_VG_SIZE)
2772         if vgstatus:
2773           raise errors.OpPrereqError("Error on node '%s': %s" %
2774                                      (node, vgstatus), errors.ECODE_ENVIRON)
2775
2776     if self.op.drbd_helper:
2777       # checks given drbd helper on all nodes
2778       helpers = self.rpc.call_drbd_helper(node_list)
2779       for node in node_list:
2780         ninfo = self.cfg.GetNodeInfo(node)
2781         if ninfo.offline:
2782           self.LogInfo("Not checking drbd helper on offline node %s", node)
2783           continue
2784         msg = helpers[node].fail_msg
2785         if msg:
2786           raise errors.OpPrereqError("Error checking drbd helper on node"
2787                                      " '%s': %s" % (node, msg),
2788                                      errors.ECODE_ENVIRON)
2789         node_helper = helpers[node].payload
2790         if node_helper != self.op.drbd_helper:
2791           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2792                                      (node, node_helper), errors.ECODE_ENVIRON)
2793
2794     self.cluster = cluster = self.cfg.GetClusterInfo()
2795     # validate params changes
2796     if self.op.beparams:
2797       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2798       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2799
2800     if self.op.ndparams:
2801       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2802       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2803
2804     if self.op.nicparams:
2805       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2806       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2807       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2808       nic_errors = []
2809
2810       # check all instances for consistency
2811       for instance in self.cfg.GetAllInstancesInfo().values():
2812         for nic_idx, nic in enumerate(instance.nics):
2813           params_copy = copy.deepcopy(nic.nicparams)
2814           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2815
2816           # check parameter syntax
2817           try:
2818             objects.NIC.CheckParameterSyntax(params_filled)
2819           except errors.ConfigurationError, err:
2820             nic_errors.append("Instance %s, nic/%d: %s" %
2821                               (instance.name, nic_idx, err))
2822
2823           # if we're moving instances to routed, check that they have an ip
2824           target_mode = params_filled[constants.NIC_MODE]
2825           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2826             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2827                               (instance.name, nic_idx))
2828       if nic_errors:
2829         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2830                                    "\n".join(nic_errors))
2831
2832     # hypervisor list/parameters
2833     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2834     if self.op.hvparams:
2835       for hv_name, hv_dict in self.op.hvparams.items():
2836         if hv_name not in self.new_hvparams:
2837           self.new_hvparams[hv_name] = hv_dict
2838         else:
2839           self.new_hvparams[hv_name].update(hv_dict)
2840
2841     # os hypervisor parameters
2842     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2843     if self.op.os_hvp:
2844       for os_name, hvs in self.op.os_hvp.items():
2845         if os_name not in self.new_os_hvp:
2846           self.new_os_hvp[os_name] = hvs
2847         else:
2848           for hv_name, hv_dict in hvs.items():
2849             if hv_name not in self.new_os_hvp[os_name]:
2850               self.new_os_hvp[os_name][hv_name] = hv_dict
2851             else:
2852               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2853
2854     # os parameters
2855     self.new_osp = objects.FillDict(cluster.osparams, {})
2856     if self.op.osparams:
2857       for os_name, osp in self.op.osparams.items():
2858         if os_name not in self.new_osp:
2859           self.new_osp[os_name] = {}
2860
2861         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2862                                                   use_none=True)
2863
2864         if not self.new_osp[os_name]:
2865           # we removed all parameters
2866           del self.new_osp[os_name]
2867         else:
2868           # check the parameter validity (remote check)
2869           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2870                          os_name, self.new_osp[os_name])
2871
2872     # changes to the hypervisor list
2873     if self.op.enabled_hypervisors is not None:
2874       self.hv_list = self.op.enabled_hypervisors
2875       for hv in self.hv_list:
2876         # if the hypervisor doesn't already exist in the cluster
2877         # hvparams, we initialize it to empty, and then (in both
2878         # cases) we make sure to fill the defaults, as we might not
2879         # have a complete defaults list if the hypervisor wasn't
2880         # enabled before
2881         if hv not in new_hvp:
2882           new_hvp[hv] = {}
2883         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2884         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2885     else:
2886       self.hv_list = cluster.enabled_hypervisors
2887
2888     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2889       # either the enabled list has changed, or the parameters have, validate
2890       for hv_name, hv_params in self.new_hvparams.items():
2891         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2892             (self.op.enabled_hypervisors and
2893              hv_name in self.op.enabled_hypervisors)):
2894           # either this is a new hypervisor, or its parameters have changed
2895           hv_class = hypervisor.GetHypervisor(hv_name)
2896           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2897           hv_class.CheckParameterSyntax(hv_params)
2898           _CheckHVParams(self, node_list, hv_name, hv_params)
2899
2900     if self.op.os_hvp:
2901       # no need to check any newly-enabled hypervisors, since the
2902       # defaults have already been checked in the above code-block
2903       for os_name, os_hvp in self.new_os_hvp.items():
2904         for hv_name, hv_params in os_hvp.items():
2905           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2906           # we need to fill in the new os_hvp on top of the actual hv_p
2907           cluster_defaults = self.new_hvparams.get(hv_name, {})
2908           new_osp = objects.FillDict(cluster_defaults, hv_params)
2909           hv_class = hypervisor.GetHypervisor(hv_name)
2910           hv_class.CheckParameterSyntax(new_osp)
2911           _CheckHVParams(self, node_list, hv_name, new_osp)
2912
2913     if self.op.default_iallocator:
2914       alloc_script = utils.FindFile(self.op.default_iallocator,
2915                                     constants.IALLOCATOR_SEARCH_PATH,
2916                                     os.path.isfile)
2917       if alloc_script is None:
2918         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2919                                    " specified" % self.op.default_iallocator,
2920                                    errors.ECODE_INVAL)
2921
2922   def Exec(self, feedback_fn):
2923     """Change the parameters of the cluster.
2924
2925     """
2926     if self.op.vg_name is not None:
2927       new_volume = self.op.vg_name
2928       if not new_volume:
2929         new_volume = None
2930       if new_volume != self.cfg.GetVGName():
2931         self.cfg.SetVGName(new_volume)
2932       else:
2933         feedback_fn("Cluster LVM configuration already in desired"
2934                     " state, not changing")
2935     if self.op.drbd_helper is not None:
2936       new_helper = self.op.drbd_helper
2937       if not new_helper:
2938         new_helper = None
2939       if new_helper != self.cfg.GetDRBDHelper():
2940         self.cfg.SetDRBDHelper(new_helper)
2941       else:
2942         feedback_fn("Cluster DRBD helper already in desired state,"
2943                     " not changing")
2944     if self.op.hvparams:
2945       self.cluster.hvparams = self.new_hvparams
2946     if self.op.os_hvp:
2947       self.cluster.os_hvp = self.new_os_hvp
2948     if self.op.enabled_hypervisors is not None:
2949       self.cluster.hvparams = self.new_hvparams
2950       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2951     if self.op.beparams:
2952       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2953     if self.op.nicparams:
2954       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2955     if self.op.osparams:
2956       self.cluster.osparams = self.new_osp
2957     if self.op.ndparams:
2958       self.cluster.ndparams = self.new_ndparams
2959
2960     if self.op.candidate_pool_size is not None:
2961       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2962       # we need to update the pool size here, otherwise the save will fail
2963       _AdjustCandidatePool(self, [])
2964
2965     if self.op.maintain_node_health is not None:
2966       self.cluster.maintain_node_health = self.op.maintain_node_health
2967
2968     if self.op.prealloc_wipe_disks is not None:
2969       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2970
2971     if self.op.add_uids is not None:
2972       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2973
2974     if self.op.remove_uids is not None:
2975       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2976
2977     if self.op.uid_pool is not None:
2978       self.cluster.uid_pool = self.op.uid_pool
2979
2980     if self.op.default_iallocator is not None:
2981       self.cluster.default_iallocator = self.op.default_iallocator
2982
2983     if self.op.reserved_lvs is not None:
2984       self.cluster.reserved_lvs = self.op.reserved_lvs
2985
2986     def helper_os(aname, mods, desc):
2987       desc += " OS list"
2988       lst = getattr(self.cluster, aname)
2989       for key, val in mods:
2990         if key == constants.DDM_ADD:
2991           if val in lst:
2992             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2993           else:
2994             lst.append(val)
2995         elif key == constants.DDM_REMOVE:
2996           if val in lst:
2997             lst.remove(val)
2998           else:
2999             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3000         else:
3001           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3002
3003     if self.op.hidden_os:
3004       helper_os("hidden_os", self.op.hidden_os, "hidden")
3005
3006     if self.op.blacklisted_os:
3007       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3008
3009     if self.op.master_netdev:
3010       master = self.cfg.GetMasterNode()
3011       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3012                   self.cluster.master_netdev)
3013       result = self.rpc.call_node_stop_master(master, False)
3014       result.Raise("Could not disable the master ip")
3015       feedback_fn("Changing master_netdev from %s to %s" %
3016                   (self.cluster.master_netdev, self.op.master_netdev))
3017       self.cluster.master_netdev = self.op.master_netdev
3018
3019     self.cfg.Update(self.cluster, feedback_fn)
3020
3021     if self.op.master_netdev:
3022       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3023                   self.op.master_netdev)
3024       result = self.rpc.call_node_start_master(master, False, False)
3025       if result.fail_msg:
3026         self.LogWarning("Could not re-enable the master ip on"
3027                         " the master, please restart manually: %s",
3028                         result.fail_msg)
3029
3030
3031 def _UploadHelper(lu, nodes, fname):
3032   """Helper for uploading a file and showing warnings.
3033
3034   """
3035   if os.path.exists(fname):
3036     result = lu.rpc.call_upload_file(nodes, fname)
3037     for to_node, to_result in result.items():
3038       msg = to_result.fail_msg
3039       if msg:
3040         msg = ("Copy of file %s to node %s failed: %s" %
3041                (fname, to_node, msg))
3042         lu.proc.LogWarning(msg)
3043
3044
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  The master node itself is excluded from the upload targets.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # 1. Gather target nodes
  myself = cfg.GetNodeInfo(cfg.GetMasterNode())
  dist_nodes = cfg.GetOnlineNodeList()
  nvm_nodes = cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  for targets in (dist_nodes, vm_nodes):
    if myself.name in targets:
      targets.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  # files needed by the enabled hypervisors go to vm-capable nodes only
  vm_files = set()
  for hv_name in cfg.GetClusterInfo().enabled_hypervisors:
    vm_files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for (targets, files) in ((dist_nodes, dist_files), (vm_nodes, vm_files)):
    for fname in files:
      _UploadHelper(lu, targets, fname)
3092
3093
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks: all nodes, in shared mode.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The unchanged cluster object is passed to the configuration
    update and then the ancillary files are redistributed.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3114
3115
3116 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3117   """Sleep and poll for an instance's disk to sync.
3118
3119   """
3120   if not instance.disks or disks is not None and not disks:
3121     return True
3122
3123   disks = _ExpandCheckDisks(instance, disks)
3124
3125   if not oneshot:
3126     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3127
3128   node = instance.primary_node
3129
3130   for dev in disks:
3131     lu.cfg.SetDiskID(dev, node)
3132
3133   # TODO: Convert to utils.Retry
3134
3135   retries = 0
3136   degr_retries = 10 # in seconds, as we sleep 1 second each time
3137   while True:
3138     max_time = 0
3139     done = True
3140     cumul_degraded = False
3141     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3142     msg = rstats.fail_msg
3143     if msg:
3144       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3145       retries += 1
3146       if retries >= 10:
3147         raise errors.RemoteError("Can't contact node %s for mirror data,"
3148                                  " aborting." % node)
3149       time.sleep(6)
3150       continue
3151     rstats = rstats.payload
3152     retries = 0
3153     for i, mstat in enumerate(rstats):
3154       if mstat is None:
3155         lu.LogWarning("Can't compute data for node %s/%s",
3156                            node, disks[i].iv_name)
3157         continue
3158
3159       cumul_degraded = (cumul_degraded or
3160                         (mstat.is_degraded and mstat.sync_percent is None))
3161       if mstat.sync_percent is not None:
3162         done = False
3163         if mstat.estimated_time is not None:
3164           rem_time = ("%s remaining (estimated)" %
3165                       utils.FormatSeconds(mstat.estimated_time))
3166           max_time = mstat.estimated_time
3167         else:
3168           rem_time = "no time estimate"
3169         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3170                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3171
3172     # if we're done but degraded, let's do a few small retries, to
3173     # make sure we see a stable and not transient situation; therefore
3174     # we force restart of the loop
3175     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3176       logging.info("Degraded disks found, %d retries left", degr_retries)
3177       degr_retries -= 1
3178       time.sleep(1)
3179       continue
3180
3181     if done or oneshot:
3182       break
3183
3184     time.sleep(min(60, max_time))
3185
3186   if done:
3187     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3188   return not cumul_degraded
3189
3190
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  The children of the device are checked too, but always using the
  is_degraded test, and only as long as no problem was found.

  @param lu: the logical unit on whose behalf we execute
  @param dev: the disk to check
  @param node: the node on which to check the disk
  @param on_primary: whether the node is the primary one; otherwise
      the disk is queried only if it is assembled on secondaries
  @rtype: boolean
  @return: whether the device and its children are consistent

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      consistent = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = rstats.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      if not consistent:
        # once a problem is found the remaining children are skipped
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
3223
3224
3225 class LUOobCommand(NoHooksLU):
3226   """Logical unit for OOB handling.
3227
3228   """
3229   REG_BGL = False
3230
3231   def CheckPrereq(self):
3232     """Check prerequisites.
3233
3234     This checks:
3235      - the node exists in the configuration
3236      - OOB is supported
3237
3238     Any errors are signaled by raising errors.OpPrereqError.
3239
3240     """
3241     self.nodes = []
3242     for node_name in self.op.node_names:
3243       node = self.cfg.GetNodeInfo(node_name)
3244
3245       if node is None:
3246         raise errors.OpPrereqError("Node %s not found" % node_name,
3247                                    errors.ECODE_NOENT)
3248       else:
3249         self.nodes.append(node)
3250
3251       if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3252         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3253                                     " not marked offline") % node_name,
3254                                    errors.ECODE_STATE)
3255
3256   def ExpandNames(self):
3257     """Gather locks we need.
3258
3259     """
3260     if self.op.node_names:
3261       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3262                             for name in self.op.node_names]
3263     else:
3264       self.op.node_names = self.cfg.GetNodeList()
3265
3266     self.needed_locks = {
3267       locking.LEVEL_NODE: self.op.node_names,
3268       }
3269
3270   def Exec(self, feedback_fn):
3271     """Execute OOB and return result if we expect any.
3272
3273     """
3274     master_node = self.cfg.GetMasterNode()
3275     ret = []
3276
3277     for node in self.nodes:
3278       node_entry = [(constants.RS_NORMAL, node.name)]
3279       ret.append(node_entry)
3280
3281       oob_program = _SupportsOob(self.cfg, node)
3282
3283       if not oob_program:
3284         node_entry.append((constants.RS_UNAVAIL, None))
3285         continue
3286
3287       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3288                    self.op.command, oob_program, node.name)
3289       result = self.rpc.call_run_oob(master_node, oob_program,
3290                                      self.op.command, node.name,
3291                                      self.op.timeout)
3292
3293       if result.fail_msg:
3294         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3295                         node.name, result.fail_msg)
3296         node_entry.append((constants.RS_NODATA, None))
3297       else:
3298         try:
3299           self._CheckPayload(result)
3300         except errors.OpExecError, err:
3301           self.LogWarning("The payload returned by '%s' is not valid: %s",
3302                           node.name, err)
3303           node_entry.append((constants.RS_NODATA, None))
3304         else:
3305           if self.op.command == constants.OOB_HEALTH:
3306             # For health we should log important events
3307             for item, status in result.payload:
3308               if status in [constants.OOB_STATUS_WARNING,
3309                             constants.OOB_STATUS_CRITICAL]:
3310                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3311                                 node.name, item, status)
3312
3313           if self.op.command == constants.OOB_POWER_ON:
3314             node.powered = True
3315           elif self.op.command == constants.OOB_POWER_OFF:
3316             node.powered = False
3317           elif self.op.command == constants.OOB_POWER_STATUS:
3318             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3319             if powered != node.powered:
3320               logging.warning(("Recorded power state (%s) of node '%s' does not"
3321                                " match actual power state (%s)"), node.powered,
3322                               node.name, powered)
3323
3324           # For configuration changing commands we should update the node
3325           if self.op.command in (constants.OOB_POWER_ON,
3326                                  constants.OOB_POWER_OFF):
3327             self.cfg.Update(node, feedback_fn)
3328
3329           node_entry.append((constants.RS_NORMAL, result.payload))
3330
3331     return ret
3332
3333   def _CheckPayload(self, result):
3334     """Checks if the payload is valid.
3335
3336     @param result: RPC result
3337     @raises errors.OpExecError: If payload is not valid
3338
3339     """
3340     errs = []
3341     if self.op.command == constants.OOB_HEALTH:
3342       if not isinstance(result.payload, list):
3343         errs.append("command 'health' is expected to return a list but got %s" %
3344                     type(result.payload))
3345       else:
3346         for item, status in result.payload:
3347           if status not in constants.OOB_STATUSES:
3348             errs.append("health item '%s' has invalid status '%s'" %
3349                         (item, status))
3350
3351     if self.op.command == constants.OOB_POWER_STATUS:
3352       if not isinstance(result.payload, dict):
3353         errs.append("power-status is expected to return a dict but got %s" %
3354                     type(result.payload))
3355
3356     if self.op.command in [
3357         constants.OOB_POWER_ON,
3358         constants.OOB_POWER_OFF,
3359         constants.OOB_POWER_CYCLE,
3360         ]:
3361       if result.payload is not None:
3362         errs.append("%s is expected to not return payload but got '%s'" %
3363                     (self.op.command, result.payload))
3364
3365     if errs:
3366       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3367                                utils.CommaJoin(errs))
3368
3369
3370
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    """Reject name-based selection and validate the output fields.

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare the needed locks (currently none).

    """
    # TODO: reintroduce the shared locks on all nodes
    # (share_locks[LEVEL_NODE] = 1, needed_locks[LEVEL_NODE] = ALL_SET)
    # when they are lighter-weight; they were removed temporarily
    self.needed_locks = {}

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of (path, status, diagnose,
        variants, parameters, api_versions) tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api", ...)],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # Only nodes which answered at RPC level get an entry, so that a
    # non-responding node daemon doesn't make all OSes invalid
    responsive = [nname for nname in rlist if not rlist[nname].fail_msg]

    by_os = {}
    for (node_name, nres) in rlist.items():
      if nres.fail_msg or not nres.payload:
        continue
      for entry in nres.payload:
        (name, path, status, diagnose, variants, params, api_versions) = entry
        if name not in by_os:
          # every responsive node starts with an empty list for this OS
          by_os[name] = dict([(nname, []) for nname in responsive])
        # parameters arrive as [name, help] and are stored as (name, help)
        param_tuples = [tuple(v) for v in params]
        by_os[name][node_name].append((path, status, diagnose, variants,
                                       param_tuples, api_versions))
    return by_os

  @staticmethod
  def _SummarizeOS(os_data):
    """Compute validity and the common attributes of one OS.

    Only the first entry reported by each node is looked at. The OS is
    valid if every node has such an entry and its status is true; in
    that case the variants, parameters and API versions are the
    intersection over all nodes, otherwise they are empty.

    @param os_data: map of node name to list of OS tuples
    @rtype: tuple
    @return: (valid, variants, params, api_versions), the last three
        being sets

    """
    variants = set()
    params = set()
    api_versions = set()
    first = True

    for node_entries in os_data.values():
      if not (node_entries and node_entries[0][1]):
        return (False, set(), set(), set())

      (node_variants, node_params, node_api) = node_entries[0][3:6]
      if first:
        variants = set(node_variants)
        params = set(node_params)
        api_versions = set(node_api)
        first = False
      else:
        # keep only what all nodes agree on
        variants.intersection_update(node_variants)
        params.intersection_update(node_params)
        api_versions.intersection_update(node_api)

    return (True, variants, params, api_versions)

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS (ordered via utils.NiceSort) holding the
        values of the requested output fields; hidden, blacklisted and
        invalid OSes are left out unless the corresponding field was
        requested

    """
    candidates = [node.name
                  for node in self.cfg.GetAllNodesInfo().values()
                  if not node.offline and node.vm_capable]
    per_os = self._DiagnoseByOS(self.rpc.call_os_diagnose(candidates))
    cluster = self.cfg.GetClusterInfo()
    wanted = self.op.output_fields
    output = []

    for os_name in utils.NiceSort(per_os.keys()):
      os_data = per_os[os_name]
      (valid, variants, params, api_versions) = self._SummarizeOS(os_data)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if is_hid and self._HID not in wanted:
        continue
      if is_blk and self._BLK not in wanted:
        continue
      if not valid and self._VLD not in wanted:
        continue

      # The getters are evaluated lazily and only within this iteration,
      # so each requested field is computed exactly when it is emitted
      getters = {
        "name": lambda: os_name,
        self._VLD: lambda: valid,
        # this is just a copy of the dict
        "node_status": lambda: dict(os_data),
        "variants": lambda: utils.NiceSort(list(variants)),
        "parameters": lambda: list(params),
        "api_versions": lambda: list(api_versions),
        self._HID: lambda: is_hid,
        self._BLK: lambda: is_blk,
        }

      row = []
      for field in wanted:
        if field not in getters:
          raise errors.ParameterError(field)
        row.append(getters[field]())
      output.append(row)

    return output
3505
3506
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is taken out of the returned node lists: a
    failed node which had to run the hooks would be impossible to
    remove.

    @rtype: tuple
    @return: (environment dict, node list, node list); the same list
        of remaining nodes is returned twice

    """
    target = self.op.node_name
    hook_env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)

    return hook_env, remaining, remaining

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - no instance has it among its nodes

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_names = self.cfg.GetInstanceList()

    if self.cfg.GetMasterNode() == node.name:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in instance_names:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is dropped from the configuration first; failures while
    running the post hooks on it or while telling it to leave the
    cluster are only logged as warnings.

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # read before the node is removed, needed by the leave-cluster RPC
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
3601
3602
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted nodes and declare the locks on the LU.

    Node locks are only requested if locking was asked for and live
    data is among the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    self.wanted = locking.ALL_SET
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)
    if not self.do_locking:
      return

    # if we don't request only static fields, we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to declare, all locks are requested in ExpandNames.

    """

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Each kind of optional data (live, instance, OOB, group) is only
    gathered if it is part of the requested data.

    @return: a query.NodeQueryData object

    """
    all_info = lu.cfg.GetAllNodesInfo()
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    wanted_data = self.requested_data

    # Live data, queried from the vm_capable nodes only
    live_data = None
    if query.NQ_LIVE in wanted_data:
      toquery_nodes = [name for name in nodenames
                       if all_info[name].vm_capable]
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = {}
      for (name, nresult) in node_data.items():
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = nresult.payload

    # Names of primary/secondary instances per selected node
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in wanted_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # OOB support, computed for all nodes in the configuration
    oob_support = None
    if query.NQ_OOB in wanted_data:
      oob_support = {}
      for (name, node) in all_info.items():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    groups = {}
    if query.NQ_GROUP in wanted_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    selected = [all_info[name] for name in nodenames]
    return query.NodeQueryData(selected, live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
3677
3678
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the node query object from the opcode parameters.

    """
    op = self.op
    self.nq = _NodeQuery(op.names, op.output_fields, op.use_locking)

  def ExpandNames(self):
    """Let the query object declare the locks on this LU.

    """
    node_query = self.nq
    node_query.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    node_query = self.nq
    return node_query.OldStyleQuery(self)
3695
3696
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  @staticmethod
  def _FindOwner(owners, node, vol):
    """Return the name of the instance owning a volume, or '-'.

    @param owners: list of (instance name, LVs-by-node map) pairs
    @param node: the node the volume was reported by
    @param vol: the volume dictionary as returned by the node

    """
    for (iname, lv_map) in owners:
      if node in lv_map and vol['name'] in lv_map[node]:
        return iname
    return '-'

  def _VolumeField(self, field, node, vol, owners):
    """Compute the value of one output field for one volume.

    @raise errors.ParameterError: if the field is not known

    """
    if field == "node":
      return node
    if field == "phys":
      return vol['dev']
    if field == "vg":
      return vol['vg']
    if field == "name":
      return vol['name']
    if field == "size":
      return int(float(vol['size']))
    if field == "instance":
      return self._FindOwner(owners, node, vol)
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one row per volume (per node sorted by the 'dev' key), each
        holding the requested fields converted to strings

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]
    # pairs are kept in the order returned by GetInstanceList, the first
    # matching instance is reported as owner
    owners = [(inst.name, inst.MapLVsByNode()) for inst in instances]

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      if nresult.fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        nresult.fail_msg)
        continue

      # sort a copy, the RPC payload itself is left untouched
      node_vols = sorted(nresult.payload, key=lambda vol: vol['dev'])

      for vol in node_vols:
        output.append([str(self._VolumeField(field, node, vol, owners))
                       for field in self.op.output_fields])

    return output
3773
3774
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one row per storage unit holding the requested fields;
        nodes and, within a node, unit names are ordered via
        utils.NiceSort

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = [constants.SF_NODE, constants.SF_TYPE]
    fields = [name for name in fields if name not in lu_only]

    # position of each field within the rows returned by the nodes
    field_idx = {}
    for (idx, name) in enumerate(fields):
      field_idx[name] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        nresult.fail_msg)
        continue

      by_name = dict([(row[name_idx], row) for row in nresult.payload])

      for unit_name in utils.NiceSort(by_name.keys()):
        unit_row = by_name[unit_name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(unit_row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3856
3857
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted instances and declare the locks on the LU.

    Instance locks (and, recalculated later, node locks) are only
    requested if locking was asked for and live data is among the
    requested data.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Compute the node locks from the locked instances, if locking.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Live data, disk usage and console information are only gathered if
    they are part of the requested data.

    @return: a query.InstanceQueryData object

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst not in all_info:
              # A node may report an instance which is not part of the
              # configuration; looking it up used to abort the whole
              # query with a KeyError, so it is only logged and skipped
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
              continue
            if all_info[inst].primary_node == name:
              # the whole payload of the node is merged (unchanged
              # behaviour), which also brings in the data of instances
              # recorded in wrongnode_inst
              live_data.update(result.payload)
            else:
              wrongnode_inst.add(inst)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, cluster,
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
3945
3946
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The actual work is delegated to the query implementation selected by
  the opcode's C{what} parameter.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiate the query implementation for the requested kind.

    """
    op = self.op
    impl_class = _GetQueryImplementation(op.what)
    wanted_names = qlang.ReadSimpleFilter("name", op.filter)

    # the last argument is the implementation's use_locking flag
    self.impl = impl_class(wanted_names, op.fields, False)

  def ExpandNames(self):
    """Let the query implementation declare the locks on this LU.

    """
    impl = self.impl
    impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward the lock declaration to the query implementation.

    """
    impl = self.impl
    impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its new-style result.

    """
    impl = self.impl
    return impl.NewStyleQuery(self)
3968
3969
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Look up the query implementation for the requested kind.

    """
    kind = self.op.what
    self.qcls = _GetQueryImplementation(kind)

  def ExpandNames(self):
    """No locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the result of the implementation's fields query.

    """
    wanted_fields = self.op.fields
    return self.qcls.FieldsQuery(wanted_fields)
3985
3986
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or if a non-modifiable field is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    rejected = set(self.op.changes.keys()) - modifiable
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (storage_type, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Apply the requested changes to the storage unit via RPC.

    A failure of the RPC is raised through the result's C{Raise}
    method.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    outcome = self.rpc.call_storage_modify(op.node_name, op.storage_type,
                                           st_args, op.name, op.changes)
    outcome.Raise("Failed to modify storage unit '%s' on %s" %
                  (op.name, op.node_name))
4027
4028
class LUNodeAdd(LogicalUnit):
  """Logical unit that adds a node to the cluster or re-adds a known one.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolve the node name and reject a node group on re-adds.

    """
    ip_family = self.cfg.GetPrimaryIPFamily()
    self.primary_ip_family = ip_family
    # resolving the name both validates and normalizes it
    resolved = netutils.GetHostname(name=self.op.node_name, family=ip_family)
    self.hostname = resolved
    self.op.node_name = resolved.name

    if not (self.op.readd and self.op.group):
      return
    raise errors.OpPrereqError("Cannot pass a node group when a node is"
                               " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-phase node list is the current node list of the
    configuration; the post-phase list is the same list with the
    added node appended.

    """
    target = self.op.node_name
    hook_env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    pre_nodes = self.cfg.GetNodeList()
    post_nodes = pre_nodes + [target]
    return (hook_env, pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add,
       that it is)
     - its IP addresses do not clash with those of other nodes
     - its parameters (single/dual homed) match the master's
     - it answers a TCP ping on the node daemon port

    Any errors are signaled by raising errors.OpPrereqError.

    """
    op = self.op
    config = self.cfg
    resolved = self.hostname
    node_name = resolved.name
    new_pip = resolved.ip
    op.primary_ip = new_pip

    # without an explicit secondary IP the primary one is reused, which
    # is only possible if the primary is not an IPv6 address
    if op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      op.secondary_ip = new_pip

    new_sip = op.secondary_ip
    if not netutils.IP4Address.IsValid(new_sip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % new_sip, errors.ECODE_INVAL)

    known_nodes = config.GetNodeList()
    already_known = node_name in known_nodes
    if op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node_name, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node_name, errors.ECODE_EXISTS)

    self.changed_primary_ip = False
    new_addresses = (new_pip, new_sip)

    for other_name in known_nodes:
      other = config.GetNodeInfo(other_name)

      if op.readd and node_name == other_name:
        # the re-added node itself: the secondary IP must be unchanged,
        # a changed primary IP is only recorded
        if other.secondary_ip != new_sip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if other.primary_ip != new_pip:
          self.changed_primary_ip = True
        continue

      if (other.primary_ip in new_addresses or
          other.secondary_ip in new_addresses):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name,
                                   errors.ECODE_NOTUNIQUE)

    # Once the loop below has run, None is no longer a valid value for
    # the _capable op attributes: re-adds inherit the old node's value,
    # new nodes default to True
    old_node = None
    if op.readd:
      old_node = config.GetNodeInfo(node_name)
      assert old_node is not None, "Can't retrieve locked node %s" % node_name
    for flag in self._NFLAGS:
      if getattr(op, flag) is not None:
        continue
      if op.readd:
        setattr(op, flag, getattr(old_node, flag))
      else:
        setattr(op, flag, True)

    if op.readd and not op.vm_capable:
      (pri_insts, sec_insts) = config.GetNodeInstances(node_name)
      if pri_insts or sec_insts:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node_name,
                                   errors.ECODE_STATE)

    # the node must be homed (single versus dual) the same way as the
    # master is
    master = config.GetNodeInfo(config.GetMasterNode())
    master_singlehomed = master.secondary_ip == master.primary_ip
    newbie_singlehomed = new_sip == new_pip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a secondary ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # reachability of the primary address
    if not netutils.TcpPing(new_pip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    # reachability from the master's secondary ip to the newbie's one
    if (not newbie_singlehomed and
        not netutils.TcpPing(new_sip, constants.DEFAULT_NODED_PORT,
                             source=master.secondary_ip)):
      raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                 " based ping to node daemon port",
                                 errors.ECODE_ENVIRON)

    exceptions = []
    if op.readd:
      exceptions.append(node_name)

    if not op.master_capable:
      self.master_candidate = False
    else:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if op.readd:
      self.new_node = old_node
    else:
      node_group = config.LookupNodeGroup(op.group)
      self.new_node = objects.Node(name=node_name,
                                   primary_ip=new_pip,
                                   secondary_ip=new_sip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if op.ndparams:
      utils.ForceDictType(op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    op = self.op
    node_obj = self.new_node
    node_name = node_obj.name

    # A node that is being added is assumed to be powered
    node_obj.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if op.readd:
      node_obj.drained = False # pylint: disable-msg=W0201
      node_obj.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      node_obj.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        node_obj.primary_ip = op.primary_ip

    # copy the master/vm_capable flags
    for flag in self._NFLAGS:
      setattr(node_obj, flag, getattr(op, flag))

    # notify the user about any possible mc promotion
    if node_obj.master_candidate:
      self.LogInfo("Node will be a master candidate")

    node_obj.ndparams = op.ndparams or {}

    # check connectivity
    version_result = self.rpc.call_version([node_name])[node_name]
    version_result.Raise("Can't get version information from node %s" %
                         node_name)
    remote_version = version_result.payload
    if constants.PROTOCOL_VERSION != remote_version:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, remote_version))
    logging.info("Communication to node %s fine, sw version %s match",
                 node_name, remote_version)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                    constants.ETC_HOSTS_ADD,
                                                    self.hostname.name,
                                                    self.hostname.ip)
      hosts_result.Raise("Can't update hosts file with new host data")

    if node_obj.secondary_ip != node_obj.primary_ip:
      _CheckNodeHasSecondaryIP(self, node_obj.name, node_obj.secondary_ip,
                               False)

    verifiers = [self.cfg.GetMasterNode()]
    # TODO: do a node-net-test as well?
    verify_params = {constants.NV_NODELIST: [node_name]}

    verify_results = self.rpc.call_node_verify(verifiers, verify_params,
                                               self.cfg.GetClusterName())
    for verifier in verifiers:
      verifier_result = verify_results[verifier]
      verifier_result.Raise("Cannot communicate with node %s" % verifier)
      failures = verifier_result.payload[constants.NV_NODELIST]
      if not failures:
        continue
      for failed in failures:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, failures[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node_name],
                                  additional_vm=op.vm_capable)
      self.context.AddNode(node_obj, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(node_obj)
    # make sure we redistribute the config
    self.cfg.Update(node_obj, feedback_fn)
    # and make sure the new node will not have old files around
    if not node_obj.master_candidate:
      demote_msg = self.rpc.call_node_demote_from_mc(node_obj.name).fail_msg
      if demote_msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % demote_msg)
4289
4290
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Expand the node name and validate the requested modifications.

    Besides the validation this computes C{self.might_demote},
    C{self.lock_all} and C{self.lock_instances}, which are used by the
    locking and prerequisite code of this LU.

    @raise errors.OpPrereqError: if no modification was passed, if
        more than one of the checked values is True, or if the
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # note that the capability flags are part of this count, too
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declare the needed node (and possibly instance) locks.

    All nodes are locked if C{self.lock_all} is set, otherwise only
    the target node; all instances are locked if the secondary IP is
    being changed.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Release the instance locks not related to the target node.

    This also fills C{self.affected_instances} with the network
    mirrored instances that use the target node.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      # NOTE(review): when all node locks are requested this block is
      # skipped and affected_instances stays empty, so the instance
      # checks in CheckPrereq have nothing to check -- confirm that
      # this is intended
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list, for both phases, is the master node and the
    modified node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested flag, capability, power state and
    secondary IP changes are allowed for the node, and computes the
    old and new role of the node (C{self.old_role}, C{self.new_role})
    as well as the new node parameters (C{self.new_ndparams}).

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # report the instance names, not the instance objects
          inst_names = ", ".join([inst.name
                                  for inst in self.affected_instances])
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % inst_names,
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (parameter name, new value) pairs, one for each
        capability flag passed, each role flag that changed and the
        secondary IP if it was passed

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4582
4583
class LUNodePowercycle(NoHooksLU):
  """Logical unit that powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and refuse the master unless forced.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name
    targets_master = node_name == self.cfg.GetMasterNode()
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option which must not block on other
    jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    reply = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    reply.Raise("Failed to schedule the reboot")
    return reply.payload
4614
4615
class LUClusterQuery(NoHooksLU):
  """Logical unit querying the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dictionary with version constants, the machine
        architecture and the settings read from the cluster object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
4682
4683
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Compute the value of each requested field.

    @return: the list of values, in the order of the requested fields
    @raise errors.ParameterError: for a field that is not handled

    """
    # The getters are only called for the fields actually requested
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name": self.cfg.GetVGName,
      }

    values = []
    for field in self.op.output_fields:
      getter = getters.get(field)
      if getter is None:
        raise errors.ParameterError(field)
      values.append(getter())
    return values
4721
4722
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by the disk assembly
    @raise errors.OpExecError: if the disk assembly reports a failure

    """
    (all_ok, device_map) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if all_ok:
      return device_map

    raise errors.OpExecError("Cannot activate block devices")
4760
4761
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are assembled in two passes: first on every node of each
  disk's node tree with C{is_primary=False}, then once more on the
  primary node only, with C{is_primary=True}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors in the first pass are only
      logged and do not affect the returned status
  @type ignore_size: boolean
  @param ignore_size: if true, the assembly works on copies of the disk
      objects which have their size unset, useful for cases when the
      recorded size is wrong
  @rtype: tuple
  @return: C{(disks_ok, device_info)}; C{disks_ok} is False if an
      assembly error was counted as a failure, and C{device_info} is a
      list with one (primary_node, instance_visible_name,
      node_visible_name) tuple per disk, the last element being None if
      no device was successfully assembled on the primary node

  """
  instance_name = instance.name
  selected = _ExpandCheckDisks(instance, disks)

  def _AssembleOnNode(node, dev, as_primary, disk_idx):
    """Run a single blockdev_assemble RPC and return its result.

    """
    if ignore_size:
      # work on a copy, so the disk object passed in keeps its size
      dev = dev.Copy()
      dev.UnsetSize()
    lu.cfg.SetDiskID(dev, node)
    return lu.rpc.call_blockdev_assemble(node, dev, instance_name,
                                         as_primary, disk_idx)

  all_ok = True

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for disk_idx, disk in enumerate(selected):
    for node, dev in disk.ComputeNodeTree(instance.primary_node):
      err = _AssembleOnNode(node, dev, False, disk_idx).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         disk.iv_name, node, err)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  mapping = []
  for disk_idx, disk in enumerate(selected):
    path = None

    for node, dev in disk.ComputeNodeTree(instance.primary_node):
      if node == instance.primary_node:
        result = _AssembleOnNode(node, dev, True, disk_idx)
        err = result.fail_msg
        if err:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             disk.iv_name, node, err)
          all_ok = False
        else:
          path = result.payload

    mapping.append((instance.primary_node, disk.iv_name, path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in selected:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return all_ok, mapping
4848
4849
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up and failing on error.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be started
  @param force: passed to L{_AssembleInstanceDisks} as
      C{ignore_secondaries}; if it is None, no hint about '--force' is
      shown on failure
  @raise errors.OpExecError: if the disks could not be assembled; the
      instance's disks are shut down again before raising

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # the hint only makes sense if the caller explicitly passed a false
  # value for force (None means that forcing is not applicable)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
4863
4864
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the node-level locks.

    The node lock list starts empty and is marked for replacement.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is being declared.

    """
    at_node_level = (level == locking.LEVEL_NODE)
    if at_node_level:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance in the configuration and stores it in
    C{self.instance}.

    """
    self.instance = inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With the opcode's C{force} flag set the disks are shut down
    directly; otherwise L{_SafeShutdownInstanceDisks} is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
4899
4900
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance after a "down" check.

  L{_CheckInstanceDown} is invoked first (with the reason "cannot
  shutdown disks"), then the work is handed to
  L{_ShutdownInstanceDisks}, whose result is not returned.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to shut down (passed through unchanged)

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
4910
4911
def _ExpandCheckDisks(instance, disks):
  """Resolve a disk selection against the disks of an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks; None selects all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: C{instance.disks} if no selection was made, otherwise the
      passed-in list itself
  @raise errors.ProgrammerError: if a selected disk is not among the
      instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
4928
4929
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance. Every failure is
  logged as a warning; whether it also affects the return value depends
  on the node:

    - on the primary node, errors count unless C{ignore_primary} is true
    - on any other node, errors count unless the RPC result is flagged
      as offline

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node do not
      cause a False return value
  @rtype: boolean
  @return: False if at least one error was counted, True otherwise

  """
  success = True
  pnode = instance.primary_node

  for disk in _ExpandCheckDisks(instance, disks):
    for node, top_disk in disk.ComputeNodeTree(pnode):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == pnode:
        counts = not ignore_primary
      else:
        counts = not result.offline
      if counts:
        success = False

  return success
4954
4955
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node reports at least the requested free memory.

  The node is queried via the C{node_info} RPC; the C{memory_free}
  entry of the reply is compared against the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get("memory_free", None)
  if not isinstance(available, int):
    # a missing or non-integer value cannot be compared meaningfully
    msg = ("Can't compute free memory on node %s, result"
           " was '%s'" % (node, available))
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

  if requested > available:
    msg = ("Not enough memory on node %s for %s:"
           " needed %s MiB, available %s MiB" %
           (node, reason, requested, available))
    raise errors.OpPrereqError(msg, errors.ECODE_NORES)
4991
4992
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Check the free disk space of the given nodes, one VG at a time.

  For every volume group in C{req_sizes}, L{_CheckNodesFreeDiskOnVG} is
  called with the corresponding requested size; any exception raised
  there is propagated.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_sizes[vg])
5014
5015
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify that all given nodes have enough free space in one VG.

  All nodes are queried with a single C{node_info} RPC; the C{vg_free}
  entry of each reply is then compared against the requested amount,
  in the order of C{nodenames}.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, vg, None)

  for node in nodenames:
    node_result = results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    available = node_result.payload.get("vg_free", None)
    if not isinstance(available, int):
      msg = ("Can't compute free disk space on node"
             " %s for vg %s, result was '%s'" %
             (node, vg, available))
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    if requested > available:
      msg = ("Not enough disk space on target node %s"
             " vg %s: required %d MiB, available %d MiB" %
             (node, vg, requested, available))
      raise errors.OpPrereqError(msg, errors.ECODE_NORES)
5051
5052
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the types of the backend parameter overrides, if any.

    """
    beparams = self.op.beparams
    if beparams:
      utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre and post node lists are the same: the master node followed
    by all nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance, validates any hypervisor parameter
    overrides and records whether the primary node is offline. Unless an
    offline primary node is being ignored, the primary node, the
    instance's bridges and (if the instance is not reported as running)
    the free memory on the primary node are checked as well.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    hv_overrides = self.op.hvparams
    if hv_overrides:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hv_overrides, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(hv_overrides)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, merged_hvp)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, pnode)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # already running, memory is not checked
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first. If the
    primary node is offline nothing else is done; otherwise the disks
    are started and the instance start RPC is issued, with the disks
    being shut down again if that RPC fails.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(pnode, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if not msg:
      return

    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % msg)
5152
5153
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre and post node lists are the same: the master node followed
    by all nodes of the instance.

    """
    op = self.op
    env = {
      "IGNORE_SECONDARIES": op.ignore_secondaries,
      "REBOOT_TYPE": op.reboot_type,
      "SHUTDOWN_TIMEOUT": op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance and runs the online check for its
    primary node and the existence check for its bridges.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are done with a single reboot RPC on the
    primary node. Any other reboot type is a full reboot: the instance
    is shut down, its disks are shut down and started again, and the
    instance is started. In both cases the instance is marked as up in
    the configuration afterwards.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    if reboot_type in in_place_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(pnode, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, instance, None, None)
      start_error = result.fail_msg
      if start_error:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % start_error)

    self.cfg.MarkInstanceUp(instance.name)
5227
5228
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre and post node lists are the same: the master node followed
    by all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance and records whether its primary node is
    offline; unless an offline primary node is being ignored, the
    online check is run for the primary node.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode_info = self.cfg.GetNodeInfo(instance.primary_node)
    self.primary_offline = pnode_info.offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first. If the
    primary node is offline nothing else is done; otherwise the shutdown
    RPC is issued (a failure is only logged as a warning) and the
    instance's disks are shut down.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    shutdown_error = self.rpc.call_instance_shutdown(pnode, instance,
                                                     timeout).fail_msg
    if shutdown_error:
      self.proc.LogWarning("Could not shutdown instance: %s" % shutdown_error)

    _ShutdownInstanceDisks(self, instance)
5289
5290
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre and post node lists are the same: the master node followed
    by all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This runs the online check for the primary and all secondary nodes,
    refuses diskless instances, runs the "instance down" check and, if
    requested, verifies the new OS and the updated OS parameters. On
    success the instance is stored in C{self.instance} and the OS
    parameters to pass to the create scripts (or None) in
    C{self.os_inst}.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is None:
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, new_osparams)
      self.os_inst = new_osparams # the new dict (without defaults)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was given, it is written to the configuration
    first. The OS create scripts are then run with the disks started;
    the disks are shut down again whether or not the scripts succeed.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      os_add = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      os_add.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
5375
5376
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre and post node lists are the same: the master node followed
    by all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This runs the online check for the primary node, refuses diskless
    instances, runs the "instance down" check and validates the
    requested disk indices. If no indices were given, all disks of the
    instance are selected.

    @raise errors.OpPrereqError: if the instance has no disks or if a
        requested disk index is not a valid index into the instance's
        disk list

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    num_disks = len(instance.disks)
    if not self.op.disks:
      self.op.disks = range(num_disks)
    else:
      for idx in self.op.disks:
        # A negative value would pass a plain upper-bound check, but it
        # can never match one of the indices compared against in Exec,
        # so the request would silently do nothing for it
        if idx < 0 or idx >= num_disks:
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    All disk indices which were not selected are passed to
    L{_CreateDisks} as the ones to skip.

    """
    to_skip = [idx for idx in range(len(self.instance.disks))
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
5435
5436
class LUInstanceRename(LogicalUnit):
  """Logical unit renaming an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    An IP check is only accepted together with a name check.

    """
    ip_check_only = self.op.ip_check and not self.op.name_check
    if ip_check_only:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre and post node lists are the same: the master node followed
    by all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This expands the instance name, runs the online check for the
    primary node and the "instance down" check, optionally resolves the
    new name (and pings its IP), and makes sure the new name is not
    used by another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   resolved.name)
      # from here on the resolved name is the one being used
      new_name = resolved.name
      self.op.new_name = new_name
      if (self.op.ip_check and
          netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (resolved.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    name_taken = new_name in known_instances and new_name != instance.name
    if name_taken:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced; for file-based instances whose name changes, the storage
    directory is renamed on the primary node. Finally the OS rename
    script is run with the disks started; a failure of the script is
    only logged as a warning.

    @return: the name of the instance as re-read from the configuration

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = (inst.disk_template == constants.DT_FILE and
                           self.op.new_name != old_name)
    if rename_file_storage:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      dir_rename = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      dir_rename.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (inst.primary_node, old_file_storage_dir,
                        new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_run = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                     old_name,
                                                     self.op.debug_level)
      script_error = script_run.fail_msg
      if script_error:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, script_error))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
5540
5541
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the node-level locks.

    The node lock list starts empty and is marked for replacement.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is being declared.

    """
    at_node_level = (level == locking.LEVEL_NODE)
    if at_node_level:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-hook node list contains only the master node; the post-hook
    list consists of all nodes of the instance followed by the master.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout

    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(pre_nodes)
    return (env, pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance in the configuration and stores it in
    C{self.instance}.

    """
    self.instance = inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down first; a failed shutdown aborts the
    removal unless the opcode's C{ignore_failures} flag is set, in which
    case only a warning is emitted. The actual removal is done by
    L{_RemoveInstance}.

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    result = self.rpc.call_instance_shutdown(pnode, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
      feedback_fn("Warning: can't shutdown instance: %s" % msg)

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5601
5602
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report a disk removal failure
      that is being ignored
  @type instance: L{objects.Instance}
  @param instance: the instance to remove
  @type ignore_failures: boolean
  @param ignore_failures: if true, a failed disk removal only produces
      a warning instead of aborting the removal
  @raise errors.OpExecError: if the disks can't be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  # nobody else may have scheduled an instance lock removal already
  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5623
5624
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object built from
  the opcode parameters.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    op = self.op
    self.iq = _InstanceQuery(op.names, op.output_fields, op.use_locking)

  def ExpandNames(self):
    """Delegate the name expansion to the query object.

    """
    query = self.iq
    query.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegate the lock declaration to the query object.

    """
    query = self.iq
    query.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query = self.iq
    return query.OldStyleQuery(self)
5644
5645
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The instance is stopped on its primary node and, if it is marked as
  up, started again on its first secondary node, which becomes the new
  primary.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock level.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the instance's nodes when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-phase node list holds the master and the instance's
    secondary nodes; the post-phase list additionally holds the old
    primary node.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its disk
    template is network mirrored and that the first secondary node is
    able to take the instance over.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)
    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started there, so it must fit in memory
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, new_primary, reason,
                           be_params[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: function used to send feedback to the caller
    @raise errors.OpExecError: on degraded disks, failed shutdown, or
        failure to bring the disks or the instance up on the new node

    """
    inst = self.instance
    old_primary_info = self.cfg.GetNodeInfo(inst.primary_node)

    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if not (consistent or self.op.ignore_consistency):
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.op.shutdown_timeout)
    failure = shutdown_res.fail_msg
    if failure:
      if not (self.op.ignore_consistency or old_primary_info.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, failure))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, failure)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s", inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst, ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    failure = start_res.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, failure))
5791
5792
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is done by a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and register the migration tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Lock the instance's nodes when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-phase node list holds the master and the instance's
    secondary nodes; the post-phase list additionally holds the
    current primary node.

    """
    migrater = self._migrater
    inst = migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, inst)
    env.update({
      "MIGRATE_LIVE": migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      })

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5840
5841
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, its disks are re-created on the target
  node, the data is copied over, and the instance is then started on
  the target node if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the instance's primary node and the
    target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are of a type which
    can be copied, and that the target node can host the instance.

    @raise errors.OpPrereqError: if the instance is already on the
        target node or one of its disks has a complex layout

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain logical volumes and file-based disks are accepted
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node; the reason text
      # describes a move, not a failover
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to send feedback to the caller
    @raise errors.OpExecError: if the shutdown, the disk creation, the
        data copy or the final startup fails

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always release the minors, then let the failure propagate
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the copy error is what gets reported, whatever the cleanup did
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    if not _RemoveDisks(self, instance, target_node=source_node):
      # the move itself succeeded, so only report the leftovers
      self.LogWarning("Could not remove all disks of instance %s from the"
                      " original node %s, please clean them up manually",
                      instance.name, source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6015
6016
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance which
  has the node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One (non-cleanup) migration tasklet per primary instance of the node
    inst_names = []
    migraters = []

    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      inst_name = inst.name
      logging.debug("Migrating instance %s", inst_name)
      inst_names.append(inst_name)
      migraters.append(TLMigrateInstance(self, inst_name, False))

    self.tasklets = migraters

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = inst_names

  def DeclareLocks(self, level):
    """Lock the instances' nodes when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list, for both the pre and the post phase, contains only
    the master node.

    """
    env = {"NODE_NAME": self.op.node_name}
    master_only = [self.cfg.GetMasterNode()]

    return (env, master_only, master_only)
6066
6067
6068 class TLMigrateInstance(Tasklet):
6069   """Tasklet class for instance migration.
6070
6071   @type live: boolean
6072   @ivar live: whether the migration will be done live or non-live;
6073       this variable is initalized only after CheckPrereq has run
6074
6075   """
6076   def __init__(self, lu, instance_name, cleanup):
6077     """Initializes this class.
6078
6079     """
6080     Tasklet.__init__(self, lu)
6081
6082     # Parameters
6083     self.instance_name = instance_name
6084     self.cleanup = cleanup
6085     self.live = False # will be overridden later
6086
6087   def CheckPrereq(self):
6088     """Check prerequisites.
6089
6090     This checks that the instance is in the cluster.
6091
6092     """
6093     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6094     instance = self.cfg.GetInstanceInfo(instance_name)
6095     assert instance is not None
6096
6097     if instance.disk_template != constants.DT_DRBD8:
6098       raise errors.OpPrereqError("Instance's disk layout is not"
6099                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6100
6101     secondary_nodes = instance.secondary_nodes
6102     if not secondary_nodes:
6103       raise errors.ConfigurationError("No secondary node but using"
6104                                       " drbd8 disk template")
6105
6106     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6107
6108     target_node = secondary_nodes[0]
6109     # check memory requirements on the secondary node
6110     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6111                          instance.name, i_be[constants.BE_MEMORY],
6112                          instance.hypervisor)
6113
6114     # check bridge existance
6115     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6116
6117     if not self.cleanup:
6118       _CheckNodeNotDrained(self.lu, target_node)
6119       result = self.rpc.call_instance_migratable(instance.primary_node,
6120                                                  instance)
6121       result.Raise("Can't migrate, please use failover",
6122                    prereq=True, ecode=errors.ECODE_STATE)
6123
6124     self.instance = instance
6125
6126     if self.lu.op.live is not None and self.lu.op.mode is not None:
6127       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6128                                  " parameters are accepted",
6129                                  errors.ECODE_INVAL)
6130     if self.lu.op.live is not None:
6131       if self.lu.op.live:
6132         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6133       else:
6134         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6135       # reset the 'live' parameter to None so that repeated
6136       # invocations of CheckPrereq do not raise an exception
6137       self.lu.op.live = None
6138     elif self.lu.op.mode is None:
6139       # read the default value from the hypervisor
6140       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6141       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6142
6143     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6144
6145   def _WaitUntilSync(self):
6146     """Poll with custom rpc for disk sync.
6147
6148     This uses our own step-based rpc call.
6149
6150     """
6151     self.feedback_fn("* wait until resync is done")
6152     all_done = False
6153     while not all_done:
6154       all_done = True
6155       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6156                                             self.nodes_ip,
6157                                             self.instance.disks)
6158       min_percent = 100
6159       for node, nres in result.items():
6160         nres.Raise("Cannot resync disks on node %s" % node)
6161         node_done, node_percent = nres.payload
6162         all_done = all_done and node_done
6163         if node_percent is not None:
6164           min_percent = min(min_percent, node_percent)
6165       if not all_done:
6166         if min_percent < 100:
6167           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6168         time.sleep(2)
6169
6170   def _EnsureSecondary(self, node):
6171     """Demote a node to secondary.
6172
6173     """
6174     self.feedback_fn("* switching node %s to secondary mode" % node)
6175
6176     for dev in self.instance.disks:
6177       self.cfg.SetDiskID(dev, node)
6178
6179     result = self.rpc.call_blockdev_close(node, self.instance.name,
6180                                           self.instance.disks)
6181     result.Raise("Cannot change disk to secondary on node %s" % node)
6182
6183   def _GoStandalone(self):
6184     """Disconnect from the network.
6185
6186     """
6187     self.feedback_fn("* changing into standalone mode")
6188     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6189                                                self.instance.disks)
6190     for node, nres in result.items():
6191       nres.Raise("Cannot disconnect disks node %s" % node)
6192
6193   def _GoReconnect(self, multimaster):
6194     """Reconnect to the network.
6195
6196     """
6197     if multimaster:
6198       msg = "dual-master"
6199     else:
6200       msg = "single-master"
6201     self.feedback_fn("* changing disks into %s mode" % msg)
6202     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6203                                            self.instance.disks,
6204                                            self.instance.name, multimaster)
6205     for node, nres in result.items():
6206       nres.Raise("Cannot change disks config on node %s" % node)
6207
6208   def _ExecCleanup(self):
6209     """Try to cleanup after a failed migration.
6210
6211     The cleanup is done by:
6212       - check that the instance is running only on one node
6213         (and update the config if needed)
6214       - change disks on its secondary node to secondary
6215       - wait until disks are fully synchronized
6216       - disconnect from the network
6217       - change disks into single-master mode
6218       - wait again until disks are fully synchronized
6219
6220     """
6221     instance = self.instance
6222     target_node = self.target_node
6223     source_node = self.source_node
6224
6225     # check running on only one node
6226     self.feedback_fn("* checking where the instance actually runs"
6227                      " (if this hangs, the hypervisor might be in"
6228                      " a bad state)")
6229     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6230     for node, result in ins_l.items():
6231       result.Raise("Can't contact node %s" % node)
6232
6233     runningon_source = instance.name in ins_l[source_node].payload
6234     runningon_target = instance.name in ins_l[target_node].payload
6235
6236     if runningon_source and runningon_target:
6237       raise errors.OpExecError("Instance seems to be running on two nodes,"
6238                                " or the hypervisor is confused. You will have"
6239                                " to ensure manually that it runs only on one"
6240                                " and restart this operation.")
6241
6242     if not (runningon_source or runningon_target):
6243       raise errors.OpExecError("Instance does not seem to be running at all."
6244                                " In this case, it's safer to repair by"
6245                                " running 'gnt-instance stop' to ensure disk"
6246                                " shutdown, and then restarting it.")
6247
6248     if runningon_target:
6249       # the migration has actually succeeded, we need to update the config
6250       self.feedback_fn("* instance running on secondary node (%s),"
6251                        " updating config" % target_node)
6252       instance.primary_node = target_node
6253       self.cfg.Update(instance, self.feedback_fn)
6254       demoted_node = source_node
6255     else:
6256       self.feedback_fn("* instance confirmed to be running on its"
6257                        " primary node (%s)" % source_node)
6258       demoted_node = target_node
6259
6260     self._EnsureSecondary(demoted_node)
6261     try:
6262       self._WaitUntilSync()
6263     except errors.OpExecError:
6264       # we ignore here errors, since if the device is standalone, it
6265       # won't be able to sync
6266       pass
6267     self._GoStandalone()
6268     self._GoReconnect(False)
6269     self._WaitUntilSync()
6270
6271     self.feedback_fn("* done")
6272
6273   def _RevertDiskStatus(self):
6274     """Try to revert the disk status after a failed migration.
6275
6276     """
6277     target_node = self.target_node
6278     try:
6279       self._EnsureSecondary(target_node)
6280       self._GoStandalone()
6281       self._GoReconnect(False)
6282       self._WaitUntilSync()
6283     except errors.OpExecError, err:
6284       self.lu.LogWarning("Migration failed and I can't reconnect the"
6285                          " drives: error '%s'\n"
6286                          "Please look and recover the instance status" %
6287                          str(err))
6288
6289   def _AbortMigration(self):
6290     """Call the hypervisor code to abort a started migration.
6291
6292     """
6293     instance = self.instance
6294     target_node = self.target_node
6295     migration_info = self.migration_info
6296
6297     abort_result = self.rpc.call_finalize_migration(target_node,
6298                                                     instance,
6299                                                     migration_info,
6300                                                     False)
6301     abort_msg = abort_result.fail_msg
6302     if abort_msg:
6303       logging.error("Aborting migration failed on target node %s: %s",
6304                     target_node, abort_msg)
6305       # Don't raise an exception here, as we stil have to try to revert the
6306       # disk status, even if this step failed.
6307
6308   def _ExecMigration(self):
6309     """Migrate an instance.
6310
6311     The migrate is done by:
6312       - change the disks into dual-master mode
6313       - wait until disks are fully synchronized again
6314       - migrate the instance
6315       - change disks on the new secondary node (the old primary) to secondary
6316       - wait until disks are fully synchronized
6317       - change disks into single-master mode
6318
6319     """
6320     instance = self.instance
6321     target_node = self.target_node
6322     source_node = self.source_node
6323
6324     self.feedback_fn("* checking disk consistency between source and target")
6325     for dev in instance.disks:
6326       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6327         raise errors.OpExecError("Disk %s is degraded or not fully"
6328                                  " synchronized on target node,"
6329                                  " aborting migrate." % dev.iv_name)
6330
6331     # First get the migration information from the remote node
6332     result = self.rpc.call_migration_info(source_node, instance)
6333     msg = result.fail_msg
6334     if msg:
6335       log_err = ("Failed fetching source migration information from %s: %s" %
6336                  (source_node, msg))
6337       logging.error(log_err)
6338       raise errors.OpExecError(log_err)
6339
6340     self.migration_info = migration_info = result.payload
6341
6342     # Then switch the disks to master/master mode
6343     self._EnsureSecondary(target_node)
6344     self._GoStandalone()
6345     self._GoReconnect(True)
6346     self._WaitUntilSync()
6347
6348     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6349     result = self.rpc.call_accept_instance(target_node,
6350                                            instance,
6351                                            migration_info,
6352                                            self.nodes_ip[target_node])
6353
6354     msg = result.fail_msg
6355     if msg:
6356       logging.error("Instance pre-migration failed, trying to revert"
6357                     " disk status: %s", msg)
6358       self.feedback_fn("Pre-migration failed, aborting")
6359       self._AbortMigration()
6360       self._RevertDiskStatus()
6361       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6362                                (instance.name, msg))
6363
6364     self.feedback_fn("* migrating instance to %s" % target_node)
6365     time.sleep(10)
6366     result = self.rpc.call_instance_migrate(source_node, instance,
6367                                             self.nodes_ip[target_node],
6368                                             self.live)
6369     msg = result.fail_msg
6370     if msg:
6371       logging.error("Instance migration failed, trying to revert"
6372                     " disk status: %s", msg)
6373       self.feedback_fn("Migration failed, aborting")
6374       self._AbortMigration()
6375       self._RevertDiskStatus()
6376       raise errors.OpExecError("Could not migrate instance %s: %s" %
6377                                (instance.name, msg))
6378     time.sleep(10)
6379
6380     instance.primary_node = target_node
6381     # distribute new instance config to the other nodes
6382     self.cfg.Update(instance, self.feedback_fn)
6383
6384     result = self.rpc.call_finalize_migration(target_node,
6385                                               instance,
6386                                               migration_info,
6387                                               True)
6388     msg = result.fail_msg
6389     if msg:
6390       logging.error("Instance migration succeeded, but finalization failed:"
6391                     " %s", msg)
6392       raise errors.OpExecError("Could not finalize instance migration: %s" %
6393                                msg)
6394
6395     self._EnsureSecondary(source_node)
6396     self._WaitUntilSync()
6397     self._GoStandalone()
6398     self._GoReconnect(False)
6399     self._WaitUntilSync()
6400
6401     self.feedback_fn("* done")
6402
6403   def Exec(self, feedback_fn):
6404     """Perform the migration.
6405
6406     """
6407     feedback_fn("Migrating instance %s" % self.instance.name)
6408
6409     self.feedback_fn = feedback_fn
6410
6411     self.source_node = self.instance.primary_node
6412     self.target_node = self.instance.secondary_nodes[0]
6413     self.all_nodes = [self.source_node, self.target_node]
6414     self.nodes_ip = {
6415       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6416       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6417       }
6418
6419     if self.cleanup:
6420       return self._ExecCleanup()
6421     else:
6422       return self._ExecMigration()
6423
6424
6425 def _CreateBlockDev(lu, node, instance, device, force_create,
6426                     info, force_open):
6427   """Create a tree of block devices on a given node.
6428
6429   If this device type has to be created on secondaries, create it and
6430   all its children.
6431
6432   If not, just recurse to children keeping the same 'force' value.
6433
6434   @param lu: the lu on whose behalf we execute
6435   @param node: the node on which to create the device
6436   @type instance: L{objects.Instance}
6437   @param instance: the instance which owns the device
6438   @type device: L{objects.Disk}
6439   @param device: the device to create
6440   @type force_create: boolean
6441   @param force_create: whether to force creation of this device; this
6442       will be change to True whenever we find a device which has
6443       CreateOnSecondary() attribute
6444   @param info: the extra 'metadata' we should attach to the device
6445       (this will be represented as a LVM tag)
6446   @type force_open: boolean
6447   @param force_open: this parameter will be passes to the
6448       L{backend.BlockdevCreate} function where it specifies
6449       whether we run on primary or not, and it affects both
6450       the child assembly and the device own Open() execution
6451
6452   """
6453   if device.CreateOnSecondary():
6454     force_create = True
6455
6456   if device.children:
6457     for child in device.children:
6458       _CreateBlockDev(lu, node, instance, child, force_create,
6459                       info, force_open)
6460
6461   if not force_create:
6462     return
6463
6464   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6465
6466
6467 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6468   """Create a single block device on a given node.
6469
6470   This will not recurse over children of the device, so they must be
6471   created in advance.
6472
6473   @param lu: the lu on whose behalf we execute
6474   @param node: the node on which to create the device
6475   @type instance: L{objects.Instance}
6476   @param instance: the instance which owns the device
6477   @type device: L{objects.Disk}
6478   @param device: the device to create
6479   @param info: the extra 'metadata' we should attach to the device
6480       (this will be represented as a LVM tag)
6481   @type force_open: boolean
6482   @param force_open: this parameter will be passes to the
6483       L{backend.BlockdevCreate} function where it specifies
6484       whether we run on primary or not, and it affects both
6485       the child assembly and the device own Open() execution
6486
6487   """
6488   lu.cfg.SetDiskID(device, node)
6489   result = lu.rpc.call_blockdev_create(node, device, device.size,
6490                                        instance.name, force_open, info)
6491   result.Raise("Can't create block device %s on"
6492                " node %s for instance %s" % (device, node, instance.name))
6493   if device.physical_id is None:
6494     device.physical_id = result.payload
6495
6496
6497 def _GenerateUniqueNames(lu, exts):
6498   """Generate a suitable LV name.
6499
6500   This will generate a logical volume name for the given instance.
6501
6502   """
6503   results = []
6504   for val in exts:
6505     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6506     results.append("%s%s" % (new_id, val))
6507   return results
6508
6509
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  The device has two logical volumes as children: the data volume (of
  the requested size, named C{names[0]}) and the metadata volume (of
  size 128, named C{names[1]}), both in the volume group C{vgname}.

  @param lu: the logical unit on whose behalf we execute; used for
      allocating the port and generating the shared secret
  @param primary: the primary node of the DRBD device
  @param secondary: the secondary node of the DRBD device
  @param size: the size of the DRBD device and of its data volume
  @param vgname: the volume group holding the two logical volumes
  @param names: the names of the data and metadata logical volumes
  @param iv_name: the C{iv_name} to set on the DRBD device
  @param p_minor: the DRBD minor, stored in the logical ID after the port
  @param s_minor: the DRBD minor, stored in the logical ID after p_minor
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk object

  """
  drbd_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
6528
6529
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template; diskless, plain, DRBD8 and
      file are handled
  @param instance_name: the name of the instance, passed to the DRBD
      minor allocation
  @param primary_node: the primary node (used for DRBD8 disks)
  @param secondary_nodes: the list of secondary nodes; it must hold
      exactly one node for DRBD8 and be empty for plain and file
  @param disk_info: list of dicts describing the disks; the "size" and
      "mode" keys are read, and "vg" optionally (defaulting to the
      cluster volume group)
  @param file_storage_dir: the directory in which file-based disks are
      placed
  @param file_driver: the file driver, stored in the logical ID of
      file-based disks
  @param base_index: the index of the first generated disk
  @param feedback_fn: function used to send feedback back to the caller
  @rtype: list
  @return: the generated L{objects.Disk} objects
  @raise errors.ProgrammerError: if the template is unknown or the
      number of secondary nodes doesn't match the template

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)
  layout = []

  if template_name == constants.DT_DISKLESS:
    # nothing to generate
    pass

  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", default_vg)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, lv_names[idx]))
      layout.append(objects.Disk(dev_type=constants.LD_LV,
                                 size=disk["size"],
                                 logical_id=(vg, lv_names[idx]),
                                 iv_name="disk/%d" % disk_index,
                                 mode=disk["mode"]))

  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")

    remote_node = secondary_nodes[0]
    # two minors per disk, alternating primary and secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    # two logical volumes per disk: data first, then metadata
    lv_names = []
    for lv_prefix in _GenerateUniqueNames(lu, suffixes):
      lv_names.extend([lv_prefix + "_data", lv_prefix + "_meta"])

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get("vg", default_vg)
      first = idx * 2
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg,
                                      lv_names[first:first + 2],
                                      "disk/%d" % disk_index,
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk["mode"]
      layout.append(drbd_dev)

  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      file_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      layout.append(file_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return layout
6599
6600
6601 def _GetInstanceInfoText(instance):
6602   """Compute that text that should be added to the disk's metadata.
6603
6604   """
6605   return "originstname+%s" % instance.name
6606
6607
6608 def _CalcEta(time_taken, written, total_size):
6609   """Calculates the ETA based on size written and total size.
6610
6611   @param time_taken: The time taken so far
6612   @param written: amount written so far
6613   @param total_size: The total size of data to be written
6614   @return: The remaining time in seconds
6615
6616   """
6617   avg_time = time_taken / float(written)
6618   return (total_size - written) * avg_time
6619
6620
6621 def _WipeDisks(lu, instance):
6622   """Wipes instance disks.
6623
6624   @type lu: L{LogicalUnit}
6625   @param lu: the logical unit on whose behalf we execute
6626   @type instance: L{objects.Instance}
6627   @param instance: the instance whose disks we should create
6628   @return: the success of the wipe
6629
6630   """
6631   node = instance.primary_node
6632   logging.info("Pause sync of instance %s disks", instance.name)
6633   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6634
6635   for idx, success in enumerate(result.payload):
6636     if not success:
6637       logging.warn("pause-sync of instance %s for disks %d failed",
6638                    instance.name, idx)
6639
6640   try:
6641     for idx, device in enumerate(instance.disks):
6642       lu.LogInfo("* Wiping disk %d", idx)
6643       logging.info("Wiping disk %d for instance %s", idx, instance.name)
6644
6645       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6646       # MAX_WIPE_CHUNK at max
6647       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6648                             constants.MIN_WIPE_CHUNK_PERCENT)
6649
6650       offset = 0
6651       size = device.size
6652       last_output = 0
6653       start_time = time.time()
6654
6655       while offset < size:
6656         wipe_size = min(wipe_chunk_size, size - offset)
6657         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6658         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6659                      (idx, offset, wipe_size))
6660         now = time.time()
6661         offset += wipe_size
6662         if now - last_output >= 60:
6663           eta = _CalcEta(now - start_time, offset, size)
6664           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6665                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6666           last_output = now
6667   finally:
6668     logging.info("Resume sync of instance %s disks", instance.name)
6669
6670     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6671
6672     for idx, success in enumerate(result.payload):
6673       if not success:
6674         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6675                       " look at the status and troubleshoot the issue.", idx)
6676         logging.warn("resume-sync of instance %s for disks %d failed",
6677                      instance.name, idx)
6678
6679
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  For file-based instances the file storage directory is created first
  (on the primary node, or on the target node if one is given). Then
  every disk not listed in C{to_skip} is created on all involved nodes;
  creation and opening are only forced on the primary/target node.

  Nothing is returned; failures are reported through the C{Raise}
  method of the RPC results.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)

  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # all disks of the instance live in the same directory
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on node %s" %
                     (file_storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
6723
6724
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the file storage directory is removed as
  well, from the target node if one is given and from the primary node
  otherwise.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: warn and go on with the remaining devices
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node on which the removal was actually attempted,
      # which is not the primary node if a target node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6772
6773
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template of the instance
  @type disks: list of dicts
  @param disks: the disk definitions; for the plain and DRBD8 templates
      each of them must have the "vg" and "size" keys
  @rtype: dict
  @return: a dictionary mapping each volume group name to the space
      required in it, i.e. the sum of the sizes of all disks placed in
      that volume group (plus 128 MB of DRBD metadata per disk for
      DRBD8); the dictionary is empty for the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Sum up the disk sizes (plus the per-disk payload) per volume group.

    """
    vgs = {}
    for disk in disks:
      vg = disk["vg"]
      # accumulate on the running total of this disk's volume group, so
      # that multiple disks in the same volume group are added up
      vgs[vg] = vgs.get(vg, 0) + disk["size"] + payload

    return vgs

  # Extra space needed per disk in the volume group; None means that the
  # template doesn't use any volume group space
  vg_payload = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: 0,
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: 128,
    constants.DT_FILE: None,
  }

  if disk_template not in vg_payload:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  payload = vg_payload[disk_template]
  if payload is None:
    return {}

  # only look at the disks for the templates which actually need it
  return _compute(disks, payload)
6802
6803
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirements in the volume group.

  @param disk_template: the disk template of the instance
  @type disks: list of dicts
  @param disks: the disk definitions; only the "size" key is used
  @return: the sum of all disk sizes for the plain template, the same
      plus 128 MB of metadata per disk for DRBD8, and None for the
      diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" %  disk_template)
6822
6823
6824 def _FilterVmNodes(lu, nodenames):
6825   """Filters out non-vm_capable nodes from a list.
6826
6827   @type lu: L{LogicalUnit}
6828   @param lu: the logical unit for which we check
6829   @type nodenames: list
6830   @param nodenames: the list of nodes on which we should check
6831   @rtype: list
6832   @return: the list of vm-capable nodes
6833
6834   """
6835   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6836   return [name for name in nodenames if name not in vm_nodes]
6837
6838
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  Only the vm-capable nodes are asked to validate the parameters, and
  the results of offline nodes are ignored; a failure on any other node
  is reported through the C{Raise} method of its RPC result.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      hvparams)
  for node in vm_nodes:
    node_result = validation[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
6865
6866
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  Only the vm-capable nodes are asked to validate the parameters. A
  failure on any node is reported through the C{Raise} method of its
  RPC result; an empty payload only causes an informational message
  that the OS was not found and the validation was skipped.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  validation = lu.rpc.call_os_validate(required, vm_nodes, osname, checks,
                                       osparams)
  for node, node_result in validation.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
6895
6896
6897 class LUInstanceCreate(LogicalUnit):
6898   """Create an instance.
6899
6900   """
6901   HPATH = "instance-add"
6902   HTYPE = constants.HTYPE_INSTANCE
6903   REQ_BGL = False
6904
6905   def CheckArguments(self):
6906     """Check arguments.
6907
6908     """
6909     # do not require name_check to ease forward/backward compatibility
6910     # for tools
6911     if self.op.no_install and self.op.start:
6912       self.LogInfo("No-installation mode selected, disabling startup")
6913       self.op.start = False
6914     # validate/normalize the instance name
6915     self.op.instance_name = \
6916       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6917
6918     if self.op.ip_check and not self.op.name_check:
6919       # TODO: make the ip check more flexible and not depend on the name check
6920       raise errors.OpPrereqError("Cannot do ip check without a name check",
6921                                  errors.ECODE_INVAL)
6922
6923     # check nics' parameter names
6924     for nic in self.op.nics:
6925       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6926
6927     # check disks. parameter names and consistent adopt/no-adopt strategy
6928     has_adopt = has_no_adopt = False
6929     for disk in self.op.disks:
6930       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6931       if "adopt" in disk:
6932         has_adopt = True
6933       else:
6934         has_no_adopt = True
6935     if has_adopt and has_no_adopt:
6936       raise errors.OpPrereqError("Either all disks are adopted or none is",
6937                                  errors.ECODE_INVAL)
6938     if has_adopt:
6939       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6940         raise errors.OpPrereqError("Disk adoption is not supported for the"
6941                                    " '%s' disk template" %
6942                                    self.op.disk_template,
6943                                    errors.ECODE_INVAL)
6944       if self.op.iallocator is not None:
6945         raise errors.OpPrereqError("Disk adoption not allowed with an"
6946                                    " iallocator script", errors.ECODE_INVAL)
6947       if self.op.mode == constants.INSTANCE_IMPORT:
6948         raise errors.OpPrereqError("Disk adoption not allowed for"
6949                                    " instance import", errors.ECODE_INVAL)
6950
6951     self.adopt_disks = has_adopt
6952
6953     # instance name verification
6954     if self.op.name_check:
6955       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6956       self.op.instance_name = self.hostname1.name
6957       # used in CheckPrereq for ip ping check
6958       self.check_ip = self.hostname1.ip
6959     else:
6960       self.check_ip = None
6961
6962     # file storage checks
6963     if (self.op.file_driver and
6964         not self.op.file_driver in constants.FILE_DRIVER):
6965       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6966                                  self.op.file_driver, errors.ECODE_INVAL)
6967
6968     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6969       raise errors.OpPrereqError("File storage directory path not absolute",
6970                                  errors.ECODE_INVAL)
6971
6972     ### Node/iallocator related checks
6973     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6974
6975     if self.op.pnode is not None:
6976       if self.op.disk_template in constants.DTS_NET_MIRROR:
6977         if self.op.snode is None:
6978           raise errors.OpPrereqError("The networked disk templates need"
6979                                      " a mirror node", errors.ECODE_INVAL)
6980       elif self.op.snode:
6981         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6982                         " template")
6983         self.op.snode = None
6984
6985     self._cds = _GetClusterDomainSecret()
6986
6987     if self.op.mode == constants.INSTANCE_IMPORT:
6988       # On import force_variant must be True, because if we forced it at
6989       # initial install, our only chance when importing it back is that it
6990       # works again!
6991       self.op.force_variant = True
6992
6993       if self.op.no_install:
6994         self.LogInfo("No-installation mode has no effect during import")
6995
6996     elif self.op.mode == constants.INSTANCE_CREATE:
6997       if self.op.os_type is None:
6998         raise errors.OpPrereqError("No guest OS specified",
6999                                    errors.ECODE_INVAL)
7000       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7001         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7002                                    " installation" % self.op.os_type,
7003                                    errors.ECODE_STATE)
7004       if self.op.disk_template is None:
7005         raise errors.OpPrereqError("No disk template specified",
7006                                    errors.ECODE_INVAL)
7007
7008     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7009       # Check handshake to ensure both clusters have the same domain secret
7010       src_handshake = self.op.source_handshake
7011       if not src_handshake:
7012         raise errors.OpPrereqError("Missing source handshake",
7013                                    errors.ECODE_INVAL)
7014
7015       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7016                                                            src_handshake)
7017       if errmsg:
7018         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7019                                    errors.ECODE_INVAL)
7020
7021       # Load and check source CA
7022       self.source_x509_ca_pem = self.op.source_x509_ca
7023       if not self.source_x509_ca_pem:
7024         raise errors.OpPrereqError("Missing source X509 CA",
7025                                    errors.ECODE_INVAL)
7026
7027       try:
7028         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7029                                                     self._cds)
7030       except OpenSSL.crypto.Error, err:
7031         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7032                                    (err, ), errors.ECODE_INVAL)
7033
7034       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7035       if errcode is not None:
7036         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7037                                    errors.ECODE_INVAL)
7038
7039       self.source_x509_ca = cert
7040
7041       src_instance_name = self.op.source_instance_name
7042       if not src_instance_name:
7043         raise errors.OpPrereqError("Missing source instance name",
7044                                    errors.ECODE_INVAL)
7045
7046       self.source_instance_name = \
7047           netutils.GetHostname(name=src_instance_name).name
7048
7049     else:
7050       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7051                                  self.op.mode, errors.ECODE_INVAL)
7052
7053   def ExpandNames(self):
7054     """ExpandNames for CreateInstance.
7055
7056     Figure out the right locks for instance creation.
7057
7058     """
7059     self.needed_locks = {}
7060
7061     instance_name = self.op.instance_name
7062     # this is just a preventive check, but someone might still add this
7063     # instance in the meantime, and creation will fail at lock-add time
7064     if instance_name in self.cfg.GetInstanceList():
7065       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7066                                  instance_name, errors.ECODE_EXISTS)
7067
7068     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7069
7070     if self.op.iallocator:
7071       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7072     else:
7073       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7074       nodelist = [self.op.pnode]
7075       if self.op.snode is not None:
7076         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7077         nodelist.append(self.op.snode)
7078       self.needed_locks[locking.LEVEL_NODE] = nodelist
7079
7080     # in case of import lock the source node too
7081     if self.op.mode == constants.INSTANCE_IMPORT:
7082       src_node = self.op.src_node
7083       src_path = self.op.src_path
7084
7085       if src_path is None:
7086         self.op.src_path = src_path = self.op.instance_name
7087
7088       if src_node is None:
7089         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7090         self.op.src_node = None
7091         if os.path.isabs(src_path):
7092           raise errors.OpPrereqError("Importing an instance from an absolute"
7093                                      " path requires a source node option.",
7094                                      errors.ECODE_INVAL)
7095       else:
7096         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7097         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7098           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7099         if not os.path.isabs(src_path):
7100           self.op.src_path = src_path = \
7101             utils.PathJoin(constants.EXPORT_DIR, src_path)
7102
7103   def _RunAllocator(self):
7104     """Run the allocator based on input opcode.
7105
7106     """
7107     nics = [n.ToDict() for n in self.nics]
7108     ial = IAllocator(self.cfg, self.rpc,
7109                      mode=constants.IALLOCATOR_MODE_ALLOC,
7110                      name=self.op.instance_name,
7111                      disk_template=self.op.disk_template,
7112                      tags=[],
7113                      os=self.op.os_type,
7114                      vcpus=self.be_full[constants.BE_VCPUS],
7115                      mem_size=self.be_full[constants.BE_MEMORY],
7116                      disks=self.disks,
7117                      nics=nics,
7118                      hypervisor=self.op.hypervisor,
7119                      )
7120
7121     ial.Run(self.op.iallocator)
7122
7123     if not ial.success:
7124       raise errors.OpPrereqError("Can't compute nodes using"
7125                                  " iallocator '%s': %s" %
7126                                  (self.op.iallocator, ial.info),
7127                                  errors.ECODE_NORES)
7128     if len(ial.result) != ial.required_nodes:
7129       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7130                                  " of nodes (%s), required %s" %
7131                                  (self.op.iallocator, len(ial.result),
7132                                   ial.required_nodes), errors.ECODE_FAULT)
7133     self.op.pnode = ial.result[0]
7134     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7135                  self.op.instance_name, self.op.iallocator,
7136                  utils.CommaJoin(ial.result))
7137     if ial.required_nodes == 2:
7138       self.op.snode = ial.result[1]
7139
7140   def BuildHooksEnv(self):
7141     """Build hooks env.
7142
7143     This runs on master, primary and secondary nodes of the instance.
7144
7145     """
7146     env = {
7147       "ADD_MODE": self.op.mode,
7148       }
7149     if self.op.mode == constants.INSTANCE_IMPORT:
7150       env["SRC_NODE"] = self.op.src_node
7151       env["SRC_PATH"] = self.op.src_path
7152       env["SRC_IMAGES"] = self.src_images
7153
7154     env.update(_BuildInstanceHookEnv(
7155       name=self.op.instance_name,
7156       primary_node=self.op.pnode,
7157       secondary_nodes=self.secondaries,
7158       status=self.op.start,
7159       os_type=self.op.os_type,
7160       memory=self.be_full[constants.BE_MEMORY],
7161       vcpus=self.be_full[constants.BE_VCPUS],
7162       nics=_NICListToTuple(self, self.nics),
7163       disk_template=self.op.disk_template,
7164       disks=[(d["size"], d["mode"]) for d in self.disks],
7165       bep=self.be_full,
7166       hvp=self.hv_full,
7167       hypervisor_name=self.op.hypervisor,
7168     ))
7169
7170     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7171           self.secondaries)
7172     return env, nl, nl
7173
7174   def _ReadExportInfo(self):
7175     """Reads the export information from disk.
7176
7177     It will override the opcode source node and path with the actual
7178     information, if these two were not specified before.
7179
7180     @return: the export information
7181
7182     """
7183     assert self.op.mode == constants.INSTANCE_IMPORT
7184
7185     src_node = self.op.src_node
7186     src_path = self.op.src_path
7187
7188     if src_node is None:
7189       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7190       exp_list = self.rpc.call_export_list(locked_nodes)
7191       found = False
7192       for node in exp_list:
7193         if exp_list[node].fail_msg:
7194           continue
7195         if src_path in exp_list[node].payload:
7196           found = True
7197           self.op.src_node = src_node = node
7198           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7199                                                        src_path)
7200           break
7201       if not found:
7202         raise errors.OpPrereqError("No export found for relative path %s" %
7203                                     src_path, errors.ECODE_INVAL)
7204
7205     _CheckNodeOnline(self, src_node)
7206     result = self.rpc.call_export_info(src_node, src_path)
7207     result.Raise("No export or invalid export found in dir %s" % src_path)
7208
7209     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7210     if not export_info.has_section(constants.INISECT_EXP):
7211       raise errors.ProgrammerError("Corrupted export config",
7212                                    errors.ECODE_ENVIRON)
7213
7214     ei_version = export_info.get(constants.INISECT_EXP, "version")
7215     if (int(ei_version) != constants.EXPORT_VERSION):
7216       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7217                                  (ei_version, constants.EXPORT_VERSION),
7218                                  errors.ECODE_ENVIRON)
7219     return export_info
7220
7221   def _ReadExportParams(self, einfo):
7222     """Use export parameters as defaults.
7223
7224     In case the opcode doesn't specify (as in override) some instance
7225     parameters, then try to use them from the export information, if
7226     that declares them.
7227
7228     """
7229     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7230
7231     if self.op.disk_template is None:
7232       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7233         self.op.disk_template = einfo.get(constants.INISECT_INS,
7234                                           "disk_template")
7235       else:
7236         raise errors.OpPrereqError("No disk template specified and the export"
7237                                    " is missing the disk_template information",
7238                                    errors.ECODE_INVAL)
7239
7240     if not self.op.disks:
7241       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7242         disks = []
7243         # TODO: import the disk iv_name too
7244         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7245           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7246           disks.append({"size": disk_sz})
7247         self.op.disks = disks
7248       else:
7249         raise errors.OpPrereqError("No disk info specified and the export"
7250                                    " is missing the disk information",
7251                                    errors.ECODE_INVAL)
7252
7253     if (not self.op.nics and
7254         einfo.has_option(constants.INISECT_INS, "nic_count")):
7255       nics = []
7256       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7257         ndict = {}
7258         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7259           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7260           ndict[name] = v
7261         nics.append(ndict)
7262       self.op.nics = nics
7263
7264     if (self.op.hypervisor is None and
7265         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7266       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7267     if einfo.has_section(constants.INISECT_HYP):
7268       # use the export parameters but do not override the ones
7269       # specified by the user
7270       for name, value in einfo.items(constants.INISECT_HYP):
7271         if name not in self.op.hvparams:
7272           self.op.hvparams[name] = value
7273
7274     if einfo.has_section(constants.INISECT_BEP):
7275       # use the parameters, without overriding
7276       for name, value in einfo.items(constants.INISECT_BEP):
7277         if name not in self.op.beparams:
7278           self.op.beparams[name] = value
7279     else:
7280       # try to read the parameters old style, from the main section
7281       for name in constants.BES_PARAMETERS:
7282         if (name not in self.op.beparams and
7283             einfo.has_option(constants.INISECT_INS, name)):
7284           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7285
7286     if einfo.has_section(constants.INISECT_OSP):
7287       # use the parameters, without overriding
7288       for name, value in einfo.items(constants.INISECT_OSP):
7289         if name not in self.op.osparams:
7290           self.op.osparams[name] = value
7291
7292   def _RevertToDefaults(self, cluster):
7293     """Revert the instance parameters to the default values.
7294
7295     """
7296     # hvparams
7297     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7298     for name in self.op.hvparams.keys():
7299       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7300         del self.op.hvparams[name]
7301     # beparams
7302     be_defs = cluster.SimpleFillBE({})
7303     for name in self.op.beparams.keys():
7304       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7305         del self.op.beparams[name]
7306     # nic params
7307     nic_defs = cluster.SimpleFillNIC({})
7308     for nic in self.op.nics:
7309       for name in constants.NICS_PARAMETERS:
7310         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7311           del nic[name]
7312     # osparams
7313     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7314     for name in self.op.osparams.keys():
7315       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7316         del self.op.osparams[name]
7317
7318   def CheckPrereq(self):
7319     """Check prerequisites.
7320
7321     """
7322     if self.op.mode == constants.INSTANCE_IMPORT:
7323       export_info = self._ReadExportInfo()
7324       self._ReadExportParams(export_info)
7325
7326     if (not self.cfg.GetVGName() and
7327         self.op.disk_template not in constants.DTS_NOT_LVM):
7328       raise errors.OpPrereqError("Cluster does not support lvm-based"
7329                                  " instances", errors.ECODE_STATE)
7330
7331     if self.op.hypervisor is None:
7332       self.op.hypervisor = self.cfg.GetHypervisorType()
7333
7334     cluster = self.cfg.GetClusterInfo()
7335     enabled_hvs = cluster.enabled_hypervisors
7336     if self.op.hypervisor not in enabled_hvs:
7337       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7338                                  " cluster (%s)" % (self.op.hypervisor,
7339                                   ",".join(enabled_hvs)),
7340                                  errors.ECODE_STATE)
7341
7342     # check hypervisor parameter syntax (locally)
7343     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7344     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7345                                       self.op.hvparams)
7346     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7347     hv_type.CheckParameterSyntax(filled_hvp)
7348     self.hv_full = filled_hvp
7349     # check that we don't specify global parameters on an instance
7350     _CheckGlobalHvParams(self.op.hvparams)
7351
7352     # fill and remember the beparams dict
7353     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7354     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7355
7356     # build os parameters
7357     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7358
7359     # now that hvp/bep are in final format, let's reset to defaults,
7360     # if told to do so
7361     if self.op.identify_defaults:
7362       self._RevertToDefaults(cluster)
7363
7364     # NIC buildup
7365     self.nics = []
7366     for idx, nic in enumerate(self.op.nics):
7367       nic_mode_req = nic.get("mode", None)
7368       nic_mode = nic_mode_req
7369       if nic_mode is None:
7370         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7371
7372       # in routed mode, for the first nic, the default ip is 'auto'
7373       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7374         default_ip_mode = constants.VALUE_AUTO
7375       else:
7376         default_ip_mode = constants.VALUE_NONE
7377
7378       # ip validity checks
7379       ip = nic.get("ip", default_ip_mode)
7380       if ip is None or ip.lower() == constants.VALUE_NONE:
7381         nic_ip = None
7382       elif ip.lower() == constants.VALUE_AUTO:
7383         if not self.op.name_check:
7384           raise errors.OpPrereqError("IP address set to auto but name checks"
7385                                      " have been skipped",
7386                                      errors.ECODE_INVAL)
7387         nic_ip = self.hostname1.ip
7388       else:
7389         if not netutils.IPAddress.IsValid(ip):
7390           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7391                                      errors.ECODE_INVAL)
7392         nic_ip = ip
7393
7394       # TODO: check the ip address for uniqueness
7395       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7396         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7397                                    errors.ECODE_INVAL)
7398
7399       # MAC address verification
7400       mac = nic.get("mac", constants.VALUE_AUTO)
7401       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7402         mac = utils.NormalizeAndValidateMac(mac)
7403
7404         try:
7405           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7406         except errors.ReservationError:
7407           raise errors.OpPrereqError("MAC address %s already in use"
7408                                      " in cluster" % mac,
7409                                      errors.ECODE_NOTUNIQUE)
7410
7411       # bridge verification
7412       bridge = nic.get("bridge", None)
7413       link = nic.get("link", None)
7414       if bridge and link:
7415         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7416                                    " at the same time", errors.ECODE_INVAL)
7417       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7418         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7419                                    errors.ECODE_INVAL)
7420       elif bridge:
7421         link = bridge
7422
7423       nicparams = {}
7424       if nic_mode_req:
7425         nicparams[constants.NIC_MODE] = nic_mode_req
7426       if link:
7427         nicparams[constants.NIC_LINK] = link
7428
7429       check_params = cluster.SimpleFillNIC(nicparams)
7430       objects.NIC.CheckParameterSyntax(check_params)
7431       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7432
7433     # disk checks/pre-build
7434     self.disks = []
7435     for disk in self.op.disks:
7436       mode = disk.get("mode", constants.DISK_RDWR)
7437       if mode not in constants.DISK_ACCESS_SET:
7438         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7439                                    mode, errors.ECODE_INVAL)
7440       size = disk.get("size", None)
7441       if size is None:
7442         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7443       try:
7444         size = int(size)
7445       except (TypeError, ValueError):
7446         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7447                                    errors.ECODE_INVAL)
7448       vg = disk.get("vg", self.cfg.GetVGName())
7449       new_disk = {"size": size, "mode": mode, "vg": vg}
7450       if "adopt" in disk:
7451         new_disk["adopt"] = disk["adopt"]
7452       self.disks.append(new_disk)
7453
7454     if self.op.mode == constants.INSTANCE_IMPORT:
7455
7456       # Check that the new instance doesn't have less disks than the export
7457       instance_disks = len(self.disks)
7458       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7459       if instance_disks < export_disks:
7460         raise errors.OpPrereqError("Not enough disks to import."
7461                                    " (instance: %d, export: %d)" %
7462                                    (instance_disks, export_disks),
7463                                    errors.ECODE_INVAL)
7464
7465       disk_images = []
7466       for idx in range(export_disks):
7467         option = 'disk%d_dump' % idx
7468         if export_info.has_option(constants.INISECT_INS, option):
7469           # FIXME: are the old os-es, disk sizes, etc. useful?
7470           export_name = export_info.get(constants.INISECT_INS, option)
7471           image = utils.PathJoin(self.op.src_path, export_name)
7472           disk_images.append(image)
7473         else:
7474           disk_images.append(False)
7475
7476       self.src_images = disk_images
7477
7478       old_name = export_info.get(constants.INISECT_INS, 'name')
7479       try:
7480         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7481       except (TypeError, ValueError), err:
7482         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7483                                    " an integer: %s" % str(err),
7484                                    errors.ECODE_STATE)
7485       if self.op.instance_name == old_name:
7486         for idx, nic in enumerate(self.nics):
7487           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7488             nic_mac_ini = 'nic%d_mac' % idx
7489             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7490
7491     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7492
7493     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7494     if self.op.ip_check:
7495       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7496         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7497                                    (self.check_ip, self.op.instance_name),
7498                                    errors.ECODE_NOTUNIQUE)
7499
7500     #### mac address generation
7501     # By generating here the mac address both the allocator and the hooks get
7502     # the real final mac address rather than the 'auto' or 'generate' value.
7503     # There is a race condition between the generation and the instance object
7504     # creation, which means that we know the mac is valid now, but we're not
7505     # sure it will be when we actually add the instance. If things go bad
7506     # adding the instance will abort because of a duplicate mac, and the
7507     # creation job will fail.
7508     for nic in self.nics:
7509       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7510         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7511
7512     #### allocator run
7513
7514     if self.op.iallocator is not None:
7515       self._RunAllocator()
7516
7517     #### node related checks
7518
7519     # check primary node
7520     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7521     assert self.pnode is not None, \
7522       "Cannot retrieve locked node %s" % self.op.pnode
7523     if pnode.offline:
7524       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7525                                  pnode.name, errors.ECODE_STATE)
7526     if pnode.drained:
7527       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7528                                  pnode.name, errors.ECODE_STATE)
7529     if not pnode.vm_capable:
7530       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7531                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7532
7533     self.secondaries = []
7534
7535     # mirror node verification
7536     if self.op.disk_template in constants.DTS_NET_MIRROR:
7537       if self.op.snode == pnode.name:
7538         raise errors.OpPrereqError("The secondary node cannot be the"
7539                                    " primary node.", errors.ECODE_INVAL)
7540       _CheckNodeOnline(self, self.op.snode)
7541       _CheckNodeNotDrained(self, self.op.snode)
7542       _CheckNodeVmCapable(self, self.op.snode)
7543       self.secondaries.append(self.op.snode)
7544
7545     nodenames = [pnode.name] + self.secondaries
7546
7547     if not self.adopt_disks:
7548       # Check lv size requirements, if not adopting
7549       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7550       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7551
7552     else: # instead, we must check the adoption data
7553       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7554       if len(all_lvs) != len(self.disks):
7555         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7556                                    errors.ECODE_INVAL)
7557       for lv_name in all_lvs:
7558         try:
7559           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7560           # to ReserveLV uses the same syntax
7561           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7562         except errors.ReservationError:
7563           raise errors.OpPrereqError("LV named %s used by another instance" %
7564                                      lv_name, errors.ECODE_NOTUNIQUE)
7565
7566       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7567       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7568
7569       node_lvs = self.rpc.call_lv_list([pnode.name],
7570                                        vg_names.payload.keys())[pnode.name]
7571       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7572       node_lvs = node_lvs.payload
7573
7574       delta = all_lvs.difference(node_lvs.keys())
7575       if delta:
7576         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7577                                    utils.CommaJoin(delta),
7578                                    errors.ECODE_INVAL)
7579       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7580       if online_lvs:
7581         raise errors.OpPrereqError("Online logical volumes found, cannot"
7582                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7583                                    errors.ECODE_STATE)
7584       # update the size of disk based on what is found
7585       for dsk in self.disks:
7586         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7587
7588     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7589
7590     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7591     # check OS parameters (remotely)
7592     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7593
7594     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7595
7596     # memory check on primary node
7597     if self.op.start:
7598       _CheckNodeFreeMemory(self, self.pnode.name,
7599                            "creating instance %s" % self.op.instance_name,
7600                            self.be_full[constants.BE_MEMORY],
7601                            self.op.hypervisor)
7602
7603     self.dry_run_result = list(nodenames)
7604
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The disks are either created (and optionally wiped) or, when
    adopting, renamed from the existing volumes; then the instance is
    added to the configuration, the no longer needed node locks are
    released, the disk status is checked, the OS is installed or the
    data imported (depending on the creation mode) and finally the
    instance is started, if requested.

    @param feedback_fn: function called with progress messages
    @return: the list of the nodes of the new instance
    @raise errors.OpExecError: if there are degraded disks after the
        creation (the disks and the instance are removed again first)
    @raise errors.ProgrammerError: for an unknown creation mode

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # a network port is allocated only for the hypervisors listed in
    # constants.HTS_REQ_PORT
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    # the instance object is created as administratively down; it is
    # marked up only at the very end, if a start was requested
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        # remember the generated id as rename target, then make the copy
        # refer to the volume being adopted
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        # the DRBD minors are released even if the disk removal itself
        # fails; afterwards the error is propagated
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          # same cleanup as for a failed disk creation
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # for imports the lock on the source node is kept, as the node is
      # still used for the data transfer below
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      # NOTE(review): the reason for the fixed 15 seconds pause is not
      # visible here -- presumably it gives the mirrors time to connect
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo everything done so far: disks, config entry and instance lock
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # diskless instances and instances with adopted disks get neither an
    # OS installation nor a data import
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        # one transfer per disk image; disks without an image (False
        # entries in self.src_images) are skipped
        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        # failed transfers only cause a warning, not an error
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        # a failing rename script only causes a warning as well
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # the new state is saved in the configuration before the start is
      # attempted
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
7805
7806
class LUInstanceConsole(NoHooksLU):
  """Provide the data needed to connect to an instance's console.

  This LU does not open the console itself; after verifying that the
  instance is running, it returns the console information computed by
  L{_GetInstanceConsole}.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be part of the cluster and its primary node must
    be online.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @raise errors.OpExecError: if the primary node does not list the
        instance among its running instances

    """
    inst = self.instance
    pnode = inst.primary_node

    listing = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = listing[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # not running: report whether it should have been up or not
    if inst.admin_up:
      state = "ERROR_down"
    else:
      state = "ADMIN_down"
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
7853
7854
def _GetInstanceConsole(cluster, instance):
  """Compute the console information of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: used to fill the instance's hypervisor and backend
      parameters
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is wanted
  @rtype: dict
  @return: the console object of the instance's hypervisor, as
      converted by its C{ToDict} method

  """
  # The filled hypervisor and backend parameters are handed over as
  # separate arguments; storing them in the instance object would make
  # the defaults end up saved in the instance itself.
  cons = hypervisor.GetHypervisor(instance.hypervisor).GetInstanceConsole(
    instance,
    cluster.FillHV(instance),
    cluster.FillBE(instance))

  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
7874
7875
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  Argument checking and the replacement itself are delegated to
  L{TLReplaceDisks}; this class expands names, declares the needed locks
  and builds the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the mode/remote node/iallocator combination.

    """
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    """Locks the instance, prepares node locks and creates the tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE

    if self.op.iallocator is not None:
      # The iallocator may choose any node
      self.needed_locks[node_level] = locking.ALL_SET

    elif self.op.remote_node is None:
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    else:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [new_secondary]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    op = self.op
    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declares the instance's node locks unless all nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    # With ALL_SET every node is already covered; otherwise the nodes of
    # the instance have to be declared.
    if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned node lists) holds the master,
    the primary node of the instance and, if one was given, the new
    secondary node.

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = new_secondary
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    nl = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      nl.append(new_secondary)

    return env, nl, nl
7942
7943
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Depending on the mode, either the local storage below the DRBD devices
  is recreated on one of the instance's two nodes
  (L{_ExecDrbd8DiskOnly}), or the secondary is moved to a new node
  (L{_ExecDrbd8Secondary}).

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    @param lu: the logical unit on whose behalf this tasklet runs
    @param instance_name: name of the instance whose disks are replaced
    @param mode: the disk replacement mode (one of the
        C{constants.REPLACE_DISK_*} values handled in L{_CheckPrereq2})
    @param iallocator_name: name of the iallocator used to select the new
        secondary node, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: indices of the disks to replace; if empty, all disks
        are replaced in the non-automatic modes
    @param delay_iallocator: if true, the second part of the prerequisite
        checks (which runs the iallocator) is done in L{Exec} instead of
        L{CheckPrereq}
    @param early_release: if true, old storage is removed and node locks
        are released before waiting for the disks to sync

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    Verifies that the new node and the iallocator are given if and only
    if the secondary is being changed, and that not both are given.

    @param mode: the disk replacement mode
    @param remote_node: name of the new secondary node, or None
    @param iallocator: name of the iallocator, or None
    @raise errors.OpPrereqError: if the combination is invalid

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    @param lu: the logical unit providing C{cfg}, C{rpc} and logging
    @param iallocator_name: name of the iallocator to run
    @param instance_name: name of the instance to relocate
    @param relocate_from: the nodes to relocate from
    @return: the first node name of the iallocator's result
    @raise errors.OpPrereqError: if the iallocator fails or returns an
        unexpected number of nodes

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Returns the faulty disks of the instance on the given node.

    Thin wrapper around L{_FindFaultyInstanceDisks}; the result is used
    as the list of disk indices to replace in automatic mode.

    """
    # NOTE(review): the meaning of the trailing True is defined by
    # _FindFaultyInstanceDisks, which is not visible here
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, uses the DRBD8 disk
    template and has exactly one secondary node. Unless the iallocator
    run is delayed, the second part of the checks is done here as well.

    @raise errors.OpPrereqError: if the instance does not qualify

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    On success C{target_node}, C{other_node}, C{new_node} (only when
    changing the secondary), C{disks} and C{node_secondary_ip} are set.

    @raise errors.OpPrereqError: if the new node or the disk selection is
        invalid, or if automatic repair is not possible

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec will report that no disks need
        # replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary is deliberately not checked for being online
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler. If
    the instance is marked down its disks are activated for the duration
    of the replacement and shut down again afterwards, also on failure.

    @param feedback_fn: function used to send feedback to the caller
    @return: the result of the selected handler, or None if no disks
        need to be replaced

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    """Makes sure the cluster's volume group exists on the given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if the volume groups can't be listed or
        the volume group is missing on one of the nodes

    """
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    """Checks that every disk to be replaced is found on the given nodes.

    @param nodes: names of the nodes to check
    @raise errors.OpExecError: if a disk can't be found on a node

    """
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          # A successful call with an empty payload also counts as failure
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Checks the consistency of the disks to be replaced on one node.

    @param node_name: name of the node to check
    @param on_primary: passed on to L{_CheckDiskConsistency}
    @param ldisk: passed on to L{_CheckDiskConsistency}
    @raise errors.OpExecError: if the consistency check fails for a disk

    """
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s",
                      idx, node_name)

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Creates new data and meta LVs for each disk to be replaced.

    @param node_name: name of the node on which the LVs are created
    @rtype: dict
    @return: dictionary mapping the C{iv_name} of each handled disk to a
        tuple of (disk, old LVs, new LVs)

    """
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      # The data LV gets the size of the disk, the meta LV a fixed size
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    """Checks that the handled devices are found and not degraded.

    @param node_name: name of the node on which the devices are checked
    @param iv_names: dictionary whose values are tuples starting with the
        disk object; the keys are only used in error messages
    @raise errors.OpExecError: if a device can't be found or is degraded

    """
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    """Removes the old LVs of the handled devices.

    Failures are only logged as warnings, they do not abort the removal.

    @param node_name: name of the node from which the LVs are removed
    @param iv_names: dictionary whose values are tuples with the old LVs
        as second element; the keys are only used for logging

    """
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s", name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s", msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases node-level locks.

    @param node_name: passed unchanged to the lock manager's C{release}
        call for the node level; the callers in this class pass a list
        of node names

    """
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced-<time_t>)

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback to the caller

    """
    def _ReplacedId(disk, suffix):
      """Returns the physical id of a disk with a "replaced" suffix."""
      return (disk.physical_id[0],
              disk.physical_id[1] + "_replaced-%s" % suffix)

    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, _ReplacedId(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Make the disk objects reflect the renames done on the node
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = _ReplacedId(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # Try to remove the new LVs again before giving up
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    @param feedback_fn: function used to send feedback to the caller

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d",
                      self.new_node, idx)
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d",
                      self.new_node, idx)
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s", idx, msg,
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        # Attach failures are only reported, they don't abort the operation
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
8612
8613
8614 class LURepairNodeStorage(NoHooksLU):
8615   """Repairs the volume group on a node.
8616
8617   """
8618   REQ_BGL = False
8619
8620   def CheckArguments(self):
8621     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8622
8623     storage_type = self.op.storage_type
8624
8625     if (constants.SO_FIX_CONSISTENCY not in
8626         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8627       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8628                                  " repaired" % storage_type,
8629                                  errors.ECODE_INVAL)
8630
8631   def ExpandNames(self):
8632     self.needed_locks = {
8633       locking.LEVEL_NODE: [self.op.node_name],
8634       }
8635
8636   def _CheckFaultyDisks(self, instance, node_name):
8637     """Ensure faulty disks abort the opcode or at least warn."""
8638     try:
8639       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8640                                   node_name, True):
8641         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8642                                    " node '%s'" % (instance.name, node_name),
8643                                    errors.ECODE_STATE)
8644     except errors.OpPrereqError, err:
8645       if self.op.ignore_consistency:
8646         self.proc.LogWarning(str(err.args[0]))
8647       else:
8648         raise
8649
8650   def CheckPrereq(self):
8651     """Check prerequisites.
8652
8653     """
8654     # Check whether any instance on this node has faulty disks
8655     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8656       if not inst.admin_up:
8657         continue
8658       check_nodes = set(inst.all_nodes)
8659       check_nodes.discard(self.op.node_name)
8660       for inst_node_name in check_nodes:
8661         self._CheckFaultyDisks(inst, inst_node_name)
8662
8663   def Exec(self, feedback_fn):
8664     feedback_fn("Repairing storage unit '%s' on %s ..." %
8665                 (self.op.name, self.op.node_name))
8666
8667     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8668     result = self.rpc.call_storage_execute(self.op.node_name,
8669                                            self.op.storage_type, st_args,
8670                                            self.op.name,
8671                                            constants.SO_FIX_CONSISTENCY)
8672     result.Raise("Failed to repair storage unit '%s' on %s" %
8673                  (self.op.name, self.op.node_name))
8674
8675
class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  The result is either the list of [instance name, new secondary node]
  pairs for an explicitly given remote node, or whatever the iallocator
  returns in multi-evacuate mode.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the iallocator/remote node opcode arguments.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node names and compute the needed node locks.

    Without a remote node all nodes are locked; otherwise only the
    evacuated nodes plus the remote node are.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
    else:
      locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @param feedback_fn: function used to send feedback to the caller (unused)
    @return: list of [instance name, node name] pairs when a remote node
        was given, otherwise the iallocator result
    @raise errors.OpPrereqError: if the remote node is the primary node of
        one of the affected instances
    @raise errors.OpExecError: if the iallocator run was not successful

    """
    if self.op.remote_node is None:
      # No explicit target: delegate the decision to the iallocator
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # Explicit target: gather the secondary instances of all nodes first
    affected = []
    for node in self.op.nodes:
      affected.extend(_GetNodeSecondaryInstances(self.cfg, node))

    pairs = []
    for inst in affected:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      pairs.append([inst.name, self.op.remote_node])
    return pairs
8718
8719
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are recalculated later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level currently being processed

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {"DISK": self.op.disk, "AMOUNT": self.op.amount}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template is growable and (except for
    file-based disks) that the nodes have enough free space.

    @raise errors.OpPrereqError: if the disk template cannot be grown

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    template = inst.disk_template
    if template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if template == constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature
      # will be supported
      return

    growth = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, inst_nodes, growth)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    @param feedback_fn: function used to send feedback to the caller
    @raise errors.OpExecError: if the disk cannot be activated

    """
    instance = self.instance
    disk = self.disk
    amount = self.op.amount

    activated, _ = _AssembleInstanceDisks(self, instance, disks=[disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      grow_result = self.rpc.call_blockdev_grow(node, disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    # Only record the new size once all nodes have grown the device
    disk.RecordGrow(amount)
    self.cfg.Update(instance, feedback_fn)

    if not self.op.wait_for_sync:
      if not instance.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested.")
      return

    synced = _WaitForSync(self, instance, disks=[disk])
    if not synced:
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
    if not instance.admin_up:
      # The disk was only activated for the grow operation
      _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8814
8815
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the (shared) locks for the queried instances.

    If no instance names were given, all instances are locked.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not requested:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in requested]
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    @param level: the locking level currently being processed

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # No explicit list: use every instance whose lock was acquired
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(inst_name)
                             for inst_name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @param node: the node to query; a false value means "no node"
    @param instance_name: instance name, used only in the error message
    @param dev: the disk object to look up
    @return: None for static queries, missing or offline nodes and devices
        without status; otherwise a tuple of (dev_path, major, minor,
        sync_percent, estimated_time, is_degraded, ldisk_status)

    """
    skip_query = self.op.static or not node
    if skip_query:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev_status = find_result.payload
    if bdev_status is None:
      return None

    return (bdev_status.dev_path, bdev_status.major, bdev_status.minor,
            bdev_status.sync_percent, bdev_status.estimated_time,
            bdev_status.is_degraded, bdev_status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    @param instance: the instance owning the device
    @param snode: the secondary node to use, unless the device is a DRBD
        one (in which case it is derived from the device's logical_id)
    @param dev: the disk object to describe
    @return: dict describing the device and, recursively, its children

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in):
      # it is whichever of the two logical_id nodes is not the primary
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = [self._ComputeDiskStatus(instance, snode, child)
                       for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @param feedback_fn: function used to send feedback to the caller (unused)
    @return: dict mapping each instance name to a dict with its data

    """
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for instance in self.wanted_instances:
      if self.op.static:
        # Static queries do not contact the nodes at all
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        info = info_result.payload
        if info and "state" in info:
          run_state = "up"
        else:
          run_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      all_data[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return all_data
8968
8969
8970 class LUInstanceSetParams(LogicalUnit):
8971   """Modifies an instance's parameters.
8972
8973   """
8974   HPATH = "instance-modify"
8975   HTYPE = constants.HTYPE_INSTANCE
8976   REQ_BGL = False
8977
8978   def CheckArguments(self):
8979     if not (self.op.nics or self.op.disks or self.op.disk_template or
8980             self.op.hvparams or self.op.beparams or self.op.os_name):
8981       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8982
8983     if self.op.hvparams:
8984       _CheckGlobalHvParams(self.op.hvparams)
8985
8986     # Disk validation
8987     disk_addremove = 0
8988     for disk_op, disk_dict in self.op.disks:
8989       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8990       if disk_op == constants.DDM_REMOVE:
8991         disk_addremove += 1
8992         continue
8993       elif disk_op == constants.DDM_ADD:
8994         disk_addremove += 1
8995       else:
8996         if not isinstance(disk_op, int):
8997           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8998         if not isinstance(disk_dict, dict):
8999           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9000           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9001
9002       if disk_op == constants.DDM_ADD:
9003         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9004         if mode not in constants.DISK_ACCESS_SET:
9005           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9006                                      errors.ECODE_INVAL)
9007         size = disk_dict.get('size', None)
9008         if size is None:
9009           raise errors.OpPrereqError("Required disk parameter size missing",
9010                                      errors.ECODE_INVAL)
9011         try:
9012           size = int(size)
9013         except (TypeError, ValueError), err:
9014           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9015                                      str(err), errors.ECODE_INVAL)
9016         disk_dict['size'] = size
9017       else:
9018         # modification of disk
9019         if 'size' in disk_dict:
9020           raise errors.OpPrereqError("Disk size change not possible, use"
9021                                      " grow-disk", errors.ECODE_INVAL)
9022
9023     if disk_addremove > 1:
9024       raise errors.OpPrereqError("Only one disk add or remove operation"
9025                                  " supported at a time", errors.ECODE_INVAL)
9026
9027     if self.op.disks and self.op.disk_template is not None:
9028       raise errors.OpPrereqError("Disk template conversion and other disk"
9029                                  " changes not supported at the same time",
9030                                  errors.ECODE_INVAL)
9031
9032     if (self.op.disk_template and
9033         self.op.disk_template in constants.DTS_NET_MIRROR and
9034         self.op.remote_node is None):
9035       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9036                                  " one requires specifying a secondary node",
9037                                  errors.ECODE_INVAL)
9038
9039     # NIC validation
9040     nic_addremove = 0
9041     for nic_op, nic_dict in self.op.nics:
9042       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9043       if nic_op == constants.DDM_REMOVE:
9044         nic_addremove += 1
9045         continue
9046       elif nic_op == constants.DDM_ADD:
9047         nic_addremove += 1
9048       else:
9049         if not isinstance(nic_op, int):
9050           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9051         if not isinstance(nic_dict, dict):
9052           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9053           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9054
9055       # nic_dict should be a dict
9056       nic_ip = nic_dict.get('ip', None)
9057       if nic_ip is not None:
9058         if nic_ip.lower() == constants.VALUE_NONE:
9059           nic_dict['ip'] = None
9060         else:
9061           if not netutils.IPAddress.IsValid(nic_ip):
9062             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9063                                        errors.ECODE_INVAL)
9064
9065       nic_bridge = nic_dict.get('bridge', None)
9066       nic_link = nic_dict.get('link', None)
9067       if nic_bridge and nic_link:
9068         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9069                                    " at the same time", errors.ECODE_INVAL)
9070       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9071         nic_dict['bridge'] = None
9072       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9073         nic_dict['link'] = None
9074
9075       if nic_op == constants.DDM_ADD:
9076         nic_mac = nic_dict.get('mac', None)
9077         if nic_mac is None:
9078           nic_dict['mac'] = constants.VALUE_AUTO
9079
9080       if 'mac' in nic_dict:
9081         nic_mac = nic_dict['mac']
9082         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9083           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9084
9085         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9086           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9087                                      " modifying an existing nic",
9088                                      errors.ECODE_INVAL)
9089
9090     if nic_addremove > 1:
9091       raise errors.OpPrereqError("Only one NIC add or remove operation"
9092                                  " supported at a time", errors.ECODE_INVAL)
9093
9094   def ExpandNames(self):
9095     self._ExpandAndLockInstance()
9096     self.needed_locks[locking.LEVEL_NODE] = []
9097     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9098
9099   def DeclareLocks(self, level):
9100     if level == locking.LEVEL_NODE:
9101       self._LockInstancesNodes()
9102       if self.op.disk_template and self.op.remote_node:
9103         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9104         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9105
9106   def BuildHooksEnv(self):
9107     """Build hooks env.
9108
9109     This runs on the master, primary and secondaries.
9110
9111     """
9112     args = dict()
9113     if constants.BE_MEMORY in self.be_new:
9114       args['memory'] = self.be_new[constants.BE_MEMORY]
9115     if constants.BE_VCPUS in self.be_new:
9116       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9117     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9118     # information at all.
9119     if self.op.nics:
9120       args['nics'] = []
9121       nic_override = dict(self.op.nics)
9122       for idx, nic in enumerate(self.instance.nics):
9123         if idx in nic_override:
9124           this_nic_override = nic_override[idx]
9125         else:
9126           this_nic_override = {}
9127         if 'ip' in this_nic_override:
9128           ip = this_nic_override['ip']
9129         else:
9130           ip = nic.ip
9131         if 'mac' in this_nic_override:
9132           mac = this_nic_override['mac']
9133         else:
9134           mac = nic.mac
9135         if idx in self.nic_pnew:
9136           nicparams = self.nic_pnew[idx]
9137         else:
9138           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9139         mode = nicparams[constants.NIC_MODE]
9140         link = nicparams[constants.NIC_LINK]
9141         args['nics'].append((ip, mac, mode, link))
9142       if constants.DDM_ADD in nic_override:
9143         ip = nic_override[constants.DDM_ADD].get('ip', None)
9144         mac = nic_override[constants.DDM_ADD]['mac']
9145         nicparams = self.nic_pnew[constants.DDM_ADD]
9146         mode = nicparams[constants.NIC_MODE]
9147         link = nicparams[constants.NIC_LINK]
9148         args['nics'].append((ip, mac, mode, link))
9149       elif constants.DDM_REMOVE in nic_override:
9150         del args['nics'][-1]
9151
9152     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9153     if self.op.disk_template:
9154       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9155     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9156     return env, nl, nl
9157
9158   def CheckPrereq(self):
9159     """Check prerequisites.
9160
9161     This only checks the instance list against the existing names.
9162
9163     """
9164     # checking the new params on the primary/secondary nodes
9165
9166     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9167     cluster = self.cluster = self.cfg.GetClusterInfo()
9168     assert self.instance is not None, \
9169       "Cannot retrieve locked instance %s" % self.op.instance_name
9170     pnode = instance.primary_node
9171     nodelist = list(instance.all_nodes)
9172
9173     # OS change
9174     if self.op.os_name and not self.op.force:
9175       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9176                       self.op.force_variant)
9177       instance_os = self.op.os_name
9178     else:
9179       instance_os = instance.os
9180
9181     if self.op.disk_template:
9182       if instance.disk_template == self.op.disk_template:
9183         raise errors.OpPrereqError("Instance already has disk template %s" %
9184                                    instance.disk_template, errors.ECODE_INVAL)
9185
9186       if (instance.disk_template,
9187           self.op.disk_template) not in self._DISK_CONVERSIONS:
9188         raise errors.OpPrereqError("Unsupported disk template conversion from"
9189                                    " %s to %s" % (instance.disk_template,
9190                                                   self.op.disk_template),
9191                                    errors.ECODE_INVAL)
9192       _CheckInstanceDown(self, instance, "cannot change disk template")
9193       if self.op.disk_template in constants.DTS_NET_MIRROR:
9194         if self.op.remote_node == pnode:
9195           raise errors.OpPrereqError("Given new secondary node %s is the same"
9196                                      " as the primary node of the instance" %
9197                                      self.op.remote_node, errors.ECODE_STATE)
9198         _CheckNodeOnline(self, self.op.remote_node)
9199         _CheckNodeNotDrained(self, self.op.remote_node)
9200         # FIXME: here we assume that the old instance type is DT_PLAIN
9201         assert instance.disk_template == constants.DT_PLAIN
9202         disks = [{"size": d.size, "vg": d.logical_id[0]}
9203                  for d in instance.disks]
9204         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9205         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9206
9207     # hvparams processing
9208     if self.op.hvparams:
9209       hv_type = instance.hypervisor
9210       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9211       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9212       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9213
9214       # local check
9215       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9216       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9217       self.hv_new = hv_new # the new actual values
9218       self.hv_inst = i_hvdict # the new dict (without defaults)
9219     else:
9220       self.hv_new = self.hv_inst = {}
9221
9222     # beparams processing
9223     if self.op.beparams:
9224       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9225                                    use_none=True)
9226       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9227       be_new = cluster.SimpleFillBE(i_bedict)
9228       self.be_new = be_new # the new actual values
9229       self.be_inst = i_bedict # the new dict (without defaults)
9230     else:
9231       self.be_new = self.be_inst = {}
9232
9233     # osparams processing
9234     if self.op.osparams:
9235       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9236       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9237       self.os_inst = i_osdict # the new dict (without defaults)
9238     else:
9239       self.os_inst = {}
9240
9241     self.warn = []
9242
9243     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9244       mem_check_list = [pnode]
9245       if be_new[constants.BE_AUTO_BALANCE]:
9246         # either we changed auto_balance to yes or it was from before
9247         mem_check_list.extend(instance.secondary_nodes)
9248       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9249                                                   instance.hypervisor)
9250       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9251                                          instance.hypervisor)
9252       pninfo = nodeinfo[pnode]
9253       msg = pninfo.fail_msg
9254       if msg:
9255         # Assume the primary node is unreachable and go ahead
9256         self.warn.append("Can't get info from primary node %s: %s" %
9257                          (pnode,  msg))
9258       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9259         self.warn.append("Node data from primary node %s doesn't contain"
9260                          " free memory information" % pnode)
9261       elif instance_info.fail_msg:
9262         self.warn.append("Can't get instance runtime information: %s" %
9263                         instance_info.fail_msg)
9264       else:
9265         if instance_info.payload:
9266           current_mem = int(instance_info.payload['memory'])
9267         else:
9268           # Assume instance not running
9269           # (there is a slight race condition here, but it's not very probable,
9270           # and we have no other way to check)
9271           current_mem = 0
9272         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9273                     pninfo.payload['memory_free'])
9274         if miss_mem > 0:
9275           raise errors.OpPrereqError("This change will prevent the instance"
9276                                      " from starting, due to %d MB of memory"
9277                                      " missing on its primary node" % miss_mem,
9278                                      errors.ECODE_NORES)
9279
9280       if be_new[constants.BE_AUTO_BALANCE]:
9281         for node, nres in nodeinfo.items():
9282           if node not in instance.secondary_nodes:
9283             continue
9284           msg = nres.fail_msg
9285           if msg:
9286             self.warn.append("Can't get info from secondary node %s: %s" %
9287                              (node, msg))
9288           elif not isinstance(nres.payload.get('memory_free', None), int):
9289             self.warn.append("Secondary node %s didn't return free"
9290                              " memory information" % node)
9291           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9292             self.warn.append("Not enough memory to failover instance to"
9293                              " secondary node %s" % node)
9294
9295     # NIC processing
9296     self.nic_pnew = {}
9297     self.nic_pinst = {}
9298     for nic_op, nic_dict in self.op.nics:
9299       if nic_op == constants.DDM_REMOVE:
9300         if not instance.nics:
9301           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9302                                      errors.ECODE_INVAL)
9303         continue
9304       if nic_op != constants.DDM_ADD:
9305         # an existing nic
9306         if not instance.nics:
9307           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9308                                      " no NICs" % nic_op,
9309                                      errors.ECODE_INVAL)
9310         if nic_op < 0 or nic_op >= len(instance.nics):
9311           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9312                                      " are 0 to %d" %
9313                                      (nic_op, len(instance.nics) - 1),
9314                                      errors.ECODE_INVAL)
9315         old_nic_params = instance.nics[nic_op].nicparams
9316         old_nic_ip = instance.nics[nic_op].ip
9317       else:
9318         old_nic_params = {}
9319         old_nic_ip = None
9320
9321       update_params_dict = dict([(key, nic_dict[key])
9322                                  for key in constants.NICS_PARAMETERS
9323                                  if key in nic_dict])
9324
9325       if 'bridge' in nic_dict:
9326         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9327
9328       new_nic_params = _GetUpdatedParams(old_nic_params,
9329                                          update_params_dict)
9330       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9331       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9332       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9333       self.nic_pinst[nic_op] = new_nic_params
9334       self.nic_pnew[nic_op] = new_filled_nic_params
9335       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9336
9337       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9338         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9339         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9340         if msg:
9341           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9342           if self.op.force:
9343             self.warn.append(msg)
9344           else:
9345             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9346       if new_nic_mode == constants.NIC_MODE_ROUTED:
9347         if 'ip' in nic_dict:
9348           nic_ip = nic_dict['ip']
9349         else:
9350           nic_ip = old_nic_ip
9351         if nic_ip is None:
9352           raise errors.OpPrereqError('Cannot set the nic ip to None'
9353                                      ' on a routed nic', errors.ECODE_INVAL)
9354       if 'mac' in nic_dict:
9355         nic_mac = nic_dict['mac']
9356         if nic_mac is None:
9357           raise errors.OpPrereqError('Cannot set the nic mac to None',
9358                                      errors.ECODE_INVAL)
9359         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9360           # otherwise generate the mac
9361           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9362         else:
9363           # or validate/reserve the current one
9364           try:
9365             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9366           except errors.ReservationError:
9367             raise errors.OpPrereqError("MAC address %s already in use"
9368                                        " in cluster" % nic_mac,
9369                                        errors.ECODE_NOTUNIQUE)
9370
9371     # DISK processing
9372     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9373       raise errors.OpPrereqError("Disk operations not supported for"
9374                                  " diskless instances",
9375                                  errors.ECODE_INVAL)
9376     for disk_op, _ in self.op.disks:
9377       if disk_op == constants.DDM_REMOVE:
9378         if len(instance.disks) == 1:
9379           raise errors.OpPrereqError("Cannot remove the last disk of"
9380                                      " an instance", errors.ECODE_INVAL)
9381         _CheckInstanceDown(self, instance, "cannot remove disks")
9382
9383       if (disk_op == constants.DDM_ADD and
9384           len(instance.disks) >= constants.MAX_DISKS):
9385         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9386                                    " add more" % constants.MAX_DISKS,
9387                                    errors.ECODE_STATE)
9388       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9389         # an existing disk
9390         if disk_op < 0 or disk_op >= len(instance.disks):
9391           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9392                                      " are 0 to %d" %
9393                                      (disk_op, len(instance.disks)),
9394                                      errors.ECODE_INVAL)
9395
9396     return
9397
9398   def _ConvertPlainToDrbd(self, feedback_fn):
9399     """Converts an instance from plain to drbd.
9400
9401     """
9402     feedback_fn("Converting template to drbd")
9403     instance = self.instance
9404     pnode = instance.primary_node
9405     snode = self.op.remote_node
9406
9407     # create a fake disk info for _GenerateDiskTemplate
9408     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9409     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9410                                       instance.name, pnode, [snode],
9411                                       disk_info, None, None, 0, feedback_fn)
9412     info = _GetInstanceInfoText(instance)
9413     feedback_fn("Creating aditional volumes...")
9414     # first, create the missing data and meta devices
9415     for disk in new_disks:
9416       # unfortunately this is... not too nice
9417       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9418                             info, True)
9419       for child in disk.children:
9420         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9421     # at this stage, all new LVs have been created, we can rename the
9422     # old ones
9423     feedback_fn("Renaming original volumes...")
9424     rename_list = [(o, n.children[0].logical_id)
9425                    for (o, n) in zip(instance.disks, new_disks)]
9426     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9427     result.Raise("Failed to rename original LVs")
9428
9429     feedback_fn("Initializing DRBD devices...")
9430     # all child devices are in place, we can now create the DRBD devices
9431     for disk in new_disks:
9432       for node in [pnode, snode]:
9433         f_create = node == pnode
9434         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9435
9436     # at this point, the instance has been modified
9437     instance.disk_template = constants.DT_DRBD8
9438     instance.disks = new_disks
9439     self.cfg.Update(instance, feedback_fn)
9440
9441     # disks are created, waiting for sync
9442     disk_abort = not _WaitForSync(self, instance)
9443     if disk_abort:
9444       raise errors.OpExecError("There are some degraded disks for"
9445                                " this instance, please cleanup manually")
9446
9447   def _ConvertDrbdToPlain(self, feedback_fn):
9448     """Converts an instance from drbd to plain.
9449
9450     """
9451     instance = self.instance
9452     assert len(instance.secondary_nodes) == 1
9453     pnode = instance.primary_node
9454     snode = instance.secondary_nodes[0]
9455     feedback_fn("Converting template to plain")
9456
9457     old_disks = instance.disks
9458     new_disks = [d.children[0] for d in old_disks]
9459
9460     # copy over size and mode
9461     for parent, child in zip(old_disks, new_disks):
9462       child.size = parent.size
9463       child.mode = parent.mode
9464
9465     # update instance structure
9466     instance.disks = new_disks
9467     instance.disk_template = constants.DT_PLAIN
9468     self.cfg.Update(instance, feedback_fn)
9469
9470     feedback_fn("Removing volumes on the secondary node...")
9471     for disk in old_disks:
9472       self.cfg.SetDiskID(disk, snode)
9473       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9474       if msg:
9475         self.LogWarning("Could not remove block device %s on node %s,"
9476                         " continuing anyway: %s", disk.iv_name, snode, msg)
9477
9478     feedback_fn("Removing unneeded volumes on the primary node...")
9479     for idx, disk in enumerate(old_disks):
9480       meta = disk.children[1]
9481       self.cfg.SetDiskID(meta, pnode)
9482       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9483       if msg:
9484         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9485                         " continuing anyway: %s", idx, pnode, msg)
9486
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk add/remove/modify, disk
    template conversion, NIC changes, hypervisor parameters, backend
    parameters, OS name and OS parameters. The modified instance object is
    written to the configuration at the end.

    @param feedback_fn: function used to report progress and warnings
    @rtype: list
    @return: list of (parameter, new value) tuples describing the changes
        which were applied
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the new length equals the index of the removed disk
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # removal is best-effort, failures are only reported
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          # reuse driver and directory of the first existing disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures are reported but do not abort the operation
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        # (disk_op is the index of the disk; only its mode is modified)
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      # the disks must be shut down before converting between templates
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence self is passed explicitly
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # on any failure release the DRBD minors of this instance, then
        # re-raise the original exception
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing nic; nic_op is its index
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    # (note that this change is not recorded in the returned list)
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # write the modified instance object to the configuration
    self.cfg.Update(instance, feedback_fn)

    return result
9618
  # Dispatch table for disk template conversions: maps a tuple of
  # (current disk template, requested disk template) to the function
  # implementing the conversion. The values are plain functions (they are
  # referenced inside the class body), so Exec passes the LU explicitly as
  # first argument when calling them.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
9623
9624
class LUBackupQuery(NoHooksLU):
  """Logical unit returning the exports present on a set of nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the (shared) node locks needed for the query.

    If no nodes were given in the opcode, all nodes are locked.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes for which the query failed map to C{False}
        instead

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    per_node = self.rpc.call_export_list(self.nodes)

    exports = {}
    for node in per_node:
      if not per_node[node].fail_msg:
        exports[node] = per_node[node].payload
      else:
        exports[node] = False

    return exports
9659
9660
class LUBackupPrepare(NoHooksLU):
  """Logical unit gathering the data needed before exporting an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and declares its lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object, verifies that its primary node is online
    and reads the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @rtype: dict or None
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    inst = self.instance

    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # nothing needs to be prepared for non-remote exports
      return None

    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % inst.primary_node)
    cert_result = self.rpc.call_x509_cert_create(inst.primary_node,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, pem_data) = cert_result.payload

    x509_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                                pem_data)

    # all values are authenticated using the cluster domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(x509_cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
9710
9711
9712 class LUBackupExport(LogicalUnit):
9713   """Export an instance to an image in the cluster.
9714
9715   """
9716   HPATH = "instance-export"
9717   HTYPE = constants.HTYPE_INSTANCE
9718   REQ_BGL = False
9719
9720   def CheckArguments(self):
9721     """Check the arguments.
9722
9723     """
9724     self.x509_key_name = self.op.x509_key_name
9725     self.dest_x509_ca_pem = self.op.destination_x509_ca
9726
9727     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9728       if not self.x509_key_name:
9729         raise errors.OpPrereqError("Missing X509 key name for encryption",
9730                                    errors.ECODE_INVAL)
9731
9732       if not self.dest_x509_ca_pem:
9733         raise errors.OpPrereqError("Missing destination X509 CA",
9734                                    errors.ECODE_INVAL)
9735
9736   def ExpandNames(self):
9737     self._ExpandAndLockInstance()
9738
9739     # Lock all nodes for local exports
9740     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9741       # FIXME: lock only instance primary and destination node
9742       #
9743       # Sad but true, for now we have do lock all nodes, as we don't know where
9744       # the previous export might be, and in this LU we search for it and
9745       # remove it from its current node. In the future we could fix this by:
9746       #  - making a tasklet to search (share-lock all), then create the
9747       #    new one, then one to remove, after
9748       #  - removing the removal operation altogether
9749       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9750
9751   def DeclareLocks(self, level):
9752     """Last minute lock declaration."""
9753     # All nodes are locked anyway, so nothing to do here.
9754
9755   def BuildHooksEnv(self):
9756     """Build hooks env.
9757
9758     This will run on the master, primary node and target node.
9759
9760     """
9761     env = {
9762       "EXPORT_MODE": self.op.mode,
9763       "EXPORT_NODE": self.op.target_node,
9764       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9765       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9766       # TODO: Generic function for boolean env variables
9767       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9768       }
9769
9770     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9771
9772     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9773
9774     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9775       nl.append(self.op.target_node)
9776
9777     return env, nl, nl
9778
9779   def CheckPrereq(self):
9780     """Check prerequisites.
9781
9782     This checks that the instance and node names are valid.
9783
9784     """
9785     instance_name = self.op.instance_name
9786
9787     self.instance = self.cfg.GetInstanceInfo(instance_name)
9788     assert self.instance is not None, \
9789           "Cannot retrieve locked instance %s" % self.op.instance_name
9790     _CheckNodeOnline(self, self.instance.primary_node)
9791
9792     if (self.op.remove_instance and self.instance.admin_up and
9793         not self.op.shutdown):
9794       raise errors.OpPrereqError("Can not remove instance without shutting it"
9795                                  " down before")
9796
9797     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9798       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9799       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9800       assert self.dst_node is not None
9801
9802       _CheckNodeOnline(self, self.dst_node.name)
9803       _CheckNodeNotDrained(self, self.dst_node.name)
9804
9805       self._cds = None
9806       self.dest_disk_info = None
9807       self.dest_x509_ca = None
9808
9809     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9810       self.dst_node = None
9811
9812       if len(self.op.target_node) != len(self.instance.disks):
9813         raise errors.OpPrereqError(("Received destination information for %s"
9814                                     " disks, but instance %s has %s disks") %
9815                                    (len(self.op.target_node), instance_name,
9816                                     len(self.instance.disks)),
9817                                    errors.ECODE_INVAL)
9818
9819       cds = _GetClusterDomainSecret()
9820
9821       # Check X509 key name
9822       try:
9823         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9824       except (TypeError, ValueError), err:
9825         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9826
9827       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9828         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9829                                    errors.ECODE_INVAL)
9830
9831       # Load and verify CA
9832       try:
9833         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9834       except OpenSSL.crypto.Error, err:
9835         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9836                                    (err, ), errors.ECODE_INVAL)
9837
9838       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9839       if errcode is not None:
9840         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9841                                    (msg, ), errors.ECODE_INVAL)
9842
9843       self.dest_x509_ca = cert
9844
9845       # Verify target information
9846       disk_info = []
9847       for idx, disk_data in enumerate(self.op.target_node):
9848         try:
9849           (host, port, magic) = \
9850             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9851         except errors.GenericError, err:
9852           raise errors.OpPrereqError("Target info for disk %s: %s" %
9853                                      (idx, err), errors.ECODE_INVAL)
9854
9855         disk_info.append((host, port, magic))
9856
9857       assert len(disk_info) == len(self.op.target_node)
9858       self.dest_disk_info = disk_info
9859
9860     else:
9861       raise errors.ProgrammerError("Unhandled export mode %r" %
9862                                    self.op.mode)
9863
9864     # instance disk type verification
9865     # TODO: Implement export support for file-based disks
9866     for disk in self.instance.disks:
9867       if disk.dev_type == constants.LD_FILE:
9868         raise errors.OpPrereqError("Export not supported for instances with"
9869                                    " file-based disks", errors.ECODE_INVAL)
9870
9871   def _CleanupExports(self, feedback_fn):
9872     """Removes exports of current instance from all other nodes.
9873
9874     If an instance in a cluster with nodes A..D was exported to node C, its
9875     exports will be removed from the nodes A, B and D.
9876
9877     """
9878     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9879
9880     nodelist = self.cfg.GetNodeList()
9881     nodelist.remove(self.dst_node.name)
9882
9883     # on one-node clusters nodelist will be empty after the removal
9884     # if we proceed the backup would be removed because OpBackupQuery
9885     # substitutes an empty list with the full cluster node list.
9886     iname = self.instance.name
9887     if nodelist:
9888       feedback_fn("Removing old exports for instance %s" % iname)
9889       exportlist = self.rpc.call_export_list(nodelist)
9890       for node in exportlist:
9891         if exportlist[node].fail_msg:
9892           continue
9893         if iname in exportlist[node].payload:
9894           msg = self.rpc.call_export_remove(node, iname).fail_msg
9895           if msg:
9896             self.LogWarning("Could not remove older export for instance %s"
9897                             " on node %s: %s", iname, node, msg)
9898
9899   def Exec(self, feedback_fn):
9900     """Export an instance to an image in the cluster.
9901
9902     """
9903     assert self.op.mode in constants.EXPORT_MODES
9904
9905     instance = self.instance
9906     src_node = instance.primary_node
9907
9908     if self.op.shutdown:
9909       # shutdown the instance, but not the disks
9910       feedback_fn("Shutting down instance %s" % instance.name)
9911       result = self.rpc.call_instance_shutdown(src_node, instance,
9912                                                self.op.shutdown_timeout)
9913       # TODO: Maybe ignore failures if ignore_remove_failures is set
9914       result.Raise("Could not shutdown instance %s on"
9915                    " node %s" % (instance.name, src_node))
9916
9917     # set the disks ID correctly since call_instance_start needs the
9918     # correct drbd minor to create the symlinks
9919     for disk in instance.disks:
9920       self.cfg.SetDiskID(disk, src_node)
9921
9922     activate_disks = (not instance.admin_up)
9923
9924     if activate_disks:
9925       # Activate the instance disks if we'exporting a stopped instance
9926       feedback_fn("Activating disks for %s" % instance.name)
9927       _StartInstanceDisks(self, instance, None)
9928
9929     try:
9930       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9931                                                      instance)
9932
9933       helper.CreateSnapshots()
9934       try:
9935         if (self.op.shutdown and instance.admin_up and
9936             not self.op.remove_instance):
9937           assert not activate_disks
9938           feedback_fn("Starting instance %s" % instance.name)
9939           result = self.rpc.call_instance_start(src_node, instance, None, None)
9940           msg = result.fail_msg
9941           if msg:
9942             feedback_fn("Failed to start instance: %s" % msg)
9943             _ShutdownInstanceDisks(self, instance)
9944             raise errors.OpExecError("Could not start instance: %s" % msg)
9945
9946         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9947           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9948         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9949           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9950           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9951
9952           (key_name, _, _) = self.x509_key_name
9953
9954           dest_ca_pem = \
9955             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9956                                             self.dest_x509_ca)
9957
9958           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9959                                                      key_name, dest_ca_pem,
9960                                                      timeouts)
9961       finally:
9962         helper.Cleanup()
9963
9964       # Check for backwards compatibility
9965       assert len(dresults) == len(instance.disks)
9966       assert compat.all(isinstance(i, bool) for i in dresults), \
9967              "Not all results are boolean: %r" % dresults
9968
9969     finally:
9970       if activate_disks:
9971         feedback_fn("Deactivating disks for %s" % instance.name)
9972         _ShutdownInstanceDisks(self, instance)
9973
9974     if not (compat.all(dresults) and fin_resu):
9975       failures = []
9976       if not fin_resu:
9977         failures.append("export finalization")
9978       if not compat.all(dresults):
9979         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9980                                if not dsk)
9981         failures.append("disk export: disk(s) %s" % fdsk)
9982
9983       raise errors.OpExecError("Export failed, errors in %s" %
9984                                utils.CommaJoin(failures))
9985
9986     # At this point, the export was successful, we can cleanup/finish
9987
9988     # Remove instance if requested
9989     if self.op.remove_instance:
9990       feedback_fn("Removing instance %s" % instance.name)
9991       _RemoveInstance(self, feedback_fn, instance,
9992                       self.op.ignore_remove_failures)
9993
9994     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9995       self._CleanupExports(feedback_fn)
9996
9997     return fin_resu, dresults
9998
9999
class LUBackupRemove(NoHooksLU):
  """Logical unit deleting all exports of a given instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes.

    """
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is queried for its exports and the export of the
    given instance is removed wherever it is found; query and removal
    failures are logged and otherwise ignored.

    @param feedback_fn: function used to report to the caller

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not instance_name
    if fqdn_warn:
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)

    found = False
    for node in exportlist:
      query_err = exportlist[node].fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue

      if instance_name not in exportlist[node].payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10045
10046
class LUGroupAdd(LogicalUnit):
  """Logical unit adding a new node group to the cluster.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generates the UUID of the new group and declares its lock.

    """
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure that no node group with the requested name exists yet and
    enforces the types of the node parameters, if any were given.

    """
    try:
      clash_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # the lookup failing is the expected case: the name is still free
      pass
    else:
      clash_msg = ("Desired group name '%s' already exists as a"
                   " node group (UUID: %s)" %
                   (self.op.group_name, clash_uuid))
      raise errors.OpPrereqError(clash_msg, errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks are run on the master node only.

    """
    hooks_env = {"GROUP_NAME": self.op.group_name}
    master = self.cfg.GetMasterNode()
    return (hooks_env, [master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    new_group = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    # the UUID was generated in ExpandNames, hence no check is requested
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10104
10105
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the target group and the nodes, and declare the locks.

    Sets C{self.group_uuid}, C{self.node_data} and C{self.needed_locks}, and
    replaces C{self.op.nodes} with the result of L{_GetWantedNodes}.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Fetches the destination group and computes which instances the
    assignment would split across groups. Newly split instances abort the
    operation unless C{self.op.force} is set, in which case warnings are
    logged instead.

    @raise errors.OpExecError: if the destination group cannot be retrieved,
      or if instances would be newly split and force was not given

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        # Only reported together with new splits: instances that were split
        # before and stay split after the change
        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: feedback callback, passed on to the configuration
      update

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # NOTE(review): this relies on node_data holding the configuration's own
    # node objects, so that writing the configuration persists the changed
    # group attributes -- confirm against the config writer
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Keep only the assignments that actually move a node to another group
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split before the change: the instance's nodes are in several groups
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split after the change: same test, with the new groups applied
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
10218
10219
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the groups to be queried.

    No locks are requested. If no names were given, all groups are selected,
    ordered by name via L{utils.NiceSort}; otherwise every entry of
    C{self.names} may be either a group name or a group UUID and the given
    order is kept. The result is stored in C{self.wanted}.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if any of the requested groups is unknown

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        # Join the names explicitly; formatting the list itself would show
        # its Python representation to the user
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Do nothing; this query does not declare any locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} built from the wanted groups, the
      group-to-nodes mapping (C{None} unless node data was requested) and
      the group-to-instances mapping (C{None} unless instance data was
      requested)

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      # Only nodes belonging to one of the wanted groups are recorded
      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        # An instance is attributed to the group of its primary node
        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
10295
10296
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  The actual work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode's names and output fields.

    """
    op = self.op
    self.gq = _GroupQuery(op.names, op.output_fields, False)

  def ExpandNames(self):
    """Delegate name expansion to the query object.

    """
    group_query = self.gq
    group_query.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    group_query = self.gq
    return group_query.OldStyleQuery(self)
10311
10312
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Ensure that at least one modification was requested.

    @raise errors.OpPrereqError: if neither C{ndparams} nor C{alloc_policy}
      was passed

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Look up the group's UUID and request a lock on the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Fetches the group object and, if node parameters were passed, computes
    and type-checks the resulting parameter dictionary.

    @raise errors.OpExecError: if the group cannot be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Check the merged result rather than the raw request: new_ndparams is
      # what Exec stores on the group
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment dict followed by two node lists, each
      containing just the master node

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @param feedback_fn: feedback callback, passed on to the configuration
      update
    @rtype: list
    @return: (parameter name, new value) pairs; only a change of the node
      parameters is reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
10380
10381
10382
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group's UUID and request a lock on the group.

    """
    # This raises errors.OpPrereqError on its own:
    uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = uuid
    self.needed_locks = {locking.LEVEL_NODEGROUP: [uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The group must be empty (i.e. contain no nodes) and must not be the
    only group of the cluster.

    @raise errors.OpPrereqError: if either condition is violated

    """
    # The group must not have any member nodes
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(members))),
                                 errors.ECODE_STATE)

    # The cluster must not be left without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment dict followed by two node lists, each
      containing just the master node

    """
    hooks_env = {"GROUP_NAME": self.op.group_name}
    master = self.cfg.GetMasterNode()
    return (hooks_env, [master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    uuid = self.group_uuid
    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
10443
10444
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the UUID for the old name and request a lock on the group.

    """
    # This raises errors.OpPrereqError on its own:
    uuid = self.cfg.LookupNodeGroup(self.op.old_name)
    self.group_uuid = uuid
    self.needed_locks = {locking.LEVEL_NODEGROUP: [uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that no node group is already known under the new name.

    @raise errors.OpPrereqError: if the new name is already taken

    """
    try:
      clash_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment dict followed by the same node list twice: the
      master node first, then the nodes belonging to the renamed group

    """
    hooks_env = {
      "OLD_NAME": self.op.old_name,
      "NEW_NAME": self.op.new_name,
      }

    master = self.cfg.GetMasterNode()
    nodes = self.cfg.GetAllNodesInfo()

    # The master already heads the list, so drop its entry from the mapping
    run_nodes = [master]
    nodes.pop(master, None)

    run_nodes.extend(node.name for node in nodes.values()
                     if node.group == self.group_uuid)

    return (hooks_env, run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: feedback callback, passed on to the configuration
      update
    @return: the new group name
    @raise errors.OpExecError: if the group cannot be retrieved

    """
    target = self.cfg.GetNodeGroup(self.group_uuid)
    if target is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.old_name, self.group_uuid))

    new_name = self.op.new_name
    target.name = new_name
    self.cfg.Update(target, feedback_fn)

    return new_name
10509
10510
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  Abstract parent class of the other tags LUs; it takes care of locking and
  of looking up the object the tags belong to.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object, for nodes and instances.

    No locks are requested for any other tag kind.

    """
    locks = {}
    self.needed_locks = locks

    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      locks[locking.LEVEL_INSTANCE] = full_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object the tags belong to in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not one of cluster, node
      or instance

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)

    self.target = target
10543
10544
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as in L{TagsLU} and mark all lock levels as shared.

    """
    TagsLU.ExpandNames(self)

    # This is a read-only operation, hence shared locks are sufficient
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
10562
10563
10564 class LUTagsSearch(NoHooksLU):
10565   """Searches the tags for a given pattern.
10566
10567   """
10568   REQ_BGL = False
10569
10570   def ExpandNames(self):
10571     self.needed_locks = {}
10572
10573   def CheckPrereq(self):
10574     """Check prerequisites.
10575
10576     This checks the pattern passed for validity by compiling it.
10577
10578     """
10579     try:
10580       self.re = re.compile(self.op.pattern)
10581     except re.error, err:
10582       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10583                                  (self.op.pattern, err), errors.ECODE_INVAL)
10584
10585   def Exec(self, feedback_fn):
10586     """Returns the tag list.
10587
10588     """
10589     cfg = self.cfg
10590     tgts = [("/cluster", cfg.GetClusterInfo())]
10591     ilist = cfg.GetAllInstancesInfo().values()
10592     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10593     nlist = cfg.GetAllNodesInfo().values()
10594     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10595     results = []
10596     for path, target in tgts:
10597       for tag in target.GetTags():
10598         if self.re.search(tag):
10599           results.append((path, tag))
10600     return results
10601
10602
10603 class LUTagsSet(TagsLU):
10604   """Sets a tag on a given object.
10605
10606   """
10607   REQ_BGL = False
10608
10609   def CheckPrereq(self):
10610     """Check prerequisites.
10611
10612     This checks the type and length of the tag name and value.
10613
10614     """
10615     TagsLU.CheckPrereq(self)
10616     for tag in self.op.tags:
10617       objects.TaggableObject.ValidateTag(tag)
10618
10619   def Exec(self, feedback_fn):
10620     """Sets the tag.
10621
10622     """
10623     try:
10624       for tag in self.op.tags:
10625         self.target.AddTag(tag)
10626     except errors.TagError, err:
10627       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10628     self.cfg.Update(self.target, feedback_fn)
10629
10630
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the given tags and verifies that the target object currently
    has all of them.

    @raise errors.OpPrereqError: if any of the tags is not set on the object

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    wanted = frozenset(self.op.tags)
    missing = wanted - self.target.GetTags()
    if not missing:
      return

    quoted = ("'%s'" % name for name in sorted(missing))
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object and save it.

    @param feedback_fn: feedback callback, passed on to the configuration
      update

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
10663
10664
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The sleep takes place on the master and/or on a list of nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If a node list was given, it is expanded and the nodes are locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = nodes
    self.needed_locks[locking.LEVEL_NODE] = nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if not self.op.on_nodes:
      return

    results = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
    for (node, node_result) in results.items():
      node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging the
    iteration.

    """
    repeat = self.op.repeat
    if repeat == 0:
      self._TestDelay()
      return

    last_idx = repeat - 1
    for idx in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last_idx))
      self._TestDelay()
10711
10712
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  Depending on the opcode parameters, the LU notifies a client at certain
  points of its execution, sends test log messages and/or fails on request.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    The socket is created inside a fresh temporary directory and its path is
    handed to C{cb}. The client then has C{_CLIENT_CONNECT_TIMEOUT} seconds
    to connect; afterwards the temporary directory is removed and the
    method waits up to C{_CLIENT_CONFIRM_TIMEOUT} seconds on the connection.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors
    @raise errcls: if a socket error occurs while waiting for the client to
      connect or to confirm

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is closed whether or not a client connected
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is logged as a C{constants.ELOG_JQUEUE_TEST} entry.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # Failures are reported with the exception class matching the phase
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    # The socket path is supplied later, by _NotifyUsingSocket, as the last
    # argument of _SendNotification
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count this call and reset the ExpandNames call counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Check the call order, optionally notify the client, and set locks.

    @raise errors.ProgrammerError: if CheckArguments was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Run the requested notifications, log messages and failure.

    @param feedback_fn: feedback callback, used to send the test log
      messages
    @return: C{True}, unless a failure was requested
    @raise errors.ProgrammerError: if ExpandNames was not called before
    @raise errors.OpExecError: if the opcode asks for a failure

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Tell the client how many messages are about to be sent
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
10847
10848
10849 class IAllocator(object):
10850   """IAllocator framework.
10851
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the key list matching the mode, i.e. one
      of _ALLO_KEYS, _RELO_KEYS or _EVAC_KEYS, are required)
10855     - four buffer attributes (in|out_data|text), that represent the
10856       input (to the external script) in text and data structure format,
10857       and the output from it, again in two formats
10858     - the result variables from the script (success, info, nodes) for
10859       easy usage
10860
10861   """
10862   # pylint: disable-msg=R0902
10863   # lots of instance attributes
10864   _ALLO_KEYS = [
10865     "name", "mem_size", "disks", "disk_template",
10866     "os", "tags", "nics", "vcpus", "hypervisor",
10867     ]
10868   _RELO_KEYS = [
10869     "name", "relocate_from",
10870     ]
10871   _EVAC_KEYS = [
10872     "evac_nodes",
10873     ]
10874
10875   def __init__(self, cfg, rpc, mode, **kwargs):
10876     self.cfg = cfg
10877     self.rpc = rpc
10878     # init buffer variables
10879     self.in_text = self.out_text = self.in_data = self.out_data = None
10880     # init all input fields so that pylint is happy
10881     self.mode = mode
10882     self.mem_size = self.disks = self.disk_template = None
10883     self.os = self.tags = self.nics = self.vcpus = None
10884     self.hypervisor = None
10885     self.relocate_from = None
10886     self.name = None
10887     self.evac_nodes = None
10888     # computed fields
10889     self.required_nodes = None
10890     # init result fields
10891     self.success = self.info = self.result = None
10892     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10893       keyset = self._ALLO_KEYS
10894       fn = self._AddNewInstance
10895     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10896       keyset = self._RELO_KEYS
10897       fn = self._AddRelocateInstance
10898     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10899       keyset = self._EVAC_KEYS
10900       fn = self._AddEvacuateNodes
10901     else:
10902       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10903                                    " IAllocator" % self.mode)
10904     for key in kwargs:
10905       if key not in keyset:
10906         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10907                                      " IAllocator" % key)
10908       setattr(self, key, kwargs[key])
10909
10910     for key in keyset:
10911       if key not in kwargs:
10912         raise errors.ProgrammerError("Missing input parameter '%s' to"
10913                                      " IAllocator" % key)
10914     self._BuildInputData(fn)
10915
10916   def _ComputeClusterData(self):
10917     """Compute the generic allocator input data.
10918
10919     This is the data that is independent of the actual operation.
10920
10921     """
10922     cfg = self.cfg
10923     cluster_info = cfg.GetClusterInfo()
10924     # cluster data
10925     data = {
10926       "version": constants.IALLOCATOR_VERSION,
10927       "cluster_name": cfg.GetClusterName(),
10928       "cluster_tags": list(cluster_info.GetTags()),
10929       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10930       # we don't have job IDs
10931       }
10932     ninfo = cfg.GetAllNodesInfo()
10933     iinfo = cfg.GetAllInstancesInfo().values()
10934     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10935
10936     # node data
10937     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10938
10939     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10940       hypervisor_name = self.hypervisor
10941     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10942       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10943     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10944       hypervisor_name = cluster_info.enabled_hypervisors[0]
10945
10946     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10947                                         hypervisor_name)
10948     node_iinfo = \
10949       self.rpc.call_all_instances_info(node_list,
10950                                        cluster_info.enabled_hypervisors)
10951
10952     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10953
10954     config_ndata = self._ComputeBasicNodeData(ninfo)
10955     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10956                                                  i_list, config_ndata)
10957     assert len(data["nodes"]) == len(ninfo), \
10958         "Incomplete node data computed"
10959
10960     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10961
10962     self.in_data = data
10963
10964   @staticmethod
10965   def _ComputeNodeGroupData(cfg):
10966     """Compute node groups data.
10967
10968     """
10969     ng = {}
10970     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10971       ng[guuid] = {
10972         "name": gdata.name,
10973         "alloc_policy": gdata.alloc_policy,
10974         }
10975     return ng
10976
10977   @staticmethod
10978   def _ComputeBasicNodeData(node_cfg):
10979     """Compute global node data.
10980
10981     @rtype: dict
10982     @returns: a dict of name: (node dict, node config)
10983
10984     """
10985     node_results = {}
10986     for ninfo in node_cfg.values():
10987       # fill in static (config-based) values
10988       pnr = {
10989         "tags": list(ninfo.GetTags()),
10990         "primary_ip": ninfo.primary_ip,
10991         "secondary_ip": ninfo.secondary_ip,
10992         "offline": ninfo.offline,
10993         "drained": ninfo.drained,
10994         "master_candidate": ninfo.master_candidate,
10995         "group": ninfo.group,
10996         "master_capable": ninfo.master_capable,
10997         "vm_capable": ninfo.vm_capable,
10998         }
10999
11000       node_results[ninfo.name] = pnr
11001
11002     return node_results
11003
11004   @staticmethod
11005   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11006                               node_results):
11007     """Compute global node data.
11008
11009     @param node_results: the basic node structures as filled from the config
11010
11011     """
11012     # make a copy of the current dict
11013     node_results = dict(node_results)
11014     for nname, nresult in node_data.items():
11015       assert nname in node_results, "Missing basic data for node %s" % nname
11016       ninfo = node_cfg[nname]
11017
11018       if not (ninfo.offline or ninfo.drained):
11019         nresult.Raise("Can't get data for node %s" % nname)
11020         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11021                                 nname)
11022         remote_info = nresult.payload
11023
11024         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11025                      'vg_size', 'vg_free', 'cpu_total']:
11026           if attr not in remote_info:
11027             raise errors.OpExecError("Node '%s' didn't return attribute"
11028                                      " '%s'" % (nname, attr))
11029           if not isinstance(remote_info[attr], int):
11030             raise errors.OpExecError("Node '%s' returned invalid value"
11031                                      " for '%s': %s" %
11032                                      (nname, attr, remote_info[attr]))
11033         # compute memory used by primary instances
11034         i_p_mem = i_p_up_mem = 0
11035         for iinfo, beinfo in i_list:
11036           if iinfo.primary_node == nname:
11037             i_p_mem += beinfo[constants.BE_MEMORY]
11038             if iinfo.name not in node_iinfo[nname].payload:
11039               i_used_mem = 0
11040             else:
11041               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11042             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11043             remote_info['memory_free'] -= max(0, i_mem_diff)
11044
11045             if iinfo.admin_up:
11046               i_p_up_mem += beinfo[constants.BE_MEMORY]
11047
11048         # compute memory used by instances
11049         pnr_dyn = {
11050           "total_memory": remote_info['memory_total'],
11051           "reserved_memory": remote_info['memory_dom0'],
11052           "free_memory": remote_info['memory_free'],
11053           "total_disk": remote_info['vg_size'],
11054           "free_disk": remote_info['vg_free'],
11055           "total_cpus": remote_info['cpu_total'],
11056           "i_pri_memory": i_p_mem,
11057           "i_pri_up_memory": i_p_up_mem,
11058           }
11059         pnr_dyn.update(node_results[nname])
11060         node_results[nname] = pnr_dyn
11061
11062     return node_results
11063
11064   @staticmethod
11065   def _ComputeInstanceData(cluster_info, i_list):
11066     """Compute global instance data.
11067
11068     """
11069     instance_data = {}
11070     for iinfo, beinfo in i_list:
11071       nic_data = []
11072       for nic in iinfo.nics:
11073         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11074         nic_dict = {"mac": nic.mac,
11075                     "ip": nic.ip,
11076                     "mode": filled_params[constants.NIC_MODE],
11077                     "link": filled_params[constants.NIC_LINK],
11078                    }
11079         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11080           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11081         nic_data.append(nic_dict)
11082       pir = {
11083         "tags": list(iinfo.GetTags()),
11084         "admin_up": iinfo.admin_up,
11085         "vcpus": beinfo[constants.BE_VCPUS],
11086         "memory": beinfo[constants.BE_MEMORY],
11087         "os": iinfo.os,
11088         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11089         "nics": nic_data,
11090         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11091         "disk_template": iinfo.disk_template,
11092         "hypervisor": iinfo.hypervisor,
11093         }
11094       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11095                                                  pir["disks"])
11096       instance_data[iinfo.name] = pir
11097
11098     return instance_data
11099
11100   def _AddNewInstance(self):
11101     """Add new instance data to allocator structure.
11102
11103     This in combination with _AllocatorGetClusterData will create the
11104     correct structure needed as input for the allocator.
11105
11106     The checks for the completeness of the opcode must have already been
11107     done.
11108
11109     """
11110     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11111
11112     if self.disk_template in constants.DTS_NET_MIRROR:
11113       self.required_nodes = 2
11114     else:
11115       self.required_nodes = 1
11116     request = {
11117       "name": self.name,
11118       "disk_template": self.disk_template,
11119       "tags": self.tags,
11120       "os": self.os,
11121       "vcpus": self.vcpus,
11122       "memory": self.mem_size,
11123       "disks": self.disks,
11124       "disk_space_total": disk_space,
11125       "nics": self.nics,
11126       "required_nodes": self.required_nodes,
11127       }
11128     return request
11129
11130   def _AddRelocateInstance(self):
11131     """Add relocate instance data to allocator structure.
11132
11133     This in combination with _IAllocatorGetClusterData will create the
11134     correct structure needed as input for the allocator.
11135
11136     The checks for the completeness of the opcode must have already been
11137     done.
11138
11139     """
11140     instance = self.cfg.GetInstanceInfo(self.name)
11141     if instance is None:
11142       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11143                                    " IAllocator" % self.name)
11144
11145     if instance.disk_template not in constants.DTS_NET_MIRROR:
11146       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11147                                  errors.ECODE_INVAL)
11148
11149     if len(instance.secondary_nodes) != 1:
11150       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11151                                  errors.ECODE_STATE)
11152
11153     self.required_nodes = 1
11154     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11155     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11156
11157     request = {
11158       "name": self.name,
11159       "disk_space_total": disk_space,
11160       "required_nodes": self.required_nodes,
11161       "relocate_from": self.relocate_from,
11162       }
11163     return request
11164
11165   def _AddEvacuateNodes(self):
11166     """Add evacuate nodes data to allocator structure.
11167
11168     """
11169     request = {
11170       "evac_nodes": self.evac_nodes
11171       }
11172     return request
11173
11174   def _BuildInputData(self, fn):
11175     """Build input data structures.
11176
11177     """
11178     self._ComputeClusterData()
11179
11180     request = fn()
11181     request["type"] = self.mode
11182     self.in_data["request"] = request
11183
11184     self.in_text = serializer.Dump(self.in_data)
11185
11186   def Run(self, name, validate=True, call_fn=None):
11187     """Run an instance allocator and return the results.
11188
11189     """
11190     if call_fn is None:
11191       call_fn = self.rpc.call_iallocator_runner
11192
11193     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11194     result.Raise("Failure while running the iallocator script")
11195
11196     self.out_text = result.payload
11197     if validate:
11198       self._ValidateResult()
11199
11200   def _ValidateResult(self):
11201     """Process the allocator results.
11202
11203     This will process and if successful save the result in
11204     self.out_data and the other parameters.
11205
11206     """
11207     try:
11208       rdict = serializer.Load(self.out_text)
11209     except Exception, err:
11210       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11211
11212     if not isinstance(rdict, dict):
11213       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11214
11215     # TODO: remove backwards compatiblity in later versions
11216     if "nodes" in rdict and "result" not in rdict:
11217       rdict["result"] = rdict["nodes"]
11218       del rdict["nodes"]
11219
11220     for key in "success", "info", "result":
11221       if key not in rdict:
11222         raise errors.OpExecError("Can't parse iallocator results:"
11223                                  " missing key '%s'" % key)
11224       setattr(self, key, rdict[key])
11225
11226     if not isinstance(rdict["result"], list):
11227       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11228                                " is not a list")
11229     self.out_data = rdict
11230
11231
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the direction of the
  test, it returns either the input text generated for the allocator or
  the unvalidated output of an allocator run.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test.

    @raise errors.OpPrereqError: if a parameter needed by the selected
        mode is missing or invalid, if the instance to be allocated is
        already in the cluster, if the mode or the direction is not
        known or if the allocator name is missing for the "out"
        direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # in allocation mode the name must not match an existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # saved here for being passed to the allocator in Exec
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # the allocator is only run (and hence needed) in this direction
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function; not used by this LU
    @return: for the "in" direction, the input text of the allocator
        object (C{in_text}); otherwise the output text (C{out_text})
        of running the allocator named in the opcode, without
        validating it
    @raise errors.ProgrammerError: if the mode is not one of the modes
        accepted by L{CheckPrereq}

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated into the message; passing it as a
      # second argument would leave the "%s" unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
11327
11328
#: Query type implementations; maps each supported query resource to
#: its implementation (this is the table used for the lookups done by
#: L{_GetQueryImplementation})
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }
11335
11336
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if there's no implementation for C{name}

  """
  if name in _QUERY_IMPL:
    return _QUERY_IMPL[name]

  raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                             errors.ECODE_INVAL)