code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
1007 def _GetNodeInstances(cfg, node_name):
1008   """Returns a list of all primary and secondary instances on a node.
1009
1010   """
1011
1012   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013
1014
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016   """Returns primary instances on a node.
1017
1018   """
1019   return _GetNodeInstancesInner(cfg,
1020                                 lambda inst: node_name == inst.primary_node)
1021
1022
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024   """Returns secondary instances on a node.
1025
1026   """
1027   return _GetNodeInstancesInner(cfg,
1028                                 lambda inst: node_name in inst.secondary_nodes)
1029
1030
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032   """Returns the arguments for a storage type.
1033
1034   """
1035   # Special case for file storage
1036   if storage_type == constants.ST_FILE:
1037     # storage.FileStorage wants a list of storage directories
1038     return [[cfg.GetFileStorageDir()]]
1039
1040   return []
1041
1042
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044   faulty = []
1045
1046   for dev in instance.disks:
1047     cfg.SetDiskID(dev, node_name)
1048
1049   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050   result.Raise("Failed to get disk status from node %s" % node_name,
1051                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052
1053   for idx, bdev_status in enumerate(result.payload):
1054     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055       faulty.append(idx)
1056
1057   return faulty
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
1092 class LUClusterPostInit(LogicalUnit):
1093   """Logical unit for running hooks after cluster initialization.
1094
1095   """
1096   HPATH = "cluster-init"
1097   HTYPE = constants.HTYPE_CLUSTER
1098
1099   def BuildHooksEnv(self):
1100     """Build hooks env.
1101
1102     """
1103     env = {"OP_TARGET": self.cfg.GetClusterName()}
1104     mn = self.cfg.GetMasterNode()
1105     return env, [], [mn]
1106
1107   def Exec(self, feedback_fn):
1108     """Nothing to do.
1109
1110     """
1111     return True
1112
1113
1114 class LUClusterDestroy(LogicalUnit):
1115   """Logical unit for destroying the cluster.
1116
1117   """
1118   HPATH = "cluster-destroy"
1119   HTYPE = constants.HTYPE_CLUSTER
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     return env, [], []
1127
1128   def CheckPrereq(self):
1129     """Check prerequisites.
1130
1131     This checks whether the cluster is empty.
1132
1133     Any errors are signaled by raising errors.OpPrereqError.
1134
1135     """
1136     master = self.cfg.GetMasterNode()
1137
1138     nodelist = self.cfg.GetNodeList()
1139     if len(nodelist) != 1 or nodelist[0] != master:
1140       raise errors.OpPrereqError("There are still %d node(s) in"
1141                                  " this cluster." % (len(nodelist) - 1),
1142                                  errors.ECODE_INVAL)
1143     instancelist = self.cfg.GetInstanceList()
1144     if instancelist:
1145       raise errors.OpPrereqError("There are still %d instance(s) in"
1146                                  " this cluster." % len(instancelist),
1147                                  errors.ECODE_INVAL)
1148
1149   def Exec(self, feedback_fn):
1150     """Destroys the cluster.
1151
1152     """
1153     master = self.cfg.GetMasterNode()
1154
1155     # Run post hooks on master node before it's removed
1156     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157     try:
1158       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159     except:
1160       # pylint: disable-msg=W0702
1161       self.LogWarning("Errors occurred running hooks on %s" % master)
1162
1163     result = self.rpc.call_node_stop_master(master, False)
1164     result.Raise("Could not disable the master role")
1165
1166     return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
1202 class LUClusterVerify(LogicalUnit):
1203   """Verifies the cluster status.
1204
1205   """
1206   HPATH = "cluster-verify"
1207   HTYPE = constants.HTYPE_CLUSTER
1208   REQ_BGL = False
1209
1210   TCLUSTER = "cluster"
1211   TNODE = "node"
1212   TINSTANCE = "instance"
1213
1214   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223   ENODEDRBD = (TNODE, "ENODEDRBD")
1224   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227   ENODEHV = (TNODE, "ENODEHV")
1228   ENODELVM = (TNODE, "ENODELVM")
1229   ENODEN1 = (TNODE, "ENODEN1")
1230   ENODENET = (TNODE, "ENODENET")
1231   ENODEOS = (TNODE, "ENODEOS")
1232   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234   ENODERPC = (TNODE, "ENODERPC")
1235   ENODESSH = (TNODE, "ENODESSH")
1236   ENODEVERSION = (TNODE, "ENODEVERSION")
1237   ENODESETUP = (TNODE, "ENODESETUP")
1238   ENODETIME = (TNODE, "ENODETIME")
1239   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240
1241   ETYPE_FIELD = "code"
1242   ETYPE_ERROR = "ERROR"
1243   ETYPE_WARNING = "WARNING"
1244
1245   _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeNetwork(self, ninfo, nresult):
1484     """Check the node time.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489
1490     """
1491     node = ninfo.name
1492     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1493
1494     test = constants.NV_NODELIST not in nresult
1495     _ErrorIf(test, self.ENODESSH, node,
1496              "node hasn't returned node ssh connectivity data")
1497     if not test:
1498       if nresult[constants.NV_NODELIST]:
1499         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500           _ErrorIf(True, self.ENODESSH, node,
1501                    "ssh communication with node '%s': %s", a_node, a_msg)
1502
1503     test = constants.NV_NODENETTEST not in nresult
1504     _ErrorIf(test, self.ENODENET, node,
1505              "node hasn't returned node tcp connectivity data")
1506     if not test:
1507       if nresult[constants.NV_NODENETTEST]:
1508         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1509         for anode in nlist:
1510           _ErrorIf(True, self.ENODENET, node,
1511                    "tcp communication with node '%s': %s",
1512                    anode, nresult[constants.NV_NODENETTEST][anode])
1513
1514     test = constants.NV_MASTERIP not in nresult
1515     _ErrorIf(test, self.ENODENET, node,
1516              "node hasn't returned node master IP reachability data")
1517     if not test:
1518       if not nresult[constants.NV_MASTERIP]:
1519         if node == self.master_node:
1520           msg = "the master node cannot reach the master IP (not configured?)"
1521         else:
1522           msg = "cannot reach the master IP"
1523         _ErrorIf(True, self.ENODENET, node, msg)
1524
1525   def _VerifyInstance(self, instance, instanceconfig, node_image,
1526                       diskstatus):
1527     """Verify an instance.
1528
1529     This function checks to see if the required block devices are
1530     available on the instance's node.
1531
1532     """
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534     node_current = instanceconfig.primary_node
1535
1536     node_vol_should = {}
1537     instanceconfig.MapLVsByNode(node_vol_should)
1538
1539     for node in node_vol_should:
1540       n_img = node_image[node]
1541       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542         # ignore missing volumes on offline or broken nodes
1543         continue
1544       for volume in node_vol_should[node]:
1545         test = volume not in n_img.volumes
1546         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547                  "volume %s missing on node %s", volume, node)
1548
1549     if instanceconfig.admin_up:
1550       pri_img = node_image[node_current]
1551       test = instance not in pri_img.instances and not pri_img.offline
1552       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553                "instance not running on its primary node %s",
1554                node_current)
1555
1556     for node, n_img in node_image.items():
1557       if node != node_current:
1558         test = instance in n_img.instances
1559         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560                  "instance should not run on node %s", node)
1561
1562     diskdata = [(nname, success, status, idx)
1563                 for (nname, disks) in diskstatus.items()
1564                 for idx, (success, status) in enumerate(disks)]
1565
1566     for nname, success, bdev_status, idx in diskdata:
1567       # the 'ghost node' construction in Exec() ensures that we have a
1568       # node here
1569       snode = node_image[nname]
1570       bad_snode = snode.ghost or snode.offline
1571       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1572                self.EINSTANCEFAULTYDISK, instance,
1573                "couldn't retrieve status for disk/%s on %s: %s",
1574                idx, nname, bdev_status)
1575       _ErrorIf((instanceconfig.admin_up and success and
1576                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1577                self.EINSTANCEFAULTYDISK, instance,
1578                "disk/%s on %s is faulty", idx, nname)
1579
1580   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1581     """Verify if there are any unknown volumes in the cluster.
1582
1583     The .os, .swap and backup volumes are ignored. All other volumes are
1584     reported as unknown.
1585
1586     @type reserved: L{ganeti.utils.FieldSet}
1587     @param reserved: a FieldSet of reserved volume names
1588
1589     """
1590     for node, n_img in node_image.items():
1591       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1592         # skip non-healthy nodes
1593         continue
1594       for volume in n_img.volumes:
1595         test = ((node not in node_vol_should or
1596                 volume not in node_vol_should[node]) and
1597                 not reserved.Matches(volume))
1598         self._ErrorIf(test, self.ENODEORPHANLV, node,
1599                       "volume %s is unknown", volume)
1600
1601   def _VerifyOrphanInstances(self, instancelist, node_image):
1602     """Verify the list of running instances.
1603
1604     This checks what instances are running but unknown to the cluster.
1605
1606     """
1607     for node, n_img in node_image.items():
1608       for o_inst in n_img.instances:
1609         test = o_inst not in instancelist
1610         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1611                       "instance %s on node %s should not exist", o_inst, node)
1612
1613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1614     """Verify N+1 Memory Resilience.
1615
1616     Check that if one single node dies we can still start all the
1617     instances it was primary for.
1618
1619     """
1620     for node, n_img in node_image.items():
1621       # This code checks that every node which is now listed as
1622       # secondary has enough memory to host all instances it is
1623       # supposed to should a single other node in the cluster fail.
1624       # FIXME: not ready for failover to an arbitrary node
1625       # FIXME: does not support file-backed instances
1626       # WARNING: we currently take into account down instances as well
1627       # as up ones, considering that even if they're down someone
1628       # might want to start them even in the event of a node failure.
1629       if n_img.offline:
1630         # we're skipping offline nodes from the N+1 warning, since
1631         # most likely we don't have good memory infromation from them;
1632         # we already list instances living on such nodes, and that's
1633         # enough warning
1634         continue
1635       for prinode, instances in n_img.sbp.items():
1636         needed_mem = 0
1637         for instance in instances:
1638           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1639           if bep[constants.BE_AUTO_BALANCE]:
1640             needed_mem += bep[constants.BE_MEMORY]
1641         test = n_img.mfree < needed_mem
1642         self._ErrorIf(test, self.ENODEN1, node,
1643                       "not enough memory to accomodate instance failovers"
1644                       " should node %s fail (%dMiB needed, %dMiB available)",
1645                       prinode, needed_mem, n_img.mfree)
1646
1647   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1648                        master_files):
1649     """Verifies and computes the node required file checksums.
1650
1651     @type ninfo: L{objects.Node}
1652     @param ninfo: the node to check
1653     @param nresult: the remote results for the node
1654     @param file_list: required list of files
1655     @param local_cksum: dictionary of local files and their checksums
1656     @param master_files: list of files that only masters should have
1657
1658     """
1659     node = ninfo.name
1660     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661
1662     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1663     test = not isinstance(remote_cksum, dict)
1664     _ErrorIf(test, self.ENODEFILECHECK, node,
1665              "node hasn't returned file checksum data")
1666     if test:
1667       return
1668
1669     for file_name in file_list:
1670       node_is_mc = ninfo.master_candidate
1671       must_have = (file_name not in master_files) or node_is_mc
1672       # missing
1673       test1 = file_name not in remote_cksum
1674       # invalid checksum
1675       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1676       # existing and good
1677       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1678       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1679                "file '%s' missing", file_name)
1680       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1681                "file '%s' has wrong checksum", file_name)
1682       # not candidate and this is not a must-have file
1683       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1684                "file '%s' should not exist on non master"
1685                " candidates (and the file is outdated)", file_name)
1686       # all good, except non-master/non-must have combination
1687       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1688                "file '%s' should not exist"
1689                " on non master candidates", file_name)
1690
1691   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1692                       drbd_map):
1693     """Verifies and the node DRBD status.
1694
1695     @type ninfo: L{objects.Node}
1696     @param ninfo: the node to check
1697     @param nresult: the remote results for the node
1698     @param instanceinfo: the dict of instances
1699     @param drbd_helper: the configured DRBD usermode helper
1700     @param drbd_map: the DRBD map as returned by
1701         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1702
1703     """
1704     node = ninfo.name
1705     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1706
1707     if drbd_helper:
1708       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1709       test = (helper_result == None)
1710       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1711                "no drbd usermode helper returned")
1712       if helper_result:
1713         status, payload = helper_result
1714         test = not status
1715         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1716                  "drbd usermode helper check unsuccessful: %s", payload)
1717         test = status and (payload != drbd_helper)
1718         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1719                  "wrong drbd usermode helper: %s", payload)
1720
1721     # compute the DRBD minors
1722     node_drbd = {}
1723     for minor, instance in drbd_map[node].items():
1724       test = instance not in instanceinfo
1725       _ErrorIf(test, self.ECLUSTERCFG, None,
1726                "ghost instance '%s' in temporary DRBD map", instance)
1727         # ghost instance should not be running, but otherwise we
1728         # don't give double warnings (both ghost instance and
1729         # unallocated minor in use)
1730       if test:
1731         node_drbd[minor] = (instance, False)
1732       else:
1733         instance = instanceinfo[instance]
1734         node_drbd[minor] = (instance.name, instance.admin_up)
1735
1736     # and now check them
1737     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1738     test = not isinstance(used_minors, (tuple, list))
1739     _ErrorIf(test, self.ENODEDRBD, node,
1740              "cannot parse drbd status file: %s", str(used_minors))
1741     if test:
1742       # we cannot check drbd status
1743       return
1744
1745     for minor, (iname, must_exist) in node_drbd.items():
1746       test = minor not in used_minors and must_exist
1747       _ErrorIf(test, self.ENODEDRBD, node,
1748                "drbd minor %d of instance %s is not active", minor, iname)
1749     for minor in used_minors:
1750       test = minor not in node_drbd
1751       _ErrorIf(test, self.ENODEDRBD, node,
1752                "unallocated drbd minor %d is in use", minor)
1753
1754   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1755     """Builds the node OS structures.
1756
1757     @type ninfo: L{objects.Node}
1758     @param ninfo: the node to check
1759     @param nresult: the remote results for the node
1760     @param nimg: the node image object
1761
1762     """
1763     node = ninfo.name
1764     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1765
1766     remote_os = nresult.get(constants.NV_OSLIST, None)
1767     test = (not isinstance(remote_os, list) or
1768             not compat.all(isinstance(v, list) and len(v) == 7
1769                            for v in remote_os))
1770
1771     _ErrorIf(test, self.ENODEOS, node,
1772              "node hasn't returned valid OS data")
1773
1774     nimg.os_fail = test
1775
1776     if test:
1777       return
1778
1779     os_dict = {}
1780
1781     for (name, os_path, status, diagnose,
1782          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1783
1784       if name not in os_dict:
1785         os_dict[name] = []
1786
1787       # parameters is a list of lists instead of list of tuples due to
1788       # JSON lacking a real tuple type, fix it:
1789       parameters = [tuple(v) for v in parameters]
1790       os_dict[name].append((os_path, status, diagnose,
1791                             set(variants), set(parameters), set(api_ver)))
1792
1793     nimg.oslist = os_dict
1794
1795   def _VerifyNodeOS(self, ninfo, nimg, base):
1796     """Verifies the node OS list.
1797
1798     @type ninfo: L{objects.Node}
1799     @param ninfo: the node to check
1800     @param nimg: the node image object
1801     @param base: the 'template' node we match against (e.g. from the master)
1802
1803     """
1804     node = ninfo.name
1805     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1806
1807     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1808
1809     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1810     for os_name, os_data in nimg.oslist.items():
1811       assert os_data, "Empty OS status for OS %s?!" % os_name
1812       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1813       _ErrorIf(not f_status, self.ENODEOS, node,
1814                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1815       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1816                "OS '%s' has multiple entries (first one shadows the rest): %s",
1817                os_name, utils.CommaJoin([v[0] for v in os_data]))
1818       # this will catched in backend too
1819       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1820                and not f_var, self.ENODEOS, node,
1821                "OS %s with API at least %d does not declare any variant",
1822                os_name, constants.OS_API_V15)
1823       # comparisons with the 'base' image
1824       test = os_name not in base.oslist
1825       _ErrorIf(test, self.ENODEOS, node,
1826                "Extra OS %s not present on reference node (%s)",
1827                os_name, base.name)
1828       if test:
1829         continue
1830       assert base.oslist[os_name], "Base node has empty OS status?"
1831       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1832       if not b_status:
1833         # base OS is invalid, skipping
1834         continue
1835       for kind, a, b in [("API version", f_api, b_api),
1836                          ("variants list", f_var, b_var),
1837                          ("parameters", beautify_params(f_param),
1838                           beautify_params(b_param))]:
1839         _ErrorIf(a != b, self.ENODEOS, node,
1840                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1841                  kind, os_name, base.name,
1842                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1843
1844     # check any missing OSes
1845     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1846     _ErrorIf(missing, self.ENODEOS, node,
1847              "OSes present on reference node %s but missing on this node: %s",
1848              base.name, utils.CommaJoin(missing))
1849
1850   def _VerifyOob(self, ninfo, nresult):
1851     """Verifies out of band functionality of a node.
1852
1853     @type ninfo: L{objects.Node}
1854     @param ninfo: the node to check
1855     @param nresult: the remote results for the node
1856
1857     """
1858     node = ninfo.name
1859     # We just have to verify the paths on master and/or master candidates
1860     # as the oob helper is invoked on the master
1861     if ((ninfo.master_candidate or ninfo.master_capable) and
1862         constants.NV_OOB_PATHS in nresult):
1863       for path_result in nresult[constants.NV_OOB_PATHS]:
1864         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1865
1866   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1867     """Verifies and updates the node volume data.
1868
1869     This function will update a L{NodeImage}'s internal structures
1870     with data from the remote call.
1871
1872     @type ninfo: L{objects.Node}
1873     @param ninfo: the node to check
1874     @param nresult: the remote results for the node
1875     @param nimg: the node image object
1876     @param vg_name: the configured VG name
1877
1878     """
1879     node = ninfo.name
1880     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1881
1882     nimg.lvm_fail = True
1883     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1884     if vg_name is None:
1885       pass
1886     elif isinstance(lvdata, basestring):
1887       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1888                utils.SafeEncode(lvdata))
1889     elif not isinstance(lvdata, dict):
1890       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1891     else:
1892       nimg.volumes = lvdata
1893       nimg.lvm_fail = False
1894
1895   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1896     """Verifies and updates the node instance list.
1897
1898     If the listing was successful, then updates this node's instance
1899     list. Otherwise, it marks the RPC call as failed for the instance
1900     list key.
1901
1902     @type ninfo: L{objects.Node}
1903     @param ninfo: the node to check
1904     @param nresult: the remote results for the node
1905     @param nimg: the node image object
1906
1907     """
1908     idata = nresult.get(constants.NV_INSTANCELIST, None)
1909     test = not isinstance(idata, list)
1910     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1911                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1912     if test:
1913       nimg.hyp_fail = True
1914     else:
1915       nimg.instances = idata
1916
1917   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1918     """Verifies and computes a node information map
1919
1920     @type ninfo: L{objects.Node}
1921     @param ninfo: the node to check
1922     @param nresult: the remote results for the node
1923     @param nimg: the node image object
1924     @param vg_name: the configured VG name
1925
1926     """
1927     node = ninfo.name
1928     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1929
1930     # try to read free memory (from the hypervisor)
1931     hv_info = nresult.get(constants.NV_HVINFO, None)
1932     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1933     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1934     if not test:
1935       try:
1936         nimg.mfree = int(hv_info["memory_free"])
1937       except (ValueError, TypeError):
1938         _ErrorIf(True, self.ENODERPC, node,
1939                  "node returned invalid nodeinfo, check hypervisor")
1940
1941     # FIXME: devise a free space model for file based instances as well
1942     if vg_name is not None:
1943       test = (constants.NV_VGLIST not in nresult or
1944               vg_name not in nresult[constants.NV_VGLIST])
1945       _ErrorIf(test, self.ENODELVM, node,
1946                "node didn't return data for the volume group '%s'"
1947                " - it is either missing or broken", vg_name)
1948       if not test:
1949         try:
1950           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1951         except (ValueError, TypeError):
1952           _ErrorIf(True, self.ENODERPC, node,
1953                    "node returned invalid LVM info, check LVM status")
1954
1955   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1956     """Gets per-disk status information for all instances.
1957
1958     @type nodelist: list of strings
1959     @param nodelist: Node names
1960     @type node_image: dict of (name, L{objects.Node})
1961     @param node_image: Node objects
1962     @type instanceinfo: dict of (name, L{objects.Instance})
1963     @param instanceinfo: Instance objects
1964     @rtype: {instance: {node: [(succes, payload)]}}
1965     @return: a dictionary of per-instance dictionaries with nodes as
1966         keys and disk information as values; the disk information is a
1967         list of tuples (success, payload)
1968
1969     """
1970     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1971
1972     node_disks = {}
1973     node_disks_devonly = {}
1974     diskless_instances = set()
1975     diskless = constants.DT_DISKLESS
1976
1977     for nname in nodelist:
1978       node_instances = list(itertools.chain(node_image[nname].pinst,
1979                                             node_image[nname].sinst))
1980       diskless_instances.update(inst for inst in node_instances
1981                                 if instanceinfo[inst].disk_template == diskless)
1982       disks = [(inst, disk)
1983                for inst in node_instances
1984                for disk in instanceinfo[inst].disks]
1985
1986       if not disks:
1987         # No need to collect data
1988         continue
1989
1990       node_disks[nname] = disks
1991
1992       # Creating copies as SetDiskID below will modify the objects and that can
1993       # lead to incorrect data returned from nodes
1994       devonly = [dev.Copy() for (_, dev) in disks]
1995
1996       for dev in devonly:
1997         self.cfg.SetDiskID(dev, nname)
1998
1999       node_disks_devonly[nname] = devonly
2000
2001     assert len(node_disks) == len(node_disks_devonly)
2002
2003     # Collect data from all nodes with disks
2004     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2005                                                           node_disks_devonly)
2006
2007     assert len(result) == len(node_disks)
2008
2009     instdisk = {}
2010
2011     for (nname, nres) in result.items():
2012       disks = node_disks[nname]
2013
2014       if nres.offline:
2015         # No data from this node
2016         data = len(disks) * [(False, "node offline")]
2017       else:
2018         msg = nres.fail_msg
2019         _ErrorIf(msg, self.ENODERPC, nname,
2020                  "while getting disk information: %s", msg)
2021         if msg:
2022           # No data from this node
2023           data = len(disks) * [(False, msg)]
2024         else:
2025           data = []
2026           for idx, i in enumerate(nres.payload):
2027             if isinstance(i, (tuple, list)) and len(i) == 2:
2028               data.append(i)
2029             else:
2030               logging.warning("Invalid result from node %s, entry %d: %s",
2031                               nname, idx, i)
2032               data.append((False, "Invalid result from the remote node"))
2033
2034       for ((inst, _), status) in zip(disks, data):
2035         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2036
2037     # Add empty entries for diskless instances.
2038     for inst in diskless_instances:
2039       assert inst not in instdisk
2040       instdisk[inst] = {}
2041
2042     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2043                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2044                       compat.all(isinstance(s, (tuple, list)) and
2045                                  len(s) == 2 for s in statuses)
2046                       for inst, nnames in instdisk.items()
2047                       for nname, statuses in nnames.items())
2048     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2049
2050     return instdisk
2051
2052   def _VerifyHVP(self, hvp_data):
2053     """Verifies locally the syntax of the hypervisor parameters.
2054
2055     """
2056     for item, hv_name, hv_params in hvp_data:
2057       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2058              (item, hv_name))
2059       try:
2060         hv_class = hypervisor.GetHypervisor(hv_name)
2061         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2062         hv_class.CheckParameterSyntax(hv_params)
2063       except errors.GenericError, err:
2064         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2065
2066
2067   def BuildHooksEnv(self):
2068     """Build hooks env.
2069
2070     Cluster-Verify hooks just ran in the post phase and their failure makes
2071     the output be logged in the verify output and the verification to fail.
2072
2073     """
2074     all_nodes = self.cfg.GetNodeList()
2075     env = {
2076       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2077       }
2078     for node in self.cfg.GetAllNodesInfo().values():
2079       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2080
2081     return env, [], all_nodes
2082
2083   def Exec(self, feedback_fn):
2084     """Verify integrity of cluster, performing various test on nodes.
2085
2086     """
2087     # This method has too many local variables. pylint: disable-msg=R0914
2088     self.bad = False
2089     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2090     verbose = self.op.verbose
2091     self._feedback_fn = feedback_fn
2092     feedback_fn("* Verifying global settings")
2093     for msg in self.cfg.VerifyConfig():
2094       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2095
2096     # Check the cluster certificates
2097     for cert_filename in constants.ALL_CERT_FILES:
2098       (errcode, msg) = _VerifyCertificate(cert_filename)
2099       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2100
2101     vg_name = self.cfg.GetVGName()
2102     drbd_helper = self.cfg.GetDRBDHelper()
2103     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2104     cluster = self.cfg.GetClusterInfo()
2105     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2106     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2107     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2108     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2109     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2110                         for iname in instancelist)
2111     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2112     i_non_redundant = [] # Non redundant instances
2113     i_non_a_balanced = [] # Non auto-balanced instances
2114     n_offline = 0 # Count of offline nodes
2115     n_drained = 0 # Count of nodes being drained
2116     node_vol_should = {}
2117
2118     # FIXME: verify OS list
2119     # do local checksums
2120     master_files = [constants.CLUSTER_CONF_FILE]
2121     master_node = self.master_node = self.cfg.GetMasterNode()
2122     master_ip = self.cfg.GetMasterIP()
2123
2124     file_names = ssconf.SimpleStore().GetFileList()
2125     file_names.extend(constants.ALL_CERT_FILES)
2126     file_names.extend(master_files)
2127     if cluster.modify_etc_hosts:
2128       file_names.append(constants.ETC_HOSTS)
2129
2130     local_checksums = utils.FingerprintFiles(file_names)
2131
2132     # Compute the set of hypervisor parameters
2133     hvp_data = []
2134     for hv_name in hypervisors:
2135       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2136     for os_name, os_hvp in cluster.os_hvp.items():
2137       for hv_name, hv_params in os_hvp.items():
2138         if not hv_params:
2139           continue
2140         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2141         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2142     # TODO: collapse identical parameter values in a single one
2143     for instance in instanceinfo.values():
2144       if not instance.hvparams:
2145         continue
2146       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2147                        cluster.FillHV(instance)))
2148     # and verify them locally
2149     self._VerifyHVP(hvp_data)
2150
2151     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2152     node_verify_param = {
2153       constants.NV_FILELIST: file_names,
2154       constants.NV_NODELIST: [node.name for node in nodeinfo
2155                               if not node.offline],
2156       constants.NV_HYPERVISOR: hypervisors,
2157       constants.NV_HVPARAMS: hvp_data,
2158       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2159                                   node.secondary_ip) for node in nodeinfo
2160                                  if not node.offline],
2161       constants.NV_INSTANCELIST: hypervisors,
2162       constants.NV_VERSION: None,
2163       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2164       constants.NV_NODESETUP: None,
2165       constants.NV_TIME: None,
2166       constants.NV_MASTERIP: (master_node, master_ip),
2167       constants.NV_OSLIST: None,
2168       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2169       }
2170
2171     if vg_name is not None:
2172       node_verify_param[constants.NV_VGLIST] = None
2173       node_verify_param[constants.NV_LVLIST] = vg_name
2174       node_verify_param[constants.NV_PVLIST] = [vg_name]
2175       node_verify_param[constants.NV_DRBDLIST] = None
2176
2177     if drbd_helper:
2178       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2179
2180     # Build our expected cluster state
2181     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2182                                                  name=node.name,
2183                                                  vm_capable=node.vm_capable))
2184                       for node in nodeinfo)
2185
2186     # Gather OOB paths
2187     oob_paths = []
2188     for node in nodeinfo:
2189       path = _SupportsOob(self.cfg, node)
2190       if path and path not in oob_paths:
2191         oob_paths.append(path)
2192
2193     if oob_paths:
2194       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2195
2196     for instance in instancelist:
2197       inst_config = instanceinfo[instance]
2198
2199       for nname in inst_config.all_nodes:
2200         if nname not in node_image:
2201           # ghost node
2202           gnode = self.NodeImage(name=nname)
2203           gnode.ghost = True
2204           node_image[nname] = gnode
2205
2206       inst_config.MapLVsByNode(node_vol_should)
2207
2208       pnode = inst_config.primary_node
2209       node_image[pnode].pinst.append(instance)
2210
2211       for snode in inst_config.secondary_nodes:
2212         nimg = node_image[snode]
2213         nimg.sinst.append(instance)
2214         if pnode not in nimg.sbp:
2215           nimg.sbp[pnode] = []
2216         nimg.sbp[pnode].append(instance)
2217
2218     # At this point, we have the in-memory data structures complete,
2219     # except for the runtime information, which we'll gather next
2220
2221     # Due to the way our RPC system works, exact response times cannot be
2222     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2223     # time before and after executing the request, we can at least have a time
2224     # window.
2225     nvinfo_starttime = time.time()
2226     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2227                                            self.cfg.GetClusterName())
2228     nvinfo_endtime = time.time()
2229
2230     all_drbd_map = self.cfg.ComputeDRBDMap()
2231
2232     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2233     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2234
2235     feedback_fn("* Verifying node status")
2236
2237     refos_img = None
2238
2239     for node_i in nodeinfo:
2240       node = node_i.name
2241       nimg = node_image[node]
2242
2243       if node_i.offline:
2244         if verbose:
2245           feedback_fn("* Skipping offline node %s" % (node,))
2246         n_offline += 1
2247         continue
2248
2249       if node == master_node:
2250         ntype = "master"
2251       elif node_i.master_candidate:
2252         ntype = "master candidate"
2253       elif node_i.drained:
2254         ntype = "drained"
2255         n_drained += 1
2256       else:
2257         ntype = "regular"
2258       if verbose:
2259         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2260
2261       msg = all_nvinfo[node].fail_msg
2262       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2263       if msg:
2264         nimg.rpc_fail = True
2265         continue
2266
2267       nresult = all_nvinfo[node].payload
2268
2269       nimg.call_ok = self._VerifyNode(node_i, nresult)
2270       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2271       self._VerifyNodeNetwork(node_i, nresult)
2272       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2273                             master_files)
2274
2275       self._VerifyOob(node_i, nresult)
2276
2277       if nimg.vm_capable:
2278         self._VerifyNodeLVM(node_i, nresult, vg_name)
2279         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2280                              all_drbd_map)
2281
2282         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2283         self._UpdateNodeInstances(node_i, nresult, nimg)
2284         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2285         self._UpdateNodeOS(node_i, nresult, nimg)
2286         if not nimg.os_fail:
2287           if refos_img is None:
2288             refos_img = nimg
2289           self._VerifyNodeOS(node_i, nimg, refos_img)
2290
2291     feedback_fn("* Verifying instance status")
2292     for instance in instancelist:
2293       if verbose:
2294         feedback_fn("* Verifying instance %s" % instance)
2295       inst_config = instanceinfo[instance]
2296       self._VerifyInstance(instance, inst_config, node_image,
2297                            instdisk[instance])
2298       inst_nodes_offline = []
2299
2300       pnode = inst_config.primary_node
2301       pnode_img = node_image[pnode]
2302       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2303                self.ENODERPC, pnode, "instance %s, connection to"
2304                " primary node failed", instance)
2305
2306       _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2307                "instance lives on offline node %s", inst_config.primary_node)
2308
2309       # If the instance is non-redundant we cannot survive losing its primary
2310       # node, so we are not N+1 compliant. On the other hand we have no disk
2311       # templates with more than one secondary so that situation is not well
2312       # supported either.
2313       # FIXME: does not support file-backed instances
2314       if not inst_config.secondary_nodes:
2315         i_non_redundant.append(instance)
2316
2317       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2318                instance, "instance has multiple secondary nodes: %s",
2319                utils.CommaJoin(inst_config.secondary_nodes),
2320                code=self.ETYPE_WARNING)
2321
2322       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2323         pnode = inst_config.primary_node
2324         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2325         instance_groups = {}
2326
2327         for node in instance_nodes:
2328           instance_groups.setdefault(nodeinfo_byname[node].group,
2329                                      []).append(node)
2330
2331         pretty_list = [
2332           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2333           # Sort so that we always list the primary node first.
2334           for group, nodes in sorted(instance_groups.items(),
2335                                      key=lambda (_, nodes): pnode in nodes,
2336                                      reverse=True)]
2337
2338         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2339                       instance, "instance has primary and secondary nodes in"
2340                       " different groups: %s", utils.CommaJoin(pretty_list),
2341                       code=self.ETYPE_WARNING)
2342
2343       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2344         i_non_a_balanced.append(instance)
2345
2346       for snode in inst_config.secondary_nodes:
2347         s_img = node_image[snode]
2348         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2349                  "instance %s, connection to secondary node failed", instance)
2350
2351         if s_img.offline:
2352           inst_nodes_offline.append(snode)
2353
2354       # warn that the instance lives on offline nodes
2355       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2356                "instance has offline secondary node(s) %s",
2357                utils.CommaJoin(inst_nodes_offline))
2358       # ... or ghost/non-vm_capable nodes
2359       for node in inst_config.all_nodes:
2360         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2361                  "instance lives on ghost node %s", node)
2362         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2363                  instance, "instance lives on non-vm_capable node %s", node)
2364
2365     feedback_fn("* Verifying orphan volumes")
2366     reserved = utils.FieldSet(*cluster.reserved_lvs)
2367     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2368
2369     feedback_fn("* Verifying orphan instances")
2370     self._VerifyOrphanInstances(instancelist, node_image)
2371
2372     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2373       feedback_fn("* Verifying N+1 Memory redundancy")
2374       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2375
2376     feedback_fn("* Other Notes")
2377     if i_non_redundant:
2378       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2379                   % len(i_non_redundant))
2380
2381     if i_non_a_balanced:
2382       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2383                   % len(i_non_a_balanced))
2384
2385     if n_offline:
2386       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2387
2388     if n_drained:
2389       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2390
2391     return not self.bad
2392
2393   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2394     """Analyze the post-hooks' result
2395
2396     This method analyses the hook result, handles it, and sends some
2397     nicely-formatted feedback back to the user.
2398
2399     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2400         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2401     @param hooks_results: the results of the multi-node hooks rpc call
2402     @param feedback_fn: function used send feedback back to the caller
2403     @param lu_result: previous Exec result
2404     @return: the new Exec result, based on the previous result
2405         and hook results
2406
2407     """
2408     # We only really run POST phase hooks, and are only interested in
2409     # their results
2410     if phase == constants.HOOKS_PHASE_POST:
2411       # Used to change hooks' output to proper indentation
2412       feedback_fn("* Hooks Results")
2413       assert hooks_results, "invalid result from hooks"
2414
2415       for node_name in hooks_results:
2416         res = hooks_results[node_name]
2417         msg = res.fail_msg
2418         test = msg and not res.offline
2419         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2420                       "Communication failure in hooks execution: %s", msg)
2421         if res.offline or msg:
2422           # No need to investigate payload if node is offline or gave an error.
2423           # override manually lu_result here as _ErrorIf only
2424           # overrides self.bad
2425           lu_result = 1
2426           continue
2427         for script, hkr, output in res.payload:
2428           test = hkr == constants.HKR_FAIL
2429           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2430                         "Script %s failed, output:", script)
2431           if test:
2432             output = self._HOOKS_INDENT_RE.sub('      ', output)
2433             feedback_fn("%s" % output)
2434             lu_result = 0
2435
2436       return lu_result
2437
2438
2439 class LUClusterVerifyDisks(NoHooksLU):
2440   """Verifies the cluster disks status.
2441
2442   """
2443   REQ_BGL = False
2444
2445   def ExpandNames(self):
2446     self.needed_locks = {
2447       locking.LEVEL_NODE: locking.ALL_SET,
2448       locking.LEVEL_INSTANCE: locking.ALL_SET,
2449     }
2450     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2451
2452   def Exec(self, feedback_fn):
2453     """Verify integrity of cluster disks.
2454
2455     @rtype: tuple of three items
2456     @return: a tuple of (dict of node-to-node_error, list of instances
2457         which need activate-disks, dict of instance: (node, volume) for
2458         missing volumes
2459
2460     """
2461     result = res_nodes, res_instances, res_missing = {}, [], {}
2462
2463     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2464     instances = self.cfg.GetAllInstancesInfo().values()
2465
2466     nv_dict = {}
2467     for inst in instances:
2468       inst_lvs = {}
2469       if not inst.admin_up:
2470         continue
2471       inst.MapLVsByNode(inst_lvs)
2472       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2473       for node, vol_list in inst_lvs.iteritems():
2474         for vol in vol_list:
2475           nv_dict[(node, vol)] = inst
2476
2477     if not nv_dict:
2478       return result
2479
2480     node_lvs = self.rpc.call_lv_list(nodes, [])
2481     for node, node_res in node_lvs.items():
2482       if node_res.offline:
2483         continue
2484       msg = node_res.fail_msg
2485       if msg:
2486         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2487         res_nodes[node] = msg
2488         continue
2489
2490       lvs = node_res.payload
2491       for lv_name, (_, _, lv_online) in lvs.items():
2492         inst = nv_dict.pop((node, lv_name), None)
2493         if (not lv_online and inst is not None
2494             and inst.name not in res_instances):
2495           res_instances.append(inst.name)
2496
2497     # any leftover items in nv_dict are missing LVs, let's arrange the
2498     # data better
2499     for key, inst in nv_dict.iteritems():
2500       if inst.name not in res_missing:
2501         res_missing[inst.name] = []
2502       res_missing[inst.name].append(key)
2503
2504     return result
2505
2506
2507 class LUClusterRepairDiskSizes(NoHooksLU):
2508   """Verifies the cluster disks sizes.
2509
2510   """
2511   REQ_BGL = False
2512
2513   def ExpandNames(self):
2514     if self.op.instances:
2515       self.wanted_names = []
2516       for name in self.op.instances:
2517         full_name = _ExpandInstanceName(self.cfg, name)
2518         self.wanted_names.append(full_name)
2519       self.needed_locks = {
2520         locking.LEVEL_NODE: [],
2521         locking.LEVEL_INSTANCE: self.wanted_names,
2522         }
2523       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2524     else:
2525       self.wanted_names = None
2526       self.needed_locks = {
2527         locking.LEVEL_NODE: locking.ALL_SET,
2528         locking.LEVEL_INSTANCE: locking.ALL_SET,
2529         }
2530     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2531
2532   def DeclareLocks(self, level):
2533     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2534       self._LockInstancesNodes(primary_only=True)
2535
2536   def CheckPrereq(self):
2537     """Check prerequisites.
2538
2539     This only checks the optional instance list against the existing names.
2540
2541     """
2542     if self.wanted_names is None:
2543       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2544
2545     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2546                              in self.wanted_names]
2547
2548   def _EnsureChildSizes(self, disk):
2549     """Ensure children of the disk have the needed disk size.
2550
2551     This is valid mainly for DRBD8 and fixes an issue where the
2552     children have smaller disk size.
2553
2554     @param disk: an L{ganeti.objects.Disk} object
2555
2556     """
2557     if disk.dev_type == constants.LD_DRBD8:
2558       assert disk.children, "Empty children for DRBD8?"
2559       fchild = disk.children[0]
2560       mismatch = fchild.size < disk.size
2561       if mismatch:
2562         self.LogInfo("Child disk has size %d, parent %d, fixing",
2563                      fchild.size, disk.size)
2564         fchild.size = disk.size
2565
2566       # and we recurse on this child only, not on the metadev
2567       return self._EnsureChildSizes(fchild) or mismatch
2568     else:
2569       return False
2570
2571   def Exec(self, feedback_fn):
2572     """Verify the size of cluster disks.
2573
2574     """
2575     # TODO: check child disks too
2576     # TODO: check differences in size between primary/secondary nodes
2577     per_node_disks = {}
2578     for instance in self.wanted_instances:
2579       pnode = instance.primary_node
2580       if pnode not in per_node_disks:
2581         per_node_disks[pnode] = []
2582       for idx, disk in enumerate(instance.disks):
2583         per_node_disks[pnode].append((instance, idx, disk))
2584
2585     changed = []
2586     for node, dskl in per_node_disks.items():
2587       newl = [v[2].Copy() for v in dskl]
2588       for dsk in newl:
2589         self.cfg.SetDiskID(dsk, node)
2590       result = self.rpc.call_blockdev_getsize(node, newl)
2591       if result.fail_msg:
2592         self.LogWarning("Failure in blockdev_getsize call to node"
2593                         " %s, ignoring", node)
2594         continue
2595       if len(result.payload) != len(dskl):
2596         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2597                         " result.payload=%s", node, len(dskl), result.payload)
2598         self.LogWarning("Invalid result from node %s, ignoring node results",
2599                         node)
2600         continue
2601       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2602         if size is None:
2603           self.LogWarning("Disk %d of instance %s did not return size"
2604                           " information, ignoring", idx, instance.name)
2605           continue
2606         if not isinstance(size, (int, long)):
2607           self.LogWarning("Disk %d of instance %s did not return valid"
2608                           " size information, ignoring", idx, instance.name)
2609           continue
2610         size = size >> 20
2611         if size != disk.size:
2612           self.LogInfo("Disk %d of instance %s has mismatched size,"
2613                        " correcting: recorded %d, actual %d", idx,
2614                        instance.name, disk.size, size)
2615           disk.size = size
2616           self.cfg.Update(instance, feedback_fn)
2617           changed.append((instance.name, idx, size))
2618         if self._EnsureChildSizes(disk):
2619           self.cfg.Update(instance, feedback_fn)
2620           changed.append((instance.name, idx, disk.size))
2621     return changed
2622
2623
2624 class LUClusterRename(LogicalUnit):
2625   """Rename the cluster.
2626
2627   """
2628   HPATH = "cluster-rename"
2629   HTYPE = constants.HTYPE_CLUSTER
2630
2631   def BuildHooksEnv(self):
2632     """Build hooks env.
2633
2634     """
2635     env = {
2636       "OP_TARGET": self.cfg.GetClusterName(),
2637       "NEW_NAME": self.op.name,
2638       }
2639     mn = self.cfg.GetMasterNode()
2640     all_nodes = self.cfg.GetNodeList()
2641     return env, [mn], all_nodes
2642
2643   def CheckPrereq(self):
2644     """Verify that the passed name is a valid one.
2645
2646     """
2647     hostname = netutils.GetHostname(name=self.op.name,
2648                                     family=self.cfg.GetPrimaryIPFamily())
2649
2650     new_name = hostname.name
2651     self.ip = new_ip = hostname.ip
2652     old_name = self.cfg.GetClusterName()
2653     old_ip = self.cfg.GetMasterIP()
2654     if new_name == old_name and new_ip == old_ip:
2655       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2656                                  " cluster has changed",
2657                                  errors.ECODE_INVAL)
2658     if new_ip != old_ip:
2659       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2660         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2661                                    " reachable on the network" %
2662                                    new_ip, errors.ECODE_NOTUNIQUE)
2663
2664     self.op.name = new_name
2665
2666   def Exec(self, feedback_fn):
2667     """Rename the cluster.
2668
2669     """
2670     clustername = self.op.name
2671     ip = self.ip
2672
2673     # shutdown the master IP
2674     master = self.cfg.GetMasterNode()
2675     result = self.rpc.call_node_stop_master(master, False)
2676     result.Raise("Could not disable the master role")
2677
2678     try:
2679       cluster = self.cfg.GetClusterInfo()
2680       cluster.cluster_name = clustername
2681       cluster.master_ip = ip
2682       self.cfg.Update(cluster, feedback_fn)
2683
2684       # update the known hosts file
2685       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2686       node_list = self.cfg.GetOnlineNodeList()
2687       try:
2688         node_list.remove(master)
2689       except ValueError:
2690         pass
2691       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2692     finally:
2693       result = self.rpc.call_node_start_master(master, False, False)
2694       msg = result.fail_msg
2695       if msg:
2696         self.LogWarning("Could not re-enable the master role on"
2697                         " the master, please restart manually: %s", msg)
2698
2699     return clustername
2700
2701
2702 class LUClusterSetParams(LogicalUnit):
2703   """Change the parameters of the cluster.
2704
2705   """
2706   HPATH = "cluster-modify"
2707   HTYPE = constants.HTYPE_CLUSTER
2708   REQ_BGL = False
2709
2710   def CheckArguments(self):
2711     """Check parameters
2712
2713     """
2714     if self.op.uid_pool:
2715       uidpool.CheckUidPool(self.op.uid_pool)
2716
2717     if self.op.add_uids:
2718       uidpool.CheckUidPool(self.op.add_uids)
2719
2720     if self.op.remove_uids:
2721       uidpool.CheckUidPool(self.op.remove_uids)
2722
2723   def ExpandNames(self):
2724     # FIXME: in the future maybe other cluster params won't require checking on
2725     # all nodes to be modified.
2726     self.needed_locks = {
2727       locking.LEVEL_NODE: locking.ALL_SET,
2728     }
2729     self.share_locks[locking.LEVEL_NODE] = 1
2730
2731   def BuildHooksEnv(self):
2732     """Build hooks env.
2733
2734     """
2735     env = {
2736       "OP_TARGET": self.cfg.GetClusterName(),
2737       "NEW_VG_NAME": self.op.vg_name,
2738       }
2739     mn = self.cfg.GetMasterNode()
2740     return env, [mn], [mn]
2741
2742   def CheckPrereq(self):
2743     """Check prerequisites.
2744
2745     This checks whether the given params don't conflict and
2746     if the given volume group is valid.
2747
2748     """
2749     if self.op.vg_name is not None and not self.op.vg_name:
2750       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2751         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2752                                    " instances exist", errors.ECODE_INVAL)
2753
2754     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2755       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2756         raise errors.OpPrereqError("Cannot disable drbd helper while"
2757                                    " drbd-based instances exist",
2758                                    errors.ECODE_INVAL)
2759
2760     node_list = self.acquired_locks[locking.LEVEL_NODE]
2761
2762     # if vg_name not None, checks given volume group on all nodes
2763     if self.op.vg_name:
2764       vglist = self.rpc.call_vg_list(node_list)
2765       for node in node_list:
2766         msg = vglist[node].fail_msg
2767         if msg:
2768           # ignoring down node
2769           self.LogWarning("Error while gathering data on node %s"
2770                           " (ignoring node): %s", node, msg)
2771           continue
2772         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2773                                               self.op.vg_name,
2774                                               constants.MIN_VG_SIZE)
2775         if vgstatus:
2776           raise errors.OpPrereqError("Error on node '%s': %s" %
2777                                      (node, vgstatus), errors.ECODE_ENVIRON)
2778
2779     if self.op.drbd_helper:
2780       # checks given drbd helper on all nodes
2781       helpers = self.rpc.call_drbd_helper(node_list)
2782       for node in node_list:
2783         ninfo = self.cfg.GetNodeInfo(node)
2784         if ninfo.offline:
2785           self.LogInfo("Not checking drbd helper on offline node %s", node)
2786           continue
2787         msg = helpers[node].fail_msg
2788         if msg:
2789           raise errors.OpPrereqError("Error checking drbd helper on node"
2790                                      " '%s': %s" % (node, msg),
2791                                      errors.ECODE_ENVIRON)
2792         node_helper = helpers[node].payload
2793         if node_helper != self.op.drbd_helper:
2794           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2795                                      (node, node_helper), errors.ECODE_ENVIRON)
2796
2797     self.cluster = cluster = self.cfg.GetClusterInfo()
2798     # validate params changes
2799     if self.op.beparams:
2800       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2801       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2802
2803     if self.op.ndparams:
2804       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2805       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2806
2807       # TODO: we need a more general way to handle resetting
2808       # cluster-level parameters to default values
2809       if self.new_ndparams["oob_program"] == "":
2810         self.new_ndparams["oob_program"] = \
2811             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2812
2813     if self.op.nicparams:
2814       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2815       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2816       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2817       nic_errors = []
2818
2819       # check all instances for consistency
2820       for instance in self.cfg.GetAllInstancesInfo().values():
2821         for nic_idx, nic in enumerate(instance.nics):
2822           params_copy = copy.deepcopy(nic.nicparams)
2823           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2824
2825           # check parameter syntax
2826           try:
2827             objects.NIC.CheckParameterSyntax(params_filled)
2828           except errors.ConfigurationError, err:
2829             nic_errors.append("Instance %s, nic/%d: %s" %
2830                               (instance.name, nic_idx, err))
2831
2832           # if we're moving instances to routed, check that they have an ip
2833           target_mode = params_filled[constants.NIC_MODE]
2834           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2835             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2836                               (instance.name, nic_idx))
2837       if nic_errors:
2838         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2839                                    "\n".join(nic_errors))
2840
2841     # hypervisor list/parameters
2842     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2843     if self.op.hvparams:
2844       for hv_name, hv_dict in self.op.hvparams.items():
2845         if hv_name not in self.new_hvparams:
2846           self.new_hvparams[hv_name] = hv_dict
2847         else:
2848           self.new_hvparams[hv_name].update(hv_dict)
2849
2850     # os hypervisor parameters
2851     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2852     if self.op.os_hvp:
2853       for os_name, hvs in self.op.os_hvp.items():
2854         if os_name not in self.new_os_hvp:
2855           self.new_os_hvp[os_name] = hvs
2856         else:
2857           for hv_name, hv_dict in hvs.items():
2858             if hv_name not in self.new_os_hvp[os_name]:
2859               self.new_os_hvp[os_name][hv_name] = hv_dict
2860             else:
2861               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2862
2863     # os parameters
2864     self.new_osp = objects.FillDict(cluster.osparams, {})
2865     if self.op.osparams:
2866       for os_name, osp in self.op.osparams.items():
2867         if os_name not in self.new_osp:
2868           self.new_osp[os_name] = {}
2869
2870         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2871                                                   use_none=True)
2872
2873         if not self.new_osp[os_name]:
2874           # we removed all parameters
2875           del self.new_osp[os_name]
2876         else:
2877           # check the parameter validity (remote check)
2878           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2879                          os_name, self.new_osp[os_name])
2880
2881     # changes to the hypervisor list
2882     if self.op.enabled_hypervisors is not None:
2883       self.hv_list = self.op.enabled_hypervisors
2884       for hv in self.hv_list:
2885         # if the hypervisor doesn't already exist in the cluster
2886         # hvparams, we initialize it to empty, and then (in both
2887         # cases) we make sure to fill the defaults, as we might not
2888         # have a complete defaults list if the hypervisor wasn't
2889         # enabled before
2890         if hv not in new_hvp:
2891           new_hvp[hv] = {}
2892         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2893         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2894     else:
2895       self.hv_list = cluster.enabled_hypervisors
2896
2897     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2898       # either the enabled list has changed, or the parameters have, validate
2899       for hv_name, hv_params in self.new_hvparams.items():
2900         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2901             (self.op.enabled_hypervisors and
2902              hv_name in self.op.enabled_hypervisors)):
2903           # either this is a new hypervisor, or its parameters have changed
2904           hv_class = hypervisor.GetHypervisor(hv_name)
2905           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2906           hv_class.CheckParameterSyntax(hv_params)
2907           _CheckHVParams(self, node_list, hv_name, hv_params)
2908
2909     if self.op.os_hvp:
2910       # no need to check any newly-enabled hypervisors, since the
2911       # defaults have already been checked in the above code-block
2912       for os_name, os_hvp in self.new_os_hvp.items():
2913         for hv_name, hv_params in os_hvp.items():
2914           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2915           # we need to fill in the new os_hvp on top of the actual hv_p
2916           cluster_defaults = self.new_hvparams.get(hv_name, {})
2917           new_osp = objects.FillDict(cluster_defaults, hv_params)
2918           hv_class = hypervisor.GetHypervisor(hv_name)
2919           hv_class.CheckParameterSyntax(new_osp)
2920           _CheckHVParams(self, node_list, hv_name, new_osp)
2921
2922     if self.op.default_iallocator:
2923       alloc_script = utils.FindFile(self.op.default_iallocator,
2924                                     constants.IALLOCATOR_SEARCH_PATH,
2925                                     os.path.isfile)
2926       if alloc_script is None:
2927         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2928                                    " specified" % self.op.default_iallocator,
2929                                    errors.ECODE_INVAL)
2930
2931   def Exec(self, feedback_fn):
2932     """Change the parameters of the cluster.
2933
2934     """
2935     if self.op.vg_name is not None:
2936       new_volume = self.op.vg_name
2937       if not new_volume:
2938         new_volume = None
2939       if new_volume != self.cfg.GetVGName():
2940         self.cfg.SetVGName(new_volume)
2941       else:
2942         feedback_fn("Cluster LVM configuration already in desired"
2943                     " state, not changing")
2944     if self.op.drbd_helper is not None:
2945       new_helper = self.op.drbd_helper
2946       if not new_helper:
2947         new_helper = None
2948       if new_helper != self.cfg.GetDRBDHelper():
2949         self.cfg.SetDRBDHelper(new_helper)
2950       else:
2951         feedback_fn("Cluster DRBD helper already in desired state,"
2952                     " not changing")
2953     if self.op.hvparams:
2954       self.cluster.hvparams = self.new_hvparams
2955     if self.op.os_hvp:
2956       self.cluster.os_hvp = self.new_os_hvp
2957     if self.op.enabled_hypervisors is not None:
2958       self.cluster.hvparams = self.new_hvparams
2959       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2960     if self.op.beparams:
2961       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2962     if self.op.nicparams:
2963       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2964     if self.op.osparams:
2965       self.cluster.osparams = self.new_osp
2966     if self.op.ndparams:
2967       self.cluster.ndparams = self.new_ndparams
2968
2969     if self.op.candidate_pool_size is not None:
2970       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2971       # we need to update the pool size here, otherwise the save will fail
2972       _AdjustCandidatePool(self, [])
2973
2974     if self.op.maintain_node_health is not None:
2975       self.cluster.maintain_node_health = self.op.maintain_node_health
2976
2977     if self.op.prealloc_wipe_disks is not None:
2978       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2979
2980     if self.op.add_uids is not None:
2981       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2982
2983     if self.op.remove_uids is not None:
2984       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2985
2986     if self.op.uid_pool is not None:
2987       self.cluster.uid_pool = self.op.uid_pool
2988
2989     if self.op.default_iallocator is not None:
2990       self.cluster.default_iallocator = self.op.default_iallocator
2991
2992     if self.op.reserved_lvs is not None:
2993       self.cluster.reserved_lvs = self.op.reserved_lvs
2994
2995     def helper_os(aname, mods, desc):
2996       desc += " OS list"
2997       lst = getattr(self.cluster, aname)
2998       for key, val in mods:
2999         if key == constants.DDM_ADD:
3000           if val in lst:
3001             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3002           else:
3003             lst.append(val)
3004         elif key == constants.DDM_REMOVE:
3005           if val in lst:
3006             lst.remove(val)
3007           else:
3008             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3009         else:
3010           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3011
3012     if self.op.hidden_os:
3013       helper_os("hidden_os", self.op.hidden_os, "hidden")
3014
3015     if self.op.blacklisted_os:
3016       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3017
3018     if self.op.master_netdev:
3019       master = self.cfg.GetMasterNode()
3020       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3021                   self.cluster.master_netdev)
3022       result = self.rpc.call_node_stop_master(master, False)
3023       result.Raise("Could not disable the master ip")
3024       feedback_fn("Changing master_netdev from %s to %s" %
3025                   (self.cluster.master_netdev, self.op.master_netdev))
3026       self.cluster.master_netdev = self.op.master_netdev
3027
3028     self.cfg.Update(self.cluster, feedback_fn)
3029
3030     if self.op.master_netdev:
3031       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3032                   self.op.master_netdev)
3033       result = self.rpc.call_node_start_master(master, False, False)
3034       if result.fail_msg:
3035         self.LogWarning("Could not re-enable the master ip on"
3036                         " the master, please restart manually: %s",
3037                         result.fail_msg)
3038
3039
3040 def _UploadHelper(lu, nodes, fname):
3041   """Helper for uploading a file and showing warnings.
3042
3043   """
3044   if os.path.exists(fname):
3045     result = lu.rpc.call_upload_file(nodes, fname)
3046     for to_node, to_result in result.items():
3047       msg = to_result.fail_msg
3048       if msg:
3049         msg = ("Copy of file %s to node %s failed: %s" %
3050                (fname, to_node, msg))
3051         lu.proc.LogWarning(msg)
3052
3053
3054 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3055   """Distribute additional files which are part of the cluster configuration.
3056
3057   ConfigWriter takes care of distributing the config and ssconf files, but
3058   there are more files which should be distributed to all nodes. This function
3059   makes sure those are copied.
3060
3061   @param lu: calling logical unit
3062   @param additional_nodes: list of nodes not in the config to distribute to
3063   @type additional_vm: boolean
3064   @param additional_vm: whether the additional nodes are vm-capable or not
3065
3066   """
3067   # 1. Gather target nodes
3068   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3069   dist_nodes = lu.cfg.GetOnlineNodeList()
3070   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3071   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3072   if additional_nodes is not None:
3073     dist_nodes.extend(additional_nodes)
3074     if additional_vm:
3075       vm_nodes.extend(additional_nodes)
3076   if myself.name in dist_nodes:
3077     dist_nodes.remove(myself.name)
3078   if myself.name in vm_nodes:
3079     vm_nodes.remove(myself.name)
3080
3081   # 2. Gather files to distribute
3082   dist_files = set([constants.ETC_HOSTS,
3083                     constants.SSH_KNOWN_HOSTS_FILE,
3084                     constants.RAPI_CERT_FILE,
3085                     constants.RAPI_USERS_FILE,
3086                     constants.CONFD_HMAC_KEY,
3087                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3088                    ])
3089
3090   vm_files = set()
3091   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3092   for hv_name in enabled_hypervisors:
3093     hv_class = hypervisor.GetHypervisor(hv_name)
3094     vm_files.update(hv_class.GetAncillaryFiles())
3095
3096   # 3. Perform the files upload
3097   for fname in dist_files:
3098     _UploadHelper(lu, dist_nodes, fname)
3099   for fname in vm_files:
3100     _UploadHelper(lu, vm_nodes, fname)
3101
3102
3103 class LUClusterRedistConf(NoHooksLU):
3104   """Force the redistribution of cluster configuration.
3105
3106   This is a very simple LU.
3107
3108   """
3109   REQ_BGL = False
3110
3111   def ExpandNames(self):
3112     self.needed_locks = {
3113       locking.LEVEL_NODE: locking.ALL_SET,
3114     }
3115     self.share_locks[locking.LEVEL_NODE] = 1
3116
3117   def Exec(self, feedback_fn):
3118     """Redistribute the configuration.
3119
3120     """
3121     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3122     _RedistributeAncillaryFiles(self)
3123
3124
3125 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3126   """Sleep and poll for an instance's disk to sync.
3127
3128   """
3129   if not instance.disks or disks is not None and not disks:
3130     return True
3131
3132   disks = _ExpandCheckDisks(instance, disks)
3133
3134   if not oneshot:
3135     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3136
3137   node = instance.primary_node
3138
3139   for dev in disks:
3140     lu.cfg.SetDiskID(dev, node)
3141
3142   # TODO: Convert to utils.Retry
3143
3144   retries = 0
3145   degr_retries = 10 # in seconds, as we sleep 1 second each time
3146   while True:
3147     max_time = 0
3148     done = True
3149     cumul_degraded = False
3150     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3151     msg = rstats.fail_msg
3152     if msg:
3153       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3154       retries += 1
3155       if retries >= 10:
3156         raise errors.RemoteError("Can't contact node %s for mirror data,"
3157                                  " aborting." % node)
3158       time.sleep(6)
3159       continue
3160     rstats = rstats.payload
3161     retries = 0
3162     for i, mstat in enumerate(rstats):
3163       if mstat is None:
3164         lu.LogWarning("Can't compute data for node %s/%s",
3165                            node, disks[i].iv_name)
3166         continue
3167
3168       cumul_degraded = (cumul_degraded or
3169                         (mstat.is_degraded and mstat.sync_percent is None))
3170       if mstat.sync_percent is not None:
3171         done = False
3172         if mstat.estimated_time is not None:
3173           rem_time = ("%s remaining (estimated)" %
3174                       utils.FormatSeconds(mstat.estimated_time))
3175           max_time = mstat.estimated_time
3176         else:
3177           rem_time = "no time estimate"
3178         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3179                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3180
3181     # if we're done but degraded, let's do a few small retries, to
3182     # make sure we see a stable and not transient situation; therefore
3183     # we force restart of the loop
3184     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3185       logging.info("Degraded disks found, %d retries left", degr_retries)
3186       degr_retries -= 1
3187       time.sleep(1)
3188       continue
3189
3190     if done or oneshot:
3191       break
3192
3193     time.sleep(min(60, max_time))
3194
3195   if done:
3196     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3197   return not cumul_degraded
3198
3199
3200 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3201   """Check that mirrors are not degraded.
3202
3203   The ldisk parameter, if True, will change the test from the
3204   is_degraded attribute (which represents overall non-ok status for
3205   the device(s)) to the ldisk (representing the local storage status).
3206
3207   """
3208   lu.cfg.SetDiskID(dev, node)
3209
3210   result = True
3211
3212   if on_primary or dev.AssembleOnSecondary():
3213     rstats = lu.rpc.call_blockdev_find(node, dev)
3214     msg = rstats.fail_msg
3215     if msg:
3216       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3217       result = False
3218     elif not rstats.payload:
3219       lu.LogWarning("Can't find disk on node %s", node)
3220       result = False
3221     else:
3222       if ldisk:
3223         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3224       else:
3225         result = result and not rstats.payload.is_degraded
3226
3227   if dev.children:
3228     for child in dev.children:
3229       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3230
3231   return result
3232
3233
3234 class LUOobCommand(NoHooksLU):
3235   """Logical unit for OOB handling.
3236
3237   """
3238   REG_BGL = False
3239
3240   def CheckPrereq(self):
3241     """Check prerequisites.
3242
3243     This checks:
3244      - the node exists in the configuration
3245      - OOB is supported
3246
3247     Any errors are signaled by raising errors.OpPrereqError.
3248
3249     """
3250     self.nodes = []
3251     for node_name in self.op.node_names:
3252       node = self.cfg.GetNodeInfo(node_name)
3253
3254       if node is None:
3255         raise errors.OpPrereqError("Node %s not found" % node_name,
3256                                    errors.ECODE_NOENT)
3257       else:
3258         self.nodes.append(node)
3259
3260       if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3261         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3262                                     " not marked offline") % node_name,
3263                                    errors.ECODE_STATE)
3264
3265   def ExpandNames(self):
3266     """Gather locks we need.
3267
3268     """
3269     if self.op.node_names:
3270       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3271                             for name in self.op.node_names]
3272     else:
3273       self.op.node_names = self.cfg.GetNodeList()
3274
3275     self.needed_locks = {
3276       locking.LEVEL_NODE: self.op.node_names,
3277       }
3278
3279   def Exec(self, feedback_fn):
3280     """Execute OOB and return result if we expect any.
3281
3282     """
3283     master_node = self.cfg.GetMasterNode()
3284     ret = []
3285
3286     for node in self.nodes:
3287       node_entry = [(constants.RS_NORMAL, node.name)]
3288       ret.append(node_entry)
3289
3290       oob_program = _SupportsOob(self.cfg, node)
3291
3292       if not oob_program:
3293         node_entry.append((constants.RS_UNAVAIL, None))
3294         continue
3295
3296       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3297                    self.op.command, oob_program, node.name)
3298       result = self.rpc.call_run_oob(master_node, oob_program,
3299                                      self.op.command, node.name,
3300                                      self.op.timeout)
3301
3302       if result.fail_msg:
3303         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3304                         node.name, result.fail_msg)
3305         node_entry.append((constants.RS_NODATA, None))
3306       else:
3307         try:
3308           self._CheckPayload(result)
3309         except errors.OpExecError, err:
3310           self.LogWarning("The payload returned by '%s' is not valid: %s",
3311                           node.name, err)
3312           node_entry.append((constants.RS_NODATA, None))
3313         else:
3314           if self.op.command == constants.OOB_HEALTH:
3315             # For health we should log important events
3316             for item, status in result.payload:
3317               if status in [constants.OOB_STATUS_WARNING,
3318                             constants.OOB_STATUS_CRITICAL]:
3319                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3320                                 node.name, item, status)
3321
3322           if self.op.command == constants.OOB_POWER_ON:
3323             node.powered = True
3324           elif self.op.command == constants.OOB_POWER_OFF:
3325             node.powered = False
3326           elif self.op.command == constants.OOB_POWER_STATUS:
3327             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3328             if powered != node.powered:
3329               logging.warning(("Recorded power state (%s) of node '%s' does not"
3330                                " match actual power state (%s)"), node.powered,
3331                               node.name, powered)
3332
3333           # For configuration changing commands we should update the node
3334           if self.op.command in (constants.OOB_POWER_ON,
3335                                  constants.OOB_POWER_OFF):
3336             self.cfg.Update(node, feedback_fn)
3337
3338           node_entry.append((constants.RS_NORMAL, result.payload))
3339
3340     return ret
3341
3342   def _CheckPayload(self, result):
3343     """Checks if the payload is valid.
3344
3345     @param result: RPC result
3346     @raises errors.OpExecError: If payload is not valid
3347
3348     """
3349     errs = []
3350     if self.op.command == constants.OOB_HEALTH:
3351       if not isinstance(result.payload, list):
3352         errs.append("command 'health' is expected to return a list but got %s" %
3353                     type(result.payload))
3354       else:
3355         for item, status in result.payload:
3356           if status not in constants.OOB_STATUSES:
3357             errs.append("health item '%s' has invalid status '%s'" %
3358                         (item, status))
3359
3360     if self.op.command == constants.OOB_POWER_STATUS:
3361       if not isinstance(result.payload, dict):
3362         errs.append("power-status is expected to return a dict but got %s" %
3363                     type(result.payload))
3364
3365     if self.op.command in [
3366         constants.OOB_POWER_ON,
3367         constants.OOB_POWER_OFF,
3368         constants.OOB_POWER_CYCLE,
3369         ]:
3370       if result.payload is not None:
3371         errs.append("%s is expected to not return payload but got '%s'" %
3372                     (self.op.command, result.payload))
3373
3374     if errs:
3375       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3376                                utils.CommaJoin(errs))
3377
3378
3379
3380 class LUOsDiagnose(NoHooksLU):
3381   """Logical unit for OS diagnose/query.
3382
3383   """
3384   REQ_BGL = False
3385   _HID = "hidden"
3386   _BLK = "blacklisted"
3387   _VLD = "valid"
3388   _FIELDS_STATIC = utils.FieldSet()
3389   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3390                                    "parameters", "api_versions", _HID, _BLK)
3391
3392   def CheckArguments(self):
3393     if self.op.names:
3394       raise errors.OpPrereqError("Selective OS query not supported",
3395                                  errors.ECODE_INVAL)
3396
3397     _CheckOutputFields(static=self._FIELDS_STATIC,
3398                        dynamic=self._FIELDS_DYNAMIC,
3399                        selected=self.op.output_fields)
3400
3401   def ExpandNames(self):
3402     # Lock all nodes, in shared mode
3403     # Temporary removal of locks, should be reverted later
3404     # TODO: reintroduce locks when they are lighter-weight
3405     self.needed_locks = {}
3406     #self.share_locks[locking.LEVEL_NODE] = 1
3407     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3408
3409   @staticmethod
3410   def _DiagnoseByOS(rlist):
3411     """Remaps a per-node return list into an a per-os per-node dictionary
3412
3413     @param rlist: a map with node names as keys and OS objects as values
3414
3415     @rtype: dict
3416     @return: a dictionary with osnames as keys and as value another
3417         map, with nodes as keys and tuples of (path, status, diagnose,
3418         variants, parameters, api_versions) as values, eg::
3419
3420           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3421                                      (/srv/..., False, "invalid api")],
3422                            "node2": [(/srv/..., True, "", [], [])]}
3423           }
3424
3425     """
3426     all_os = {}
3427     # we build here the list of nodes that didn't fail the RPC (at RPC
3428     # level), so that nodes with a non-responding node daemon don't
3429     # make all OSes invalid
3430     good_nodes = [node_name for node_name in rlist
3431                   if not rlist[node_name].fail_msg]
3432     for node_name, nr in rlist.items():
3433       if nr.fail_msg or not nr.payload:
3434         continue
3435       for (name, path, status, diagnose, variants,
3436            params, api_versions) in nr.payload:
3437         if name not in all_os:
3438           # build a list of nodes for this os containing empty lists
3439           # for each node in node_list
3440           all_os[name] = {}
3441           for nname in good_nodes:
3442             all_os[name][nname] = []
3443         # convert params from [name, help] to (name, help)
3444         params = [tuple(v) for v in params]
3445         all_os[name][node_name].append((path, status, diagnose,
3446                                         variants, params, api_versions))
3447     return all_os
3448
3449   def Exec(self, feedback_fn):
3450     """Compute the list of OSes.
3451
3452     """
3453     valid_nodes = [node.name
3454                    for node in self.cfg.GetAllNodesInfo().values()
3455                    if not node.offline and node.vm_capable]
3456     node_data = self.rpc.call_os_diagnose(valid_nodes)
3457     pol = self._DiagnoseByOS(node_data)
3458     output = []
3459     cluster = self.cfg.GetClusterInfo()
3460
3461     for os_name in utils.NiceSort(pol.keys()):
3462       os_data = pol[os_name]
3463       row = []
3464       valid = True
3465       (variants, params, api_versions) = null_state = (set(), set(), set())
3466       for idx, osl in enumerate(os_data.values()):
3467         valid = bool(valid and osl and osl[0][1])
3468         if not valid:
3469           (variants, params, api_versions) = null_state
3470           break
3471         node_variants, node_params, node_api = osl[0][3:6]
3472         if idx == 0: # first entry
3473           variants = set(node_variants)
3474           params = set(node_params)
3475           api_versions = set(node_api)
3476         else: # keep consistency
3477           variants.intersection_update(node_variants)
3478           params.intersection_update(node_params)
3479           api_versions.intersection_update(node_api)
3480
3481       is_hid = os_name in cluster.hidden_os
3482       is_blk = os_name in cluster.blacklisted_os
3483       if ((self._HID not in self.op.output_fields and is_hid) or
3484           (self._BLK not in self.op.output_fields and is_blk) or
3485           (self._VLD not in self.op.output_fields and not valid)):
3486         continue
3487
3488       for field in self.op.output_fields:
3489         if field == "name":
3490           val = os_name
3491         elif field == self._VLD:
3492           val = valid
3493         elif field == "node_status":
3494           # this is just a copy of the dict
3495           val = {}
3496           for node_name, nos_list in os_data.items():
3497             val[node_name] = nos_list
3498         elif field == "variants":
3499           val = utils.NiceSort(list(variants))
3500         elif field == "parameters":
3501           val = list(params)
3502         elif field == "api_versions":
3503           val = list(api_versions)
3504         elif field == self._HID:
3505           val = is_hid
3506         elif field == self._BLK:
3507           val = is_blk
3508         else:
3509           raise errors.ParameterError(field)
3510         row.append(val)
3511       output.append(row)
3512
3513     return output
3514
3515
3516 class LUNodeRemove(LogicalUnit):
3517   """Logical unit for removing a node.
3518
3519   """
3520   HPATH = "node-remove"
3521   HTYPE = constants.HTYPE_NODE
3522
3523   def BuildHooksEnv(self):
3524     """Build hooks env.
3525
3526     This doesn't run on the target node in the pre phase as a failed
3527     node would then be impossible to remove.
3528
3529     """
3530     env = {
3531       "OP_TARGET": self.op.node_name,
3532       "NODE_NAME": self.op.node_name,
3533       }
3534     all_nodes = self.cfg.GetNodeList()
3535     try:
3536       all_nodes.remove(self.op.node_name)
3537     except ValueError:
3538       logging.warning("Node %s which is about to be removed not found"
3539                       " in the all nodes list", self.op.node_name)
3540     return env, all_nodes, all_nodes
3541
3542   def CheckPrereq(self):
3543     """Check prerequisites.
3544
3545     This checks:
3546      - the node exists in the configuration
3547      - it does not have primary or secondary instances
3548      - it's not the master
3549
3550     Any errors are signaled by raising errors.OpPrereqError.
3551
3552     """
3553     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3554     node = self.cfg.GetNodeInfo(self.op.node_name)
3555     assert node is not None
3556
3557     instance_list = self.cfg.GetInstanceList()
3558
3559     masternode = self.cfg.GetMasterNode()
3560     if node.name == masternode:
3561       raise errors.OpPrereqError("Node is the master node,"
3562                                  " you need to failover first.",
3563                                  errors.ECODE_INVAL)
3564
3565     for instance_name in instance_list:
3566       instance = self.cfg.GetInstanceInfo(instance_name)
3567       if node.name in instance.all_nodes:
3568         raise errors.OpPrereqError("Instance %s is still running on the node,"
3569                                    " please remove first." % instance_name,
3570                                    errors.ECODE_INVAL)
3571     self.op.node_name = node.name
3572     self.node = node
3573
3574   def Exec(self, feedback_fn):
3575     """Removes the node from the cluster.
3576
3577     """
3578     node = self.node
3579     logging.info("Stopping the node daemon and removing configs from node %s",
3580                  node.name)
3581
3582     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3583
3584     # Promote nodes to master candidate as needed
3585     _AdjustCandidatePool(self, exceptions=[node.name])
3586     self.context.RemoveNode(node.name)
3587
3588     # Run post hooks on the node before it's removed
3589     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3590     try:
3591       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3592     except:
3593       # pylint: disable-msg=W0702
3594       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3595
3596     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3597     msg = result.fail_msg
3598     if msg:
3599       self.LogWarning("Errors encountered on the remote node while leaving"
3600                       " the cluster: %s", msg)
3601
3602     # Remove node from our /etc/hosts
3603     if self.cfg.GetClusterInfo().modify_etc_hosts:
3604       master_node = self.cfg.GetMasterNode()
3605       result = self.rpc.call_etc_hosts_modify(master_node,
3606                                               constants.ETC_HOSTS_REMOVE,
3607                                               node.name, None)
3608       result.Raise("Can't update hosts file with new host data")
3609       _RedistributeAncillaryFiles(self)
3610
3611
3612 class _NodeQuery(_QueryBase):
3613   FIELDS = query.NODE_FIELDS
3614
3615   def ExpandNames(self, lu):
3616     lu.needed_locks = {}
3617     lu.share_locks[locking.LEVEL_NODE] = 1
3618
3619     if self.names:
3620       self.wanted = _GetWantedNodes(lu, self.names)
3621     else:
3622       self.wanted = locking.ALL_SET
3623
3624     self.do_locking = (self.use_locking and
3625                        query.NQ_LIVE in self.requested_data)
3626
3627     if self.do_locking:
3628       # if we don't request only static fields, we need to lock the nodes
3629       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3630
3631   def DeclareLocks(self, lu, level):
3632     pass
3633
3634   def _GetQueryData(self, lu):
3635     """Computes the list of nodes and their attributes.
3636
3637     """
3638     all_info = lu.cfg.GetAllNodesInfo()
3639
3640     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3641
3642     # Gather data as requested
3643     if query.NQ_LIVE in self.requested_data:
3644       # filter out non-vm_capable nodes
3645       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3646
3647       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3648                                         lu.cfg.GetHypervisorType())
3649       live_data = dict((name, nresult.payload)
3650                        for (name, nresult) in node_data.items()
3651                        if not nresult.fail_msg and nresult.payload)
3652     else:
3653       live_data = None
3654
3655     if query.NQ_INST in self.requested_data:
3656       node_to_primary = dict([(name, set()) for name in nodenames])
3657       node_to_secondary = dict([(name, set()) for name in nodenames])
3658
3659       inst_data = lu.cfg.GetAllInstancesInfo()
3660
3661       for inst in inst_data.values():
3662         if inst.primary_node in node_to_primary:
3663           node_to_primary[inst.primary_node].add(inst.name)
3664         for secnode in inst.secondary_nodes:
3665           if secnode in node_to_secondary:
3666             node_to_secondary[secnode].add(inst.name)
3667     else:
3668       node_to_primary = None
3669       node_to_secondary = None
3670
3671     if query.NQ_OOB in self.requested_data:
3672       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3673                          for name, node in all_info.iteritems())
3674     else:
3675       oob_support = None
3676
3677     if query.NQ_GROUP in self.requested_data:
3678       groups = lu.cfg.GetAllNodeGroupsInfo()
3679     else:
3680       groups = {}
3681
3682     return query.NodeQueryData([all_info[name] for name in nodenames],
3683                                live_data, lu.cfg.GetMasterNode(),
3684                                node_to_primary, node_to_secondary, groups,
3685                                oob_support, lu.cfg.GetClusterInfo())
3686
3687
3688 class LUNodeQuery(NoHooksLU):
3689   """Logical unit for querying nodes.
3690
3691   """
3692   # pylint: disable-msg=W0142
3693   REQ_BGL = False
3694
3695   def CheckArguments(self):
3696     self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3697                          self.op.use_locking)
3698
3699   def ExpandNames(self):
3700     self.nq.ExpandNames(self)
3701
3702   def Exec(self, feedback_fn):
3703     return self.nq.OldStyleQuery(self)
3704
3705
3706 class LUNodeQueryvols(NoHooksLU):
3707   """Logical unit for getting volumes on node(s).
3708
3709   """
3710   REQ_BGL = False
3711   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3712   _FIELDS_STATIC = utils.FieldSet("node")
3713
3714   def CheckArguments(self):
3715     _CheckOutputFields(static=self._FIELDS_STATIC,
3716                        dynamic=self._FIELDS_DYNAMIC,
3717                        selected=self.op.output_fields)
3718
3719   def ExpandNames(self):
3720     self.needed_locks = {}
3721     self.share_locks[locking.LEVEL_NODE] = 1
3722     if not self.op.nodes:
3723       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3724     else:
3725       self.needed_locks[locking.LEVEL_NODE] = \
3726         _GetWantedNodes(self, self.op.nodes)
3727
3728   def Exec(self, feedback_fn):
3729     """Computes the list of nodes and their attributes.
3730
3731     """
3732     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3733     volumes = self.rpc.call_node_volumes(nodenames)
3734
3735     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3736              in self.cfg.GetInstanceList()]
3737
3738     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3739
3740     output = []
3741     for node in nodenames:
3742       nresult = volumes[node]
3743       if nresult.offline:
3744         continue
3745       msg = nresult.fail_msg
3746       if msg:
3747         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3748         continue
3749
3750       node_vols = nresult.payload[:]
3751       node_vols.sort(key=lambda vol: vol['dev'])
3752
3753       for vol in node_vols:
3754         node_output = []
3755         for field in self.op.output_fields:
3756           if field == "node":
3757             val = node
3758           elif field == "phys":
3759             val = vol['dev']
3760           elif field == "vg":
3761             val = vol['vg']
3762           elif field == "name":
3763             val = vol['name']
3764           elif field == "size":
3765             val = int(float(vol['size']))
3766           elif field == "instance":
3767             for inst in ilist:
3768               if node not in lv_by_node[inst]:
3769                 continue
3770               if vol['name'] in lv_by_node[inst][node]:
3771                 val = inst.name
3772                 break
3773             else:
3774               val = '-'
3775           else:
3776             raise errors.ParameterError(field)
3777           node_output.append(str(val))
3778
3779         output.append(node_output)
3780
3781     return output
3782
3783
3784 class LUNodeQueryStorage(NoHooksLU):
3785   """Logical unit for getting information on storage units on node(s).
3786
3787   """
3788   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3789   REQ_BGL = False
3790
3791   def CheckArguments(self):
3792     _CheckOutputFields(static=self._FIELDS_STATIC,
3793                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3794                        selected=self.op.output_fields)
3795
3796   def ExpandNames(self):
3797     self.needed_locks = {}
3798     self.share_locks[locking.LEVEL_NODE] = 1
3799
3800     if self.op.nodes:
3801       self.needed_locks[locking.LEVEL_NODE] = \
3802         _GetWantedNodes(self, self.op.nodes)
3803     else:
3804       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3805
3806   def Exec(self, feedback_fn):
3807     """Computes the list of nodes and their attributes.
3808
3809     """
3810     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3811
3812     # Always get name to sort by
3813     if constants.SF_NAME in self.op.output_fields:
3814       fields = self.op.output_fields[:]
3815     else:
3816       fields = [constants.SF_NAME] + self.op.output_fields
3817
3818     # Never ask for node or type as it's only known to the LU
3819     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3820       while extra in fields:
3821         fields.remove(extra)
3822
3823     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3824     name_idx = field_idx[constants.SF_NAME]
3825
3826     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3827     data = self.rpc.call_storage_list(self.nodes,
3828                                       self.op.storage_type, st_args,
3829                                       self.op.name, fields)
3830
3831     result = []
3832
3833     for node in utils.NiceSort(self.nodes):
3834       nresult = data[node]
3835       if nresult.offline:
3836         continue
3837
3838       msg = nresult.fail_msg
3839       if msg:
3840         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3841         continue
3842
3843       rows = dict([(row[name_idx], row) for row in nresult.payload])
3844
3845       for name in utils.NiceSort(rows.keys()):
3846         row = rows[name]
3847
3848         out = []
3849
3850         for field in self.op.output_fields:
3851           if field == constants.SF_NODE:
3852             val = node
3853           elif field == constants.SF_TYPE:
3854             val = self.op.storage_type
3855           elif field in field_idx:
3856             val = row[field_idx[field]]
3857           else:
3858             raise errors.ParameterError(field)
3859
3860           out.append(val)
3861
3862         result.append(out)
3863
3864     return result
3865
3866
3867 class _InstanceQuery(_QueryBase):
3868   FIELDS = query.INSTANCE_FIELDS
3869
3870   def ExpandNames(self, lu):
3871     lu.needed_locks = {}
3872     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3873     lu.share_locks[locking.LEVEL_NODE] = 1
3874
3875     if self.names:
3876       self.wanted = _GetWantedInstances(lu, self.names)
3877     else:
3878       self.wanted = locking.ALL_SET
3879
3880     self.do_locking = (self.use_locking and
3881                        query.IQ_LIVE in self.requested_data)
3882     if self.do_locking:
3883       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3884       lu.needed_locks[locking.LEVEL_NODE] = []
3885       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3886
3887   def DeclareLocks(self, lu, level):
3888     if level == locking.LEVEL_NODE and self.do_locking:
3889       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3890
3891   def _GetQueryData(self, lu):
3892     """Computes the list of instances and their attributes.
3893
3894     """
3895     cluster = lu.cfg.GetClusterInfo()
3896     all_info = lu.cfg.GetAllInstancesInfo()
3897
3898     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3899
3900     instance_list = [all_info[name] for name in instance_names]
3901     nodes = frozenset(itertools.chain(*(inst.all_nodes
3902                                         for inst in instance_list)))
3903     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3904     bad_nodes = []
3905     offline_nodes = []
3906     wrongnode_inst = set()
3907
3908     # Gather data as requested
3909     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3910       live_data = {}
3911       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3912       for name in nodes:
3913         result = node_data[name]
3914         if result.offline:
3915           # offline nodes will be in both lists
3916           assert result.fail_msg
3917           offline_nodes.append(name)
3918         if result.fail_msg:
3919           bad_nodes.append(name)
3920         elif result.payload:
3921           for inst in result.payload:
3922             if inst in all_info:
3923               if all_info[inst].primary_node == name:
3924                 live_data.update(result.payload)
3925               else:
3926                 wrongnode_inst.add(inst)
3927             else:
3928               # orphan instance; we don't list it here as we don't
3929               # handle this case yet in the output of instance listing
3930               logging.warning("Orphan instance '%s' found on node %s",
3931                               inst, name)
3932         # else no instance is alive
3933     else:
3934       live_data = {}
3935
3936     if query.IQ_DISKUSAGE in self.requested_data:
3937       disk_usage = dict((inst.name,
3938                          _ComputeDiskSize(inst.disk_template,
3939                                           [{"size": disk.size}
3940                                            for disk in inst.disks]))
3941                         for inst in instance_list)
3942     else:
3943       disk_usage = None
3944
3945     if query.IQ_CONSOLE in self.requested_data:
3946       consinfo = {}
3947       for inst in instance_list:
3948         if inst.name in live_data:
3949           # Instance is running
3950           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3951         else:
3952           consinfo[inst.name] = None
3953       assert set(consinfo.keys()) == set(instance_names)
3954     else:
3955       consinfo = None
3956
3957     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3958                                    disk_usage, offline_nodes, bad_nodes,
3959                                    live_data, wrongnode_inst, consinfo)
3960
3961
3962 class LUQuery(NoHooksLU):
3963   """Query for resources/items of a certain kind.
3964
3965   """
3966   # pylint: disable-msg=W0142
3967   REQ_BGL = False
3968
3969   def CheckArguments(self):
3970     qcls = _GetQueryImplementation(self.op.what)
3971     names = qlang.ReadSimpleFilter("name", self.op.filter)
3972
3973     self.impl = qcls(names, self.op.fields, False)
3974
3975   def ExpandNames(self):
3976     self.impl.ExpandNames(self)
3977
3978   def DeclareLocks(self, level):
3979     self.impl.DeclareLocks(self, level)
3980
3981   def Exec(self, feedback_fn):
3982     return self.impl.NewStyleQuery(self)
3983
3984
3985 class LUQueryFields(NoHooksLU):
3986   """Query for resources/items of a certain kind.
3987
3988   """
3989   # pylint: disable-msg=W0142
3990   REQ_BGL = False
3991
3992   def CheckArguments(self):
3993     self.qcls = _GetQueryImplementation(self.op.what)
3994
3995   def ExpandNames(self):
3996     self.needed_locks = {}
3997
3998   def Exec(self, feedback_fn):
3999     return self.qcls.FieldsQuery(self.op.fields)
4000
4001
4002 class LUNodeModifyStorage(NoHooksLU):
4003   """Logical unit for modifying a storage volume on a node.
4004
4005   """
4006   REQ_BGL = False
4007
4008   def CheckArguments(self):
4009     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4010
4011     storage_type = self.op.storage_type
4012
4013     try:
4014       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4015     except KeyError:
4016       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4017                                  " modified" % storage_type,
4018                                  errors.ECODE_INVAL)
4019
4020     diff = set(self.op.changes.keys()) - modifiable
4021     if diff:
4022       raise errors.OpPrereqError("The following fields can not be modified for"
4023                                  " storage units of type '%s': %r" %
4024                                  (storage_type, list(diff)),
4025                                  errors.ECODE_INVAL)
4026
4027   def ExpandNames(self):
4028     self.needed_locks = {
4029       locking.LEVEL_NODE: self.op.node_name,
4030       }
4031
4032   def Exec(self, feedback_fn):
4033     """Computes the list of nodes and their attributes.
4034
4035     """
4036     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4037     result = self.rpc.call_storage_modify(self.op.node_name,
4038                                           self.op.storage_type, st_args,
4039                                           self.op.name, self.op.changes)
4040     result.Raise("Failed to modify storage unit '%s' on %s" %
4041                  (self.op.name, self.op.node_name))
4042
4043
4044 class LUNodeAdd(LogicalUnit):
4045   """Logical unit for adding node to the cluster.
4046
4047   """
4048   HPATH = "node-add"
4049   HTYPE = constants.HTYPE_NODE
4050   _NFLAGS = ["master_capable", "vm_capable"]
4051
4052   def CheckArguments(self):
4053     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4054     # validate/normalize the node name
4055     self.hostname = netutils.GetHostname(name=self.op.node_name,
4056                                          family=self.primary_ip_family)
4057     self.op.node_name = self.hostname.name
4058
4059     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4060       raise errors.OpPrereqError("Cannot readd the master node",
4061                                  errors.ECODE_STATE)
4062
4063     if self.op.readd and self.op.group:
4064       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4065                                  " being readded", errors.ECODE_INVAL)
4066
4067   def BuildHooksEnv(self):
4068     """Build hooks env.
4069
4070     This will run on all nodes before, and on all nodes + the new node after.
4071
4072     """
4073     env = {
4074       "OP_TARGET": self.op.node_name,
4075       "NODE_NAME": self.op.node_name,
4076       "NODE_PIP": self.op.primary_ip,
4077       "NODE_SIP": self.op.secondary_ip,
4078       "MASTER_CAPABLE": str(self.op.master_capable),
4079       "VM_CAPABLE": str(self.op.vm_capable),
4080       }
4081     nodes_0 = self.cfg.GetNodeList()
4082     nodes_1 = nodes_0 + [self.op.node_name, ]
4083     return env, nodes_0, nodes_1
4084
4085   def CheckPrereq(self):
4086     """Check prerequisites.
4087
4088     This checks:
4089      - the new node is not already in the config
4090      - it is resolvable
4091      - its parameters (single/dual homed) matches the cluster
4092
4093     Any errors are signaled by raising errors.OpPrereqError.
4094
4095     """
4096     cfg = self.cfg
4097     hostname = self.hostname
4098     node = hostname.name
4099     primary_ip = self.op.primary_ip = hostname.ip
4100     if self.op.secondary_ip is None:
4101       if self.primary_ip_family == netutils.IP6Address.family:
4102         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4103                                    " IPv4 address must be given as secondary",
4104                                    errors.ECODE_INVAL)
4105       self.op.secondary_ip = primary_ip
4106
4107     secondary_ip = self.op.secondary_ip
4108     if not netutils.IP4Address.IsValid(secondary_ip):
4109       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4110                                  " address" % secondary_ip, errors.ECODE_INVAL)
4111
4112     node_list = cfg.GetNodeList()
4113     if not self.op.readd and node in node_list:
4114       raise errors.OpPrereqError("Node %s is already in the configuration" %
4115                                  node, errors.ECODE_EXISTS)
4116     elif self.op.readd and node not in node_list:
4117       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4118                                  errors.ECODE_NOENT)
4119
4120     self.changed_primary_ip = False
4121
4122     for existing_node_name in node_list:
4123       existing_node = cfg.GetNodeInfo(existing_node_name)
4124
4125       if self.op.readd and node == existing_node_name:
4126         if existing_node.secondary_ip != secondary_ip:
4127           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4128                                      " address configuration as before",
4129                                      errors.ECODE_INVAL)
4130         if existing_node.primary_ip != primary_ip:
4131           self.changed_primary_ip = True
4132
4133         continue
4134
4135       if (existing_node.primary_ip == primary_ip or
4136           existing_node.secondary_ip == primary_ip or
4137           existing_node.primary_ip == secondary_ip or
4138           existing_node.secondary_ip == secondary_ip):
4139         raise errors.OpPrereqError("New node ip address(es) conflict with"
4140                                    " existing node %s" % existing_node.name,
4141                                    errors.ECODE_NOTUNIQUE)
4142
4143     # After this 'if' block, None is no longer a valid value for the
4144     # _capable op attributes
4145     if self.op.readd:
4146       old_node = self.cfg.GetNodeInfo(node)
4147       assert old_node is not None, "Can't retrieve locked node %s" % node
4148       for attr in self._NFLAGS:
4149         if getattr(self.op, attr) is None:
4150           setattr(self.op, attr, getattr(old_node, attr))
4151     else:
4152       for attr in self._NFLAGS:
4153         if getattr(self.op, attr) is None:
4154           setattr(self.op, attr, True)
4155
4156     if self.op.readd and not self.op.vm_capable:
4157       pri, sec = cfg.GetNodeInstances(node)
4158       if pri or sec:
4159         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4160                                    " flag set to false, but it already holds"
4161                                    " instances" % node,
4162                                    errors.ECODE_STATE)
4163
4164     # check that the type of the node (single versus dual homed) is the
4165     # same as for the master
4166     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4167     master_singlehomed = myself.secondary_ip == myself.primary_ip
4168     newbie_singlehomed = secondary_ip == primary_ip
4169     if master_singlehomed != newbie_singlehomed:
4170       if master_singlehomed:
4171         raise errors.OpPrereqError("The master has no secondary ip but the"
4172                                    " new node has one",
4173                                    errors.ECODE_INVAL)
4174       else:
4175         raise errors.OpPrereqError("The master has a secondary ip but the"
4176                                    " new node doesn't have one",
4177                                    errors.ECODE_INVAL)
4178
4179     # checks reachability
4180     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4181       raise errors.OpPrereqError("Node not reachable by ping",
4182                                  errors.ECODE_ENVIRON)
4183
4184     if not newbie_singlehomed:
4185       # check reachability from my secondary ip to newbie's secondary ip
4186       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4187                            source=myself.secondary_ip):
4188         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4189                                    " based ping to node daemon port",
4190                                    errors.ECODE_ENVIRON)
4191
4192     if self.op.readd:
4193       exceptions = [node]
4194     else:
4195       exceptions = []
4196
4197     if self.op.master_capable:
4198       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4199     else:
4200       self.master_candidate = False
4201
4202     if self.op.readd:
4203       self.new_node = old_node
4204     else:
4205       node_group = cfg.LookupNodeGroup(self.op.group)
4206       self.new_node = objects.Node(name=node,
4207                                    primary_ip=primary_ip,
4208                                    secondary_ip=secondary_ip,
4209                                    master_candidate=self.master_candidate,
4210                                    offline=False, drained=False,
4211                                    group=node_group)
4212
4213     if self.op.ndparams:
4214       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4215
4216   def Exec(self, feedback_fn):
4217     """Adds the new node to the cluster.
4218
4219     """
4220     new_node = self.new_node
4221     node = new_node.name
4222
4223     # We adding a new node so we assume it's powered
4224     new_node.powered = True
4225
4226     # for re-adds, reset the offline/drained/master-candidate flags;
4227     # we need to reset here, otherwise offline would prevent RPC calls
4228     # later in the procedure; this also means that if the re-add
4229     # fails, we are left with a non-offlined, broken node
4230     if self.op.readd:
4231       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4232       self.LogInfo("Readding a node, the offline/drained flags were reset")
4233       # if we demote the node, we do cleanup later in the procedure
4234       new_node.master_candidate = self.master_candidate
4235       if self.changed_primary_ip:
4236         new_node.primary_ip = self.op.primary_ip
4237
4238     # copy the master/vm_capable flags
4239     for attr in self._NFLAGS:
4240       setattr(new_node, attr, getattr(self.op, attr))
4241
4242     # notify the user about any possible mc promotion
4243     if new_node.master_candidate:
4244       self.LogInfo("Node will be a master candidate")
4245
4246     if self.op.ndparams:
4247       new_node.ndparams = self.op.ndparams
4248     else:
4249       new_node.ndparams = {}
4250
4251     # check connectivity
4252     result = self.rpc.call_version([node])[node]
4253     result.Raise("Can't get version information from node %s" % node)
4254     if constants.PROTOCOL_VERSION == result.payload:
4255       logging.info("Communication to node %s fine, sw version %s match",
4256                    node, result.payload)
4257     else:
4258       raise errors.OpExecError("Version mismatch master version %s,"
4259                                " node version %s" %
4260                                (constants.PROTOCOL_VERSION, result.payload))
4261
4262     # Add node to our /etc/hosts, and add key to known_hosts
4263     if self.cfg.GetClusterInfo().modify_etc_hosts:
4264       master_node = self.cfg.GetMasterNode()
4265       result = self.rpc.call_etc_hosts_modify(master_node,
4266                                               constants.ETC_HOSTS_ADD,
4267                                               self.hostname.name,
4268                                               self.hostname.ip)
4269       result.Raise("Can't update hosts file with new host data")
4270
4271     if new_node.secondary_ip != new_node.primary_ip:
4272       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4273                                False)
4274
4275     node_verify_list = [self.cfg.GetMasterNode()]
4276     node_verify_param = {
4277       constants.NV_NODELIST: [node],
4278       # TODO: do a node-net-test as well?
4279     }
4280
4281     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4282                                        self.cfg.GetClusterName())
4283     for verifier in node_verify_list:
4284       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4285       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4286       if nl_payload:
4287         for failed in nl_payload:
4288           feedback_fn("ssh/hostname verification failed"
4289                       " (checking from %s): %s" %
4290                       (verifier, nl_payload[failed]))
4291         raise errors.OpExecError("ssh/hostname verification failed")
4292
4293     if self.op.readd:
4294       _RedistributeAncillaryFiles(self)
4295       self.context.ReaddNode(new_node)
4296       # make sure we redistribute the config
4297       self.cfg.Update(new_node, feedback_fn)
4298       # and make sure the new node will not have old files around
4299       if not new_node.master_candidate:
4300         result = self.rpc.call_node_demote_from_mc(new_node.name)
4301         msg = result.fail_msg
4302         if msg:
4303           self.LogWarning("Node failed to demote itself from master"
4304                           " candidate status: %s" % msg)
4305     else:
4306       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4307                                   additional_vm=self.op.vm_capable)
4308       self.context.AddNode(new_node, self.proc.GetECId())
4309
4310
4311 class LUNodeSetParams(LogicalUnit):
4312   """Modifies the parameters of a node.
4313
4314   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4315       to the node role (as _ROLE_*)
4316   @cvar _R2F: a dictionary from node role to tuples of flags
4317   @cvar _FLAGS: a list of attribute names corresponding to the flags
4318
4319   """
4320   HPATH = "node-modify"
4321   HTYPE = constants.HTYPE_NODE
4322   REQ_BGL = False
4323   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4324   _F2R = {
4325     (True, False, False): _ROLE_CANDIDATE,
4326     (False, True, False): _ROLE_DRAINED,
4327     (False, False, True): _ROLE_OFFLINE,
4328     (False, False, False): _ROLE_REGULAR,
4329     }
4330   _R2F = dict((v, k) for k, v in _F2R.items())
4331   _FLAGS = ["master_candidate", "drained", "offline"]
4332
4333   def CheckArguments(self):
4334     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4335     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4336                 self.op.master_capable, self.op.vm_capable,
4337                 self.op.secondary_ip, self.op.ndparams]
4338     if all_mods.count(None) == len(all_mods):
4339       raise errors.OpPrereqError("Please pass at least one modification",
4340                                  errors.ECODE_INVAL)
4341     if all_mods.count(True) > 1:
4342       raise errors.OpPrereqError("Can't set the node into more than one"
4343                                  " state at the same time",
4344                                  errors.ECODE_INVAL)
4345
4346     # Boolean value that tells us whether we might be demoting from MC
4347     self.might_demote = (self.op.master_candidate == False or
4348                          self.op.offline == True or
4349                          self.op.drained == True or
4350                          self.op.master_capable == False)
4351
4352     if self.op.secondary_ip:
4353       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4354         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4355                                    " address" % self.op.secondary_ip,
4356                                    errors.ECODE_INVAL)
4357
4358     self.lock_all = self.op.auto_promote and self.might_demote
4359     self.lock_instances = self.op.secondary_ip is not None
4360
4361   def ExpandNames(self):
4362     if self.lock_all:
4363       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4364     else:
4365       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4366
4367     if self.lock_instances:
4368       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4369
4370   def DeclareLocks(self, level):
4371     # If we have locked all instances, before waiting to lock nodes, release
4372     # all the ones living on nodes unrelated to the current operation.
4373     if level == locking.LEVEL_NODE and self.lock_instances:
4374       instances_release = []
4375       instances_keep = []
4376       self.affected_instances = []
4377       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4378         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4379           instance = self.context.cfg.GetInstanceInfo(instance_name)
4380           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4381           if i_mirrored and self.op.node_name in instance.all_nodes:
4382             instances_keep.append(instance_name)
4383             self.affected_instances.append(instance)
4384           else:
4385             instances_release.append(instance_name)
4386         if instances_release:
4387           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4388           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4389
4390   def BuildHooksEnv(self):
4391     """Build hooks env.
4392
4393     This runs on the master node.
4394
4395     """
4396     env = {
4397       "OP_TARGET": self.op.node_name,
4398       "MASTER_CANDIDATE": str(self.op.master_candidate),
4399       "OFFLINE": str(self.op.offline),
4400       "DRAINED": str(self.op.drained),
4401       "MASTER_CAPABLE": str(self.op.master_capable),
4402       "VM_CAPABLE": str(self.op.vm_capable),
4403       }
4404     nl = [self.cfg.GetMasterNode(),
4405           self.op.node_name]
4406     return env, nl, nl
4407
4408   def CheckPrereq(self):
4409     """Check prerequisites.
4410
4411     This only checks the instance list against the existing names.
4412
4413     """
4414     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4415
4416     if (self.op.master_candidate is not None or
4417         self.op.drained is not None or
4418         self.op.offline is not None):
4419       # we can't change the master's node flags
4420       if self.op.node_name == self.cfg.GetMasterNode():
4421         raise errors.OpPrereqError("The master role can be changed"
4422                                    " only via master-failover",
4423                                    errors.ECODE_INVAL)
4424
4425     if self.op.master_candidate and not node.master_capable:
4426       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4427                                  " it a master candidate" % node.name,
4428                                  errors.ECODE_STATE)
4429
4430     if self.op.vm_capable == False:
4431       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4432       if ipri or isec:
4433         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4434                                    " the vm_capable flag" % node.name,
4435                                    errors.ECODE_STATE)
4436
4437     if node.master_candidate and self.might_demote and not self.lock_all:
4438       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4439       # check if after removing the current node, we're missing master
4440       # candidates
4441       (mc_remaining, mc_should, _) = \
4442           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4443       if mc_remaining < mc_should:
4444         raise errors.OpPrereqError("Not enough master candidates, please"
4445                                    " pass auto promote option to allow"
4446                                    " promotion", errors.ECODE_STATE)
4447
4448     self.old_flags = old_flags = (node.master_candidate,
4449                                   node.drained, node.offline)
4450     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4451     self.old_role = old_role = self._F2R[old_flags]
4452
4453     # Check for ineffective changes
4454     for attr in self._FLAGS:
4455       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4456         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4457         setattr(self.op, attr, None)
4458
4459     # Past this point, any flag change to False means a transition
4460     # away from the respective state, as only real changes are kept
4461
4462     # TODO: We might query the real power state if it supports OOB
4463     if _SupportsOob(self.cfg, node):
4464       if self.op.offline is False and not (node.powered or
4465                                            self.op.powered == True):
4466         raise errors.OpPrereqError(("Please power on node %s first before you"
4467                                     " can reset offline state") %
4468                                    self.op.node_name)
4469     elif self.op.powered is not None:
4470       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4471                                   " which does not support out-of-band"
4472                                   " handling") % self.op.node_name)
4473
4474     # If we're being deofflined/drained, we'll MC ourself if needed
4475     if (self.op.drained == False or self.op.offline == False or
4476         (self.op.master_capable and not node.master_capable)):
4477       if _DecideSelfPromotion(self):
4478         self.op.master_candidate = True
4479         self.LogInfo("Auto-promoting node to master candidate")
4480
4481     # If we're no longer master capable, we'll demote ourselves from MC
4482     if self.op.master_capable == False and node.master_candidate:
4483       self.LogInfo("Demoting from master candidate")
4484       self.op.master_candidate = False
4485
4486     # Compute new role
4487     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4488     if self.op.master_candidate:
4489       new_role = self._ROLE_CANDIDATE
4490     elif self.op.drained:
4491       new_role = self._ROLE_DRAINED
4492     elif self.op.offline:
4493       new_role = self._ROLE_OFFLINE
4494     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4495       # False is still in new flags, which means we're un-setting (the
4496       # only) True flag
4497       new_role = self._ROLE_REGULAR
4498     else: # no new flags, nothing, keep old role
4499       new_role = old_role
4500
4501     self.new_role = new_role
4502
4503     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4504       # Trying to transition out of offline status
4505       result = self.rpc.call_version([node.name])[node.name]
4506       if result.fail_msg:
4507         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4508                                    " to report its version: %s" %
4509                                    (node.name, result.fail_msg),
4510                                    errors.ECODE_STATE)
4511       else:
4512         self.LogWarning("Transitioning node from offline to online state"
4513                         " without using re-add. Please make sure the node"
4514                         " is healthy!")
4515
4516     if self.op.secondary_ip:
4517       # Ok even without locking, because this can't be changed by any LU
4518       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4519       master_singlehomed = master.secondary_ip == master.primary_ip
4520       if master_singlehomed and self.op.secondary_ip:
4521         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4522                                    " homed cluster", errors.ECODE_INVAL)
4523
4524       if node.offline:
4525         if self.affected_instances:
4526           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4527                                      " node has instances (%s) configured"
4528                                      " to use it" % self.affected_instances)
4529       else:
4530         # On online nodes, check that no instances are running, and that
4531         # the node has the new ip and we can reach it.
4532         for instance in self.affected_instances:
4533           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4534
4535         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4536         if master.name != node.name:
4537           # check reachability from master secondary ip to new secondary ip
4538           if not netutils.TcpPing(self.op.secondary_ip,
4539                                   constants.DEFAULT_NODED_PORT,
4540                                   source=master.secondary_ip):
4541             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4542                                        " based ping to node daemon port",
4543                                        errors.ECODE_ENVIRON)
4544
4545     if self.op.ndparams:
4546       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4547       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4548       self.new_ndparams = new_ndparams
4549
4550   def Exec(self, feedback_fn):
4551     """Modifies a node.
4552
4553     """
4554     node = self.node
4555     old_role = self.old_role
4556     new_role = self.new_role
4557
4558     result = []
4559
4560     if self.op.ndparams:
4561       node.ndparams = self.new_ndparams
4562
4563     if self.op.powered is not None:
4564       node.powered = self.op.powered
4565
4566     for attr in ["master_capable", "vm_capable"]:
4567       val = getattr(self.op, attr)
4568       if val is not None:
4569         setattr(node, attr, val)
4570         result.append((attr, str(val)))
4571
4572     if new_role != old_role:
4573       # Tell the node to demote itself, if no longer MC and not offline
4574       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4575         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4576         if msg:
4577           self.LogWarning("Node failed to demote itself: %s", msg)
4578
4579       new_flags = self._R2F[new_role]
4580       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4581         if of != nf:
4582           result.append((desc, str(nf)))
4583       (node.master_candidate, node.drained, node.offline) = new_flags
4584
4585       # we locked all nodes, we adjust the CP before updating this node
4586       if self.lock_all:
4587         _AdjustCandidatePool(self, [node.name])
4588
4589     if self.op.secondary_ip:
4590       node.secondary_ip = self.op.secondary_ip
4591       result.append(("secondary_ip", self.op.secondary_ip))
4592
4593     # this will trigger configuration file update, if needed
4594     self.cfg.Update(node, feedback_fn)
4595
4596     # this will trigger job queue propagation or cleanup if the mc
4597     # flag changed
4598     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4599       self.context.ReaddNode(node)
4600
4601     return result
4602
4603
4604 class LUNodePowercycle(NoHooksLU):
4605   """Powercycles a node.
4606
4607   """
4608   REQ_BGL = False
4609
4610   def CheckArguments(self):
4611     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4612     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4613       raise errors.OpPrereqError("The node is the master and the force"
4614                                  " parameter was not set",
4615                                  errors.ECODE_INVAL)
4616
4617   def ExpandNames(self):
4618     """Locking for PowercycleNode.
4619
4620     This is a last-resort option and shouldn't block on other
4621     jobs. Therefore, we grab no locks.
4622
4623     """
4624     self.needed_locks = {}
4625
4626   def Exec(self, feedback_fn):
4627     """Reboots a node.
4628
4629     """
4630     result = self.rpc.call_node_powercycle(self.op.node_name,
4631                                            self.cfg.GetHypervisorType())
4632     result.Raise("Failed to schedule the reboot")
4633     return result.payload
4634
4635
4636 class LUClusterQuery(NoHooksLU):
4637   """Query cluster configuration.
4638
4639   """
4640   REQ_BGL = False
4641
4642   def ExpandNames(self):
4643     self.needed_locks = {}
4644
4645   def Exec(self, feedback_fn):
4646     """Return cluster config.
4647
4648     """
4649     cluster = self.cfg.GetClusterInfo()
4650     os_hvp = {}
4651
4652     # Filter just for enabled hypervisors
4653     for os_name, hv_dict in cluster.os_hvp.items():
4654       os_hvp[os_name] = {}
4655       for hv_name, hv_params in hv_dict.items():
4656         if hv_name in cluster.enabled_hypervisors:
4657           os_hvp[os_name][hv_name] = hv_params
4658
4659     # Convert ip_family to ip_version
4660     primary_ip_version = constants.IP4_VERSION
4661     if cluster.primary_ip_family == netutils.IP6Address.family:
4662       primary_ip_version = constants.IP6_VERSION
4663
4664     result = {
4665       "software_version": constants.RELEASE_VERSION,
4666       "protocol_version": constants.PROTOCOL_VERSION,
4667       "config_version": constants.CONFIG_VERSION,
4668       "os_api_version": max(constants.OS_API_VERSIONS),
4669       "export_version": constants.EXPORT_VERSION,
4670       "architecture": (platform.architecture()[0], platform.machine()),
4671       "name": cluster.cluster_name,
4672       "master": cluster.master_node,
4673       "default_hypervisor": cluster.enabled_hypervisors[0],
4674       "enabled_hypervisors": cluster.enabled_hypervisors,
4675       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4676                         for hypervisor_name in cluster.enabled_hypervisors]),
4677       "os_hvp": os_hvp,
4678       "beparams": cluster.beparams,
4679       "osparams": cluster.osparams,
4680       "nicparams": cluster.nicparams,
4681       "ndparams": cluster.ndparams,
4682       "candidate_pool_size": cluster.candidate_pool_size,
4683       "master_netdev": cluster.master_netdev,
4684       "volume_group_name": cluster.volume_group_name,
4685       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4686       "file_storage_dir": cluster.file_storage_dir,
4687       "maintain_node_health": cluster.maintain_node_health,
4688       "ctime": cluster.ctime,
4689       "mtime": cluster.mtime,
4690       "uuid": cluster.uuid,
4691       "tags": list(cluster.GetTags()),
4692       "uid_pool": cluster.uid_pool,
4693       "default_iallocator": cluster.default_iallocator,
4694       "reserved_lvs": cluster.reserved_lvs,
4695       "primary_ip_version": primary_ip_version,
4696       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4697       "hidden_os": cluster.hidden_os,
4698       "blacklisted_os": cluster.blacklisted_os,
4699       }
4700
4701     return result
4702
4703
4704 class LUClusterConfigQuery(NoHooksLU):
4705   """Return configuration values.
4706
4707   """
4708   REQ_BGL = False
4709   _FIELDS_DYNAMIC = utils.FieldSet()
4710   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4711                                   "watcher_pause", "volume_group_name")
4712
4713   def CheckArguments(self):
4714     _CheckOutputFields(static=self._FIELDS_STATIC,
4715                        dynamic=self._FIELDS_DYNAMIC,
4716                        selected=self.op.output_fields)
4717
4718   def ExpandNames(self):
4719     self.needed_locks = {}
4720
4721   def Exec(self, feedback_fn):
4722     """Dump a representation of the cluster config to the standard output.
4723
4724     """
4725     values = []
4726     for field in self.op.output_fields:
4727       if field == "cluster_name":
4728         entry = self.cfg.GetClusterName()
4729       elif field == "master_node":
4730         entry = self.cfg.GetMasterNode()
4731       elif field == "drain_flag":
4732         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4733       elif field == "watcher_pause":
4734         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4735       elif field == "volume_group_name":
4736         entry = self.cfg.GetVGName()
4737       else:
4738         raise errors.ParameterError(field)
4739       values.append(entry)
4740     return values
4741
4742
4743 class LUInstanceActivateDisks(NoHooksLU):
4744   """Bring up an instance's disks.
4745
4746   """
4747   REQ_BGL = False
4748
4749   def ExpandNames(self):
4750     self._ExpandAndLockInstance()
4751     self.needed_locks[locking.LEVEL_NODE] = []
4752     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4753
4754   def DeclareLocks(self, level):
4755     if level == locking.LEVEL_NODE:
4756       self._LockInstancesNodes()
4757
4758   def CheckPrereq(self):
4759     """Check prerequisites.
4760
4761     This checks that the instance is in the cluster.
4762
4763     """
4764     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4765     assert self.instance is not None, \
4766       "Cannot retrieve locked instance %s" % self.op.instance_name
4767     _CheckNodeOnline(self, self.instance.primary_node)
4768
4769   def Exec(self, feedback_fn):
4770     """Activate the disks.
4771
4772     """
4773     disks_ok, disks_info = \
4774               _AssembleInstanceDisks(self, self.instance,
4775                                      ignore_size=self.op.ignore_size)
4776     if not disks_ok:
4777       raise errors.OpExecError("Cannot activate block devices")
4778
4779     return disks_info
4780
4781
4782 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4783                            ignore_size=False):
4784   """Prepare the block devices for an instance.
4785
4786   This sets up the block devices on all nodes.
4787
4788   @type lu: L{LogicalUnit}
4789   @param lu: the logical unit on whose behalf we execute
4790   @type instance: L{objects.Instance}
4791   @param instance: the instance for whose disks we assemble
4792   @type disks: list of L{objects.Disk} or None
4793   @param disks: which disks to assemble (or all, if None)
4794   @type ignore_secondaries: boolean
4795   @param ignore_secondaries: if true, errors on secondary nodes
4796       won't result in an error return from the function
4797   @type ignore_size: boolean
4798   @param ignore_size: if true, the current known size of the disk
4799       will not be used during the disk activation, useful for cases
4800       when the size is wrong
4801   @return: False if the operation failed, otherwise a list of
4802       (host, instance_visible_name, node_visible_name)
4803       with the mapping from node devices to instance devices
4804
4805   """
4806   device_info = []
4807   disks_ok = True
4808   iname = instance.name
4809   disks = _ExpandCheckDisks(instance, disks)
4810
4811   # With the two passes mechanism we try to reduce the window of
4812   # opportunity for the race condition of switching DRBD to primary
4813   # before handshaking occured, but we do not eliminate it
4814
4815   # The proper fix would be to wait (with some limits) until the
4816   # connection has been made and drbd transitions from WFConnection
4817   # into any other network-connected state (Connected, SyncTarget,
4818   # SyncSource, etc.)
4819
4820   # 1st pass, assemble on all nodes in secondary mode
4821   for idx, inst_disk in enumerate(disks):
4822     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4823       if ignore_size:
4824         node_disk = node_disk.Copy()
4825         node_disk.UnsetSize()
4826       lu.cfg.SetDiskID(node_disk, node)
4827       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4828       msg = result.fail_msg
4829       if msg:
4830         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4831                            " (is_primary=False, pass=1): %s",
4832                            inst_disk.iv_name, node, msg)
4833         if not ignore_secondaries:
4834           disks_ok = False
4835
4836   # FIXME: race condition on drbd migration to primary
4837
4838   # 2nd pass, do only the primary node
4839   for idx, inst_disk in enumerate(disks):
4840     dev_path = None
4841
4842     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4843       if node != instance.primary_node:
4844         continue
4845       if ignore_size:
4846         node_disk = node_disk.Copy()
4847         node_disk.UnsetSize()
4848       lu.cfg.SetDiskID(node_disk, node)
4849       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4850       msg = result.fail_msg
4851       if msg:
4852         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4853                            " (is_primary=True, pass=2): %s",
4854                            inst_disk.iv_name, node, msg)
4855         disks_ok = False
4856       else:
4857         dev_path = result.payload
4858
4859     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4860
4861   # leave the disks configured for the primary node
4862   # this is a workaround that would be fixed better by
4863   # improving the logical/physical id handling
4864   for disk in disks:
4865     lu.cfg.SetDiskID(disk, instance.primary_node)
4866
4867   return disks_ok, device_info
4868
4869
4870 def _StartInstanceDisks(lu, instance, force):
4871   """Start the disks of an instance.
4872
4873   """
4874   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4875                                            ignore_secondaries=force)
4876   if not disks_ok:
4877     _ShutdownInstanceDisks(lu, instance)
4878     if force is not None and not force:
4879       lu.proc.LogWarning("", hint="If the message above refers to a"
4880                          " secondary node,"
4881                          " you can retry the operation using '--force'.")
4882     raise errors.OpExecError("Disk consistency error")
4883
4884
4885 class LUInstanceDeactivateDisks(NoHooksLU):
4886   """Shutdown an instance's disks.
4887
4888   """
4889   REQ_BGL = False
4890
4891   def ExpandNames(self):
4892     self._ExpandAndLockInstance()
4893     self.needed_locks[locking.LEVEL_NODE] = []
4894     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4895
4896   def DeclareLocks(self, level):
4897     if level == locking.LEVEL_NODE:
4898       self._LockInstancesNodes()
4899
4900   def CheckPrereq(self):
4901     """Check prerequisites.
4902
4903     This checks that the instance is in the cluster.
4904
4905     """
4906     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4907     assert self.instance is not None, \
4908       "Cannot retrieve locked instance %s" % self.op.instance_name
4909
4910   def Exec(self, feedback_fn):
4911     """Deactivate the disks
4912
4913     """
4914     instance = self.instance
4915     if self.op.force:
4916       _ShutdownInstanceDisks(self, instance)
4917     else:
4918       _SafeShutdownInstanceDisks(self, instance)
4919
4920
4921 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4922   """Shutdown block devices of an instance.
4923
4924   This function checks if an instance is running, before calling
4925   _ShutdownInstanceDisks.
4926
4927   """
4928   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4929   _ShutdownInstanceDisks(lu, instance, disks=disks)
4930
4931
4932 def _ExpandCheckDisks(instance, disks):
4933   """Return the instance disks selected by the disks list
4934
4935   @type disks: list of L{objects.Disk} or None
4936   @param disks: selected disks
4937   @rtype: list of L{objects.Disk}
4938   @return: selected instance disks to act on
4939
4940   """
4941   if disks is None:
4942     return instance.disks
4943   else:
4944     if not set(disks).issubset(instance.disks):
4945       raise errors.ProgrammerError("Can only act on disks belonging to the"
4946                                    " target instance")
4947     return disks
4948
4949
4950 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4951   """Shutdown block devices of an instance.
4952
4953   This does the shutdown on all nodes of the instance.
4954
4955   If the ignore_primary is false, errors on the primary node are
4956   ignored.
4957
4958   """
4959   all_result = True
4960   disks = _ExpandCheckDisks(instance, disks)
4961
4962   for disk in disks:
4963     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4964       lu.cfg.SetDiskID(top_disk, node)
4965       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4966       msg = result.fail_msg
4967       if msg:
4968         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4969                       disk.iv_name, node, msg)
4970         if ((node == instance.primary_node and not ignore_primary) or
4971             (node != instance.primary_node and not result.offline)):
4972           all_result = False
4973   return all_result
4974
4975
4976 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4977   """Checks if a node has enough free memory.
4978
4979   This function check if a given node has the needed amount of free
4980   memory. In case the node has less memory or we cannot get the
4981   information from the node, this function raise an OpPrereqError
4982   exception.
4983
4984   @type lu: C{LogicalUnit}
4985   @param lu: a logical unit from which we get configuration data
4986   @type node: C{str}
4987   @param node: the node to check
4988   @type reason: C{str}
4989   @param reason: string to use in the error message
4990   @type requested: C{int}
4991   @param requested: the amount of memory in MiB to check for
4992   @type hypervisor_name: C{str}
4993   @param hypervisor_name: the hypervisor to ask for memory stats
4994   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4995       we cannot check the node
4996
4997   """
4998   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4999   nodeinfo[node].Raise("Can't get data from node %s" % node,
5000                        prereq=True, ecode=errors.ECODE_ENVIRON)
5001   free_mem = nodeinfo[node].payload.get('memory_free', None)
5002   if not isinstance(free_mem, int):
5003     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5004                                " was '%s'" % (node, free_mem),
5005                                errors.ECODE_ENVIRON)
5006   if requested > free_mem:
5007     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5008                                " needed %s MiB, available %s MiB" %
5009                                (node, reason, requested, free_mem),
5010                                errors.ECODE_NORES)
5011
5012
5013 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5014   """Checks if nodes have enough free disk space in the all VGs.
5015
5016   This function check if all given nodes have the needed amount of
5017   free disk. In case any node has less disk or we cannot get the
5018   information from the node, this function raise an OpPrereqError
5019   exception.
5020
5021   @type lu: C{LogicalUnit}
5022   @param lu: a logical unit from which we get configuration data
5023   @type nodenames: C{list}
5024   @param nodenames: the list of node names to check
5025   @type req_sizes: C{dict}
5026   @param req_sizes: the hash of vg and corresponding amount of disk in
5027       MiB to check for
5028   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5029       or we cannot check the node
5030
5031   """
5032   for vg, req_size in req_sizes.items():
5033     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5034
5035
5036 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5037   """Checks if nodes have enough free disk space in the specified VG.
5038
5039   This function check if all given nodes have the needed amount of
5040   free disk. In case any node has less disk or we cannot get the
5041   information from the node, this function raise an OpPrereqError
5042   exception.
5043
5044   @type lu: C{LogicalUnit}
5045   @param lu: a logical unit from which we get configuration data
5046   @type nodenames: C{list}
5047   @param nodenames: the list of node names to check
5048   @type vg: C{str}
5049   @param vg: the volume group to check
5050   @type requested: C{int}
5051   @param requested: the amount of disk in MiB to check for
5052   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5053       or we cannot check the node
5054
5055   """
5056   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5057   for node in nodenames:
5058     info = nodeinfo[node]
5059     info.Raise("Cannot get current information from node %s" % node,
5060                prereq=True, ecode=errors.ECODE_ENVIRON)
5061     vg_free = info.payload.get("vg_free", None)
5062     if not isinstance(vg_free, int):
5063       raise errors.OpPrereqError("Can't compute free disk space on node"
5064                                  " %s for vg %s, result was '%s'" %
5065                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5066     if requested > vg_free:
5067       raise errors.OpPrereqError("Not enough disk space on target node %s"
5068                                  " vg %s: required %d MiB, available %d MiB" %
5069                                  (node, vg, requested, vg_free),
5070                                  errors.ECODE_NORES)
5071
5072
5073 class LUInstanceStartup(LogicalUnit):
5074   """Starts an instance.
5075
5076   """
5077   HPATH = "instance-start"
5078   HTYPE = constants.HTYPE_INSTANCE
5079   REQ_BGL = False
5080
5081   def CheckArguments(self):
5082     # extra beparams
5083     if self.op.beparams:
5084       # fill the beparams dict
5085       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5086
5087   def ExpandNames(self):
5088     self._ExpandAndLockInstance()
5089
5090   def BuildHooksEnv(self):
5091     """Build hooks env.
5092
5093     This runs on master, primary and secondary nodes of the instance.
5094
5095     """
5096     env = {
5097       "FORCE": self.op.force,
5098       }
5099     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5100     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5101     return env, nl, nl
5102
5103   def CheckPrereq(self):
5104     """Check prerequisites.
5105
5106     This checks that the instance is in the cluster.
5107
5108     """
5109     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5110     assert self.instance is not None, \
5111       "Cannot retrieve locked instance %s" % self.op.instance_name
5112
5113     # extra hvparams
5114     if self.op.hvparams:
5115       # check hypervisor parameter syntax (locally)
5116       cluster = self.cfg.GetClusterInfo()
5117       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5118       filled_hvp = cluster.FillHV(instance)
5119       filled_hvp.update(self.op.hvparams)
5120       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5121       hv_type.CheckParameterSyntax(filled_hvp)
5122       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5123
5124     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5125
5126     if self.primary_offline and self.op.ignore_offline_nodes:
5127       self.proc.LogWarning("Ignoring offline primary node")
5128
5129       if self.op.hvparams or self.op.beparams:
5130         self.proc.LogWarning("Overridden parameters are ignored")
5131     else:
5132       _CheckNodeOnline(self, instance.primary_node)
5133
5134       bep = self.cfg.GetClusterInfo().FillBE(instance)
5135
5136       # check bridges existence
5137       _CheckInstanceBridgesExist(self, instance)
5138
5139       remote_info = self.rpc.call_instance_info(instance.primary_node,
5140                                                 instance.name,
5141                                                 instance.hypervisor)
5142       remote_info.Raise("Error checking node %s" % instance.primary_node,
5143                         prereq=True, ecode=errors.ECODE_ENVIRON)
5144       if not remote_info.payload: # not running already
5145         _CheckNodeFreeMemory(self, instance.primary_node,
5146                              "starting instance %s" % instance.name,
5147                              bep[constants.BE_MEMORY], instance.hypervisor)
5148
5149   def Exec(self, feedback_fn):
5150     """Start the instance.
5151
5152     """
5153     instance = self.instance
5154     force = self.op.force
5155
5156     self.cfg.MarkInstanceUp(instance.name)
5157
5158     if self.primary_offline:
5159       assert self.op.ignore_offline_nodes
5160       self.proc.LogInfo("Primary node offline, marked instance as started")
5161     else:
5162       node_current = instance.primary_node
5163
5164       _StartInstanceDisks(self, instance, force)
5165
5166       result = self.rpc.call_instance_start(node_current, instance,
5167                                             self.op.hvparams, self.op.beparams)
5168       msg = result.fail_msg
5169       if msg:
5170         _ShutdownInstanceDisks(self, instance)
5171         raise errors.OpExecError("Could not start instance: %s" % msg)
5172
5173
5174 class LUInstanceReboot(LogicalUnit):
5175   """Reboot an instance.
5176
5177   """
5178   HPATH = "instance-reboot"
5179   HTYPE = constants.HTYPE_INSTANCE
5180   REQ_BGL = False
5181
5182   def ExpandNames(self):
5183     self._ExpandAndLockInstance()
5184
5185   def BuildHooksEnv(self):
5186     """Build hooks env.
5187
5188     This runs on master, primary and secondary nodes of the instance.
5189
5190     """
5191     env = {
5192       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5193       "REBOOT_TYPE": self.op.reboot_type,
5194       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5195       }
5196     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5197     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5198     return env, nl, nl
5199
5200   def CheckPrereq(self):
5201     """Check prerequisites.
5202
5203     This checks that the instance is in the cluster.
5204
5205     """
5206     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5207     assert self.instance is not None, \
5208       "Cannot retrieve locked instance %s" % self.op.instance_name
5209
5210     _CheckNodeOnline(self, instance.primary_node)
5211
5212     # check bridges existence
5213     _CheckInstanceBridgesExist(self, instance)
5214
5215   def Exec(self, feedback_fn):
5216     """Reboot the instance.
5217
5218     """
5219     instance = self.instance
5220     ignore_secondaries = self.op.ignore_secondaries
5221     reboot_type = self.op.reboot_type
5222
5223     node_current = instance.primary_node
5224
5225     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5226                        constants.INSTANCE_REBOOT_HARD]:
5227       for disk in instance.disks:
5228         self.cfg.SetDiskID(disk, node_current)
5229       result = self.rpc.call_instance_reboot(node_current, instance,
5230                                              reboot_type,
5231                                              self.op.shutdown_timeout)
5232       result.Raise("Could not reboot instance")
5233     else:
5234       result = self.rpc.call_instance_shutdown(node_current, instance,
5235                                                self.op.shutdown_timeout)
5236       result.Raise("Could not shutdown instance for full reboot")
5237       _ShutdownInstanceDisks(self, instance)
5238       _StartInstanceDisks(self, instance, ignore_secondaries)
5239       result = self.rpc.call_instance_start(node_current, instance, None, None)
5240       msg = result.fail_msg
5241       if msg:
5242         _ShutdownInstanceDisks(self, instance)
5243         raise errors.OpExecError("Could not start instance for"
5244                                  " full reboot: %s" % msg)
5245
5246     self.cfg.MarkInstanceUp(instance.name)
5247
5248
5249 class LUInstanceShutdown(LogicalUnit):
5250   """Shutdown an instance.
5251
5252   """
5253   HPATH = "instance-stop"
5254   HTYPE = constants.HTYPE_INSTANCE
5255   REQ_BGL = False
5256
5257   def ExpandNames(self):
5258     self._ExpandAndLockInstance()
5259
5260   def BuildHooksEnv(self):
5261     """Build hooks env.
5262
5263     This runs on master, primary and secondary nodes of the instance.
5264
5265     """
5266     env = _BuildInstanceHookEnvByObject(self, self.instance)
5267     env["TIMEOUT"] = self.op.timeout
5268     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5269     return env, nl, nl
5270
5271   def CheckPrereq(self):
5272     """Check prerequisites.
5273
5274     This checks that the instance is in the cluster.
5275
5276     """
5277     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5278     assert self.instance is not None, \
5279       "Cannot retrieve locked instance %s" % self.op.instance_name
5280
5281     self.primary_offline = \
5282       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5283
5284     if self.primary_offline and self.op.ignore_offline_nodes:
5285       self.proc.LogWarning("Ignoring offline primary node")
5286     else:
5287       _CheckNodeOnline(self, self.instance.primary_node)
5288
5289   def Exec(self, feedback_fn):
5290     """Shutdown the instance.
5291
5292     """
5293     instance = self.instance
5294     node_current = instance.primary_node
5295     timeout = self.op.timeout
5296
5297     self.cfg.MarkInstanceDown(instance.name)
5298
5299     if self.primary_offline:
5300       assert self.op.ignore_offline_nodes
5301       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5302     else:
5303       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5304       msg = result.fail_msg
5305       if msg:
5306         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5307
5308       _ShutdownInstanceDisks(self, instance)
5309
5310
5311 class LUInstanceReinstall(LogicalUnit):
5312   """Reinstall an instance.
5313
5314   """
5315   HPATH = "instance-reinstall"
5316   HTYPE = constants.HTYPE_INSTANCE
5317   REQ_BGL = False
5318
5319   def ExpandNames(self):
5320     self._ExpandAndLockInstance()
5321
5322   def BuildHooksEnv(self):
5323     """Build hooks env.
5324
5325     This runs on master, primary and secondary nodes of the instance.
5326
5327     """
5328     env = _BuildInstanceHookEnvByObject(self, self.instance)
5329     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5330     return env, nl, nl
5331
5332   def CheckPrereq(self):
5333     """Check prerequisites.
5334
5335     This checks that the instance is in the cluster and is not running.
5336
5337     """
5338     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5339     assert instance is not None, \
5340       "Cannot retrieve locked instance %s" % self.op.instance_name
5341     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5342                      " offline, cannot reinstall")
5343     for node in instance.secondary_nodes:
5344       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5345                        " cannot reinstall")
5346
5347     if instance.disk_template == constants.DT_DISKLESS:
5348       raise errors.OpPrereqError("Instance '%s' has no disks" %
5349                                  self.op.instance_name,
5350                                  errors.ECODE_INVAL)
5351     _CheckInstanceDown(self, instance, "cannot reinstall")
5352
5353     if self.op.os_type is not None:
5354       # OS verification
5355       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5356       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5357       instance_os = self.op.os_type
5358     else:
5359       instance_os = instance.os
5360
5361     nodelist = list(instance.all_nodes)
5362
5363     if self.op.osparams:
5364       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5365       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5366       self.os_inst = i_osdict # the new dict (without defaults)
5367     else:
5368       self.os_inst = None
5369
5370     self.instance = instance
5371
5372   def Exec(self, feedback_fn):
5373     """Reinstall the instance.
5374
5375     """
5376     inst = self.instance
5377
5378     if self.op.os_type is not None:
5379       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5380       inst.os = self.op.os_type
5381       # Write to configuration
5382       self.cfg.Update(inst, feedback_fn)
5383
5384     _StartInstanceDisks(self, inst, None)
5385     try:
5386       feedback_fn("Running the instance OS create scripts...")
5387       # FIXME: pass debug option from opcode to backend
5388       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5389                                              self.op.debug_level,
5390                                              osparams=self.os_inst)
5391       result.Raise("Could not install OS for instance %s on node %s" %
5392                    (inst.name, inst.primary_node))
5393     finally:
5394       _ShutdownInstanceDisks(self, inst)
5395
5396
5397 class LUInstanceRecreateDisks(LogicalUnit):
5398   """Recreate an instance's missing disks.
5399
5400   """
5401   HPATH = "instance-recreate-disks"
5402   HTYPE = constants.HTYPE_INSTANCE
5403   REQ_BGL = False
5404
5405   def ExpandNames(self):
5406     self._ExpandAndLockInstance()
5407
5408   def BuildHooksEnv(self):
5409     """Build hooks env.
5410
5411     This runs on master, primary and secondary nodes of the instance.
5412
5413     """
5414     env = _BuildInstanceHookEnvByObject(self, self.instance)
5415     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5416     return env, nl, nl
5417
5418   def CheckPrereq(self):
5419     """Check prerequisites.
5420
5421     This checks that the instance is in the cluster and is not running.
5422
5423     """
5424     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5425     assert instance is not None, \
5426       "Cannot retrieve locked instance %s" % self.op.instance_name
5427     _CheckNodeOnline(self, instance.primary_node)
5428
5429     if instance.disk_template == constants.DT_DISKLESS:
5430       raise errors.OpPrereqError("Instance '%s' has no disks" %
5431                                  self.op.instance_name, errors.ECODE_INVAL)
5432     _CheckInstanceDown(self, instance, "cannot recreate disks")
5433
5434     if not self.op.disks:
5435       self.op.disks = range(len(instance.disks))
5436     else:
5437       for idx in self.op.disks:
5438         if idx >= len(instance.disks):
5439           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5440                                      errors.ECODE_INVAL)
5441
5442     self.instance = instance
5443
5444   def Exec(self, feedback_fn):
5445     """Recreate the disks.
5446
5447     """
5448     to_skip = []
5449     for idx, _ in enumerate(self.instance.disks):
5450       if idx not in self.op.disks: # disk idx has not been passed in
5451         to_skip.append(idx)
5452         continue
5453
5454     _CreateDisks(self, self.instance, to_skip=to_skip)
5455
5456
5457 class LUInstanceRename(LogicalUnit):
5458   """Rename an instance.
5459
5460   """
5461   HPATH = "instance-rename"
5462   HTYPE = constants.HTYPE_INSTANCE
5463
5464   def CheckArguments(self):
5465     """Check arguments.
5466
5467     """
5468     if self.op.ip_check and not self.op.name_check:
5469       # TODO: make the ip check more flexible and not depend on the name check
5470       raise errors.OpPrereqError("Cannot do ip check without a name check",
5471                                  errors.ECODE_INVAL)
5472
5473   def BuildHooksEnv(self):
5474     """Build hooks env.
5475
5476     This runs on master, primary and secondary nodes of the instance.
5477
5478     """
5479     env = _BuildInstanceHookEnvByObject(self, self.instance)
5480     env["INSTANCE_NEW_NAME"] = self.op.new_name
5481     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5482     return env, nl, nl
5483
5484   def CheckPrereq(self):
5485     """Check prerequisites.
5486
5487     This checks that the instance is in the cluster and is not running.
5488
5489     """
5490     self.op.instance_name = _ExpandInstanceName(self.cfg,
5491                                                 self.op.instance_name)
5492     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5493     assert instance is not None
5494     _CheckNodeOnline(self, instance.primary_node)
5495     _CheckInstanceDown(self, instance, "cannot rename")
5496     self.instance = instance
5497
5498     new_name = self.op.new_name
5499     if self.op.name_check:
5500       hostname = netutils.GetHostname(name=new_name)
5501       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5502                    hostname.name)
5503       new_name = self.op.new_name = hostname.name
5504       if (self.op.ip_check and
5505           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5506         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5507                                    (hostname.ip, new_name),
5508                                    errors.ECODE_NOTUNIQUE)
5509
5510     instance_list = self.cfg.GetInstanceList()
5511     if new_name in instance_list and new_name != instance.name:
5512       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5513                                  new_name, errors.ECODE_EXISTS)
5514
5515   def Exec(self, feedback_fn):
5516     """Rename the instance.
5517
5518     """
5519     inst = self.instance
5520     old_name = inst.name
5521
5522     rename_file_storage = False
5523     if (inst.disk_template == constants.DT_FILE and
5524         self.op.new_name != inst.name):
5525       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5526       rename_file_storage = True
5527
5528     self.cfg.RenameInstance(inst.name, self.op.new_name)
5529     # Change the instance lock. This is definitely safe while we hold the BGL
5530     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5531     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5532
5533     # re-read the instance from the configuration after rename
5534     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5535
5536     if rename_file_storage:
5537       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5538       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5539                                                      old_file_storage_dir,
5540                                                      new_file_storage_dir)
5541       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5542                    " (but the instance has been renamed in Ganeti)" %
5543                    (inst.primary_node, old_file_storage_dir,
5544                     new_file_storage_dir))
5545
5546     _StartInstanceDisks(self, inst, None)
5547     try:
5548       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5549                                                  old_name, self.op.debug_level)
5550       msg = result.fail_msg
5551       if msg:
5552         msg = ("Could not run OS rename script for instance %s on node %s"
5553                " (but the instance has been renamed in Ganeti): %s" %
5554                (inst.name, inst.primary_node, msg))
5555         self.proc.LogWarning(msg)
5556     finally:
5557       _ShutdownInstanceDisks(self, inst)
5558
5559     return inst.name
5560
5561
5562 class LUInstanceRemove(LogicalUnit):
5563   """Remove an instance.
5564
5565   """
5566   HPATH = "instance-remove"
5567   HTYPE = constants.HTYPE_INSTANCE
5568   REQ_BGL = False
5569
5570   def ExpandNames(self):
5571     self._ExpandAndLockInstance()
5572     self.needed_locks[locking.LEVEL_NODE] = []
5573     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5574
5575   def DeclareLocks(self, level):
5576     if level == locking.LEVEL_NODE:
5577       self._LockInstancesNodes()
5578
5579   def BuildHooksEnv(self):
5580     """Build hooks env.
5581
5582     This runs on master, primary and secondary nodes of the instance.
5583
5584     """
5585     env = _BuildInstanceHookEnvByObject(self, self.instance)
5586     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5587     nl = [self.cfg.GetMasterNode()]
5588     nl_post = list(self.instance.all_nodes) + nl
5589     return env, nl, nl_post
5590
5591   def CheckPrereq(self):
5592     """Check prerequisites.
5593
5594     This checks that the instance is in the cluster.
5595
5596     """
5597     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5598     assert self.instance is not None, \
5599       "Cannot retrieve locked instance %s" % self.op.instance_name
5600
5601   def Exec(self, feedback_fn):
5602     """Remove the instance.
5603
5604     """
5605     instance = self.instance
5606     logging.info("Shutting down instance %s on node %s",
5607                  instance.name, instance.primary_node)
5608
5609     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5610                                              self.op.shutdown_timeout)
5611     msg = result.fail_msg
5612     if msg:
5613       if self.op.ignore_failures:
5614         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5615       else:
5616         raise errors.OpExecError("Could not shutdown instance %s on"
5617                                  " node %s: %s" %
5618                                  (instance.name, instance.primary_node, msg))
5619
5620     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5621
5622
5623 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5624   """Utility function to remove an instance.
5625
5626   """
5627   logging.info("Removing block devices for instance %s", instance.name)
5628
5629   if not _RemoveDisks(lu, instance):
5630     if not ignore_failures:
5631       raise errors.OpExecError("Can't remove instance's disks")
5632     feedback_fn("Warning: can't remove instance's disks")
5633
5634   logging.info("Removing instance %s out of cluster config", instance.name)
5635
5636   lu.cfg.RemoveInstance(instance.name)
5637
5638   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5639     "Instance lock removal conflict"
5640
5641   # Remove lock for the instance
5642   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5643
5644
5645 class LUInstanceQuery(NoHooksLU):
5646   """Logical unit for querying instances.
5647
5648   """
5649   # pylint: disable-msg=W0142
5650   REQ_BGL = False
5651
5652   def CheckArguments(self):
5653     self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5654                              self.op.use_locking)
5655
5656   def ExpandNames(self):
5657     self.iq.ExpandNames(self)
5658
5659   def DeclareLocks(self, level):
5660     self.iq.DeclareLocks(self, level)
5661
5662   def Exec(self, feedback_fn):
5663     return self.iq.OldStyleQuery(self)
5664
5665
5666 class LUInstanceFailover(LogicalUnit):
5667   """Failover an instance.
5668
5669   """
5670   HPATH = "instance-failover"
5671   HTYPE = constants.HTYPE_INSTANCE
5672   REQ_BGL = False
5673
5674   def ExpandNames(self):
5675     self._ExpandAndLockInstance()
5676     self.needed_locks[locking.LEVEL_NODE] = []
5677     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5678
5679   def DeclareLocks(self, level):
5680     if level == locking.LEVEL_NODE:
5681       self._LockInstancesNodes()
5682
5683   def BuildHooksEnv(self):
5684     """Build hooks env.
5685
5686     This runs on master, primary and secondary nodes of the instance.
5687
5688     """
5689     instance = self.instance
5690     source_node = instance.primary_node
5691     target_node = instance.secondary_nodes[0]
5692     env = {
5693       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5694       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5695       "OLD_PRIMARY": source_node,
5696       "OLD_SECONDARY": target_node,
5697       "NEW_PRIMARY": target_node,
5698       "NEW_SECONDARY": source_node,
5699       }
5700     env.update(_BuildInstanceHookEnvByObject(self, instance))
5701     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5702     nl_post = list(nl)
5703     nl_post.append(source_node)
5704     return env, nl, nl_post
5705
5706   def CheckPrereq(self):
5707     """Check prerequisites.
5708
5709     This checks that the instance is in the cluster.
5710
5711     """
5712     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5713     assert self.instance is not None, \
5714       "Cannot retrieve locked instance %s" % self.op.instance_name
5715
5716     bep = self.cfg.GetClusterInfo().FillBE(instance)
5717     if instance.disk_template not in constants.DTS_NET_MIRROR:
5718       raise errors.OpPrereqError("Instance's disk layout is not"
5719                                  " network mirrored, cannot failover.",
5720                                  errors.ECODE_STATE)
5721
5722     secondary_nodes = instance.secondary_nodes
5723     if not secondary_nodes:
5724       raise errors.ProgrammerError("no secondary node but using "
5725                                    "a mirrored disk template")
5726
5727     target_node = secondary_nodes[0]
5728     _CheckNodeOnline(self, target_node)
5729     _CheckNodeNotDrained(self, target_node)
5730     if instance.admin_up:
5731       # check memory requirements on the secondary node
5732       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5733                            instance.name, bep[constants.BE_MEMORY],
5734                            instance.hypervisor)
5735     else:
5736       self.LogInfo("Not checking memory on the secondary node as"
5737                    " instance will not be started")
5738
5739     # check bridge existance
5740     _CheckInstanceBridgesExist(self, instance, node=target_node)
5741
5742   def Exec(self, feedback_fn):
5743     """Failover an instance.
5744
5745     The failover is done by shutting it down on its present node and
5746     starting it on the secondary.
5747
5748     """
5749     instance = self.instance
5750     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5751
5752     source_node = instance.primary_node
5753     target_node = instance.secondary_nodes[0]
5754
5755     if instance.admin_up:
5756       feedback_fn("* checking disk consistency between source and target")
5757       for dev in instance.disks:
5758         # for drbd, these are drbd over lvm
5759         if not _CheckDiskConsistency(self, dev, target_node, False):
5760           if not self.op.ignore_consistency:
5761             raise errors.OpExecError("Disk %s is degraded on target node,"
5762                                      " aborting failover." % dev.iv_name)
5763     else:
5764       feedback_fn("* not checking disk consistency as instance is not running")
5765
5766     feedback_fn("* shutting down instance on source node")
5767     logging.info("Shutting down instance %s on node %s",
5768                  instance.name, source_node)
5769
5770     result = self.rpc.call_instance_shutdown(source_node, instance,
5771                                              self.op.shutdown_timeout)
5772     msg = result.fail_msg
5773     if msg:
5774       if self.op.ignore_consistency or primary_node.offline:
5775         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5776                              " Proceeding anyway. Please make sure node"
5777                              " %s is down. Error details: %s",
5778                              instance.name, source_node, source_node, msg)
5779       else:
5780         raise errors.OpExecError("Could not shutdown instance %s on"
5781                                  " node %s: %s" %
5782                                  (instance.name, source_node, msg))
5783
5784     feedback_fn("* deactivating the instance's disks on source node")
5785     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5786       raise errors.OpExecError("Can't shut down the instance's disks.")
5787
5788     instance.primary_node = target_node
5789     # distribute new instance config to the other nodes
5790     self.cfg.Update(instance, feedback_fn)
5791
5792     # Only start the instance if it's marked as up
5793     if instance.admin_up:
5794       feedback_fn("* activating the instance's disks on target node")
5795       logging.info("Starting instance %s on node %s",
5796                    instance.name, target_node)
5797
5798       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5799                                            ignore_secondaries=True)
5800       if not disks_ok:
5801         _ShutdownInstanceDisks(self, instance)
5802         raise errors.OpExecError("Can't activate the instance's disks")
5803
5804       feedback_fn("* starting the instance on the target node")
5805       result = self.rpc.call_instance_start(target_node, instance, None, None)
5806       msg = result.fail_msg
5807       if msg:
5808         _ShutdownInstanceDisks(self, instance)
5809         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5810                                  (instance.name, target_node, msg))
5811
5812
5813 class LUInstanceMigrate(LogicalUnit):
5814   """Migrate an instance.
5815
5816   This is migration without shutting down, compared to the failover,
5817   which is done with shutdown.
5818
5819   """
5820   HPATH = "instance-migrate"
5821   HTYPE = constants.HTYPE_INSTANCE
5822   REQ_BGL = False
5823
5824   def ExpandNames(self):
5825     self._ExpandAndLockInstance()
5826
5827     self.needed_locks[locking.LEVEL_NODE] = []
5828     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5829
5830     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5831                                        self.op.cleanup)
5832     self.tasklets = [self._migrater]
5833
5834   def DeclareLocks(self, level):
5835     if level == locking.LEVEL_NODE:
5836       self._LockInstancesNodes()
5837
5838   def BuildHooksEnv(self):
5839     """Build hooks env.
5840
5841     This runs on master, primary and secondary nodes of the instance.
5842
5843     """
5844     instance = self._migrater.instance
5845     source_node = instance.primary_node
5846     target_node = instance.secondary_nodes[0]
5847     env = _BuildInstanceHookEnvByObject(self, instance)
5848     env["MIGRATE_LIVE"] = self._migrater.live
5849     env["MIGRATE_CLEANUP"] = self.op.cleanup
5850     env.update({
5851         "OLD_PRIMARY": source_node,
5852         "OLD_SECONDARY": target_node,
5853         "NEW_PRIMARY": target_node,
5854         "NEW_SECONDARY": source_node,
5855         })
5856     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5857     nl_post = list(nl)
5858     nl_post.append(source_node)
5859     return env, nl, nl_post
5860
5861
5862 class LUInstanceMove(LogicalUnit):
5863   """Move an instance by data-copying.
5864
5865   """
5866   HPATH = "instance-move"
5867   HTYPE = constants.HTYPE_INSTANCE
5868   REQ_BGL = False
5869
5870   def ExpandNames(self):
5871     self._ExpandAndLockInstance()
5872     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5873     self.op.target_node = target_node
5874     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5875     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5876
5877   def DeclareLocks(self, level):
5878     if level == locking.LEVEL_NODE:
5879       self._LockInstancesNodes(primary_only=True)
5880
5881   def BuildHooksEnv(self):
5882     """Build hooks env.
5883
5884     This runs on master, primary and secondary nodes of the instance.
5885
5886     """
5887     env = {
5888       "TARGET_NODE": self.op.target_node,
5889       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5890       }
5891     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5892     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5893                                        self.op.target_node]
5894     return env, nl, nl
5895
5896   def CheckPrereq(self):
5897     """Check prerequisites.
5898
5899     This checks that the instance is in the cluster.
5900
5901     """
5902     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5903     assert self.instance is not None, \
5904       "Cannot retrieve locked instance %s" % self.op.instance_name
5905
5906     node = self.cfg.GetNodeInfo(self.op.target_node)
5907     assert node is not None, \
5908       "Cannot retrieve locked node %s" % self.op.target_node
5909
5910     self.target_node = target_node = node.name
5911
5912     if target_node == instance.primary_node:
5913       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5914                                  (instance.name, target_node),
5915                                  errors.ECODE_STATE)
5916
5917     bep = self.cfg.GetClusterInfo().FillBE(instance)
5918
5919     for idx, dsk in enumerate(instance.disks):
5920       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5921         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5922                                    " cannot copy" % idx, errors.ECODE_STATE)
5923
5924     _CheckNodeOnline(self, target_node)
5925     _CheckNodeNotDrained(self, target_node)
5926     _CheckNodeVmCapable(self, target_node)
5927
5928     if instance.admin_up:
5929       # check memory requirements on the secondary node
5930       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5931                            instance.name, bep[constants.BE_MEMORY],
5932                            instance.hypervisor)
5933     else:
5934       self.LogInfo("Not checking memory on the secondary node as"
5935                    " instance will not be started")
5936
5937     # check bridge existance
5938     _CheckInstanceBridgesExist(self, instance, node=target_node)
5939
5940   def Exec(self, feedback_fn):
5941     """Move an instance.
5942
5943     The move is done by shutting it down on its present node, copying
5944     the data over (slow) and starting it on the new node.
5945
5946     """
5947     instance = self.instance
5948
5949     source_node = instance.primary_node
5950     target_node = self.target_node
5951
5952     self.LogInfo("Shutting down instance %s on source node %s",
5953                  instance.name, source_node)
5954
5955     result = self.rpc.call_instance_shutdown(source_node, instance,
5956                                              self.op.shutdown_timeout)
5957     msg = result.fail_msg
5958     if msg:
5959       if self.op.ignore_consistency:
5960         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5961                              " Proceeding anyway. Please make sure node"
5962                              " %s is down. Error details: %s",
5963                              instance.name, source_node, source_node, msg)
5964       else:
5965         raise errors.OpExecError("Could not shutdown instance %s on"
5966                                  " node %s: %s" %
5967                                  (instance.name, source_node, msg))
5968
5969     # create the target disks
5970     try:
5971       _CreateDisks(self, instance, target_node=target_node)
5972     except errors.OpExecError:
5973       self.LogWarning("Device creation failed, reverting...")
5974       try:
5975         _RemoveDisks(self, instance, target_node=target_node)
5976       finally:
5977         self.cfg.ReleaseDRBDMinors(instance.name)
5978         raise
5979
5980     cluster_name = self.cfg.GetClusterInfo().cluster_name
5981
5982     errs = []
5983     # activate, get path, copy the data over
5984     for idx, disk in enumerate(instance.disks):
5985       self.LogInfo("Copying data for disk %d", idx)
5986       result = self.rpc.call_blockdev_assemble(target_node, disk,
5987                                                instance.name, True, idx)
5988       if result.fail_msg:
5989         self.LogWarning("Can't assemble newly created disk %d: %s",
5990                         idx, result.fail_msg)
5991         errs.append(result.fail_msg)
5992         break
5993       dev_path = result.payload
5994       result = self.rpc.call_blockdev_export(source_node, disk,
5995                                              target_node, dev_path,
5996                                              cluster_name)
5997       if result.fail_msg:
5998         self.LogWarning("Can't copy data over for disk %d: %s",
5999                         idx, result.fail_msg)
6000         errs.append(result.fail_msg)
6001         break
6002
6003     if errs:
6004       self.LogWarning("Some disks failed to copy, aborting")
6005       try:
6006         _RemoveDisks(self, instance, target_node=target_node)
6007       finally:
6008         self.cfg.ReleaseDRBDMinors(instance.name)
6009         raise errors.OpExecError("Errors during disk copy: %s" %
6010                                  (",".join(errs),))
6011
6012     instance.primary_node = target_node
6013     self.cfg.Update(instance, feedback_fn)
6014
6015     self.LogInfo("Removing the disks on the original node")
6016     _RemoveDisks(self, instance, target_node=source_node)
6017
6018     # Only start the instance if it's marked as up
6019     if instance.admin_up:
6020       self.LogInfo("Starting instance %s on node %s",
6021                    instance.name, target_node)
6022
6023       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6024                                            ignore_secondaries=True)
6025       if not disks_ok:
6026         _ShutdownInstanceDisks(self, instance)
6027         raise errors.OpExecError("Can't activate the instance's disks")
6028
6029       result = self.rpc.call_instance_start(target_node, instance, None, None)
6030       msg = result.fail_msg
6031       if msg:
6032         _ShutdownInstanceDisks(self, instance)
6033         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6034                                  (instance.name, target_node, msg))
6035
6036
6037 class LUNodeMigrate(LogicalUnit):
6038   """Migrate all instances from a node.
6039
6040   """
6041   HPATH = "node-migrate"
6042   HTYPE = constants.HTYPE_NODE
6043   REQ_BGL = False
6044
6045   def ExpandNames(self):
6046     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6047
6048     self.needed_locks = {
6049       locking.LEVEL_NODE: [self.op.node_name],
6050       }
6051
6052     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6053
6054     # Create tasklets for migrating instances for all instances on this node
6055     names = []
6056     tasklets = []
6057
6058     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6059       logging.debug("Migrating instance %s", inst.name)
6060       names.append(inst.name)
6061
6062       tasklets.append(TLMigrateInstance(self, inst.name, False))
6063
6064     self.tasklets = tasklets
6065
6066     # Declare instance locks
6067     self.needed_locks[locking.LEVEL_INSTANCE] = names
6068
6069   def DeclareLocks(self, level):
6070     if level == locking.LEVEL_NODE:
6071       self._LockInstancesNodes()
6072
6073   def BuildHooksEnv(self):
6074     """Build hooks env.
6075
6076     This runs on the master, the primary and all the secondaries.
6077
6078     """
6079     env = {
6080       "NODE_NAME": self.op.node_name,
6081       }
6082
6083     nl = [self.cfg.GetMasterNode()]
6084
6085     return (env, nl, nl)
6086
6087
6088 class TLMigrateInstance(Tasklet):
6089   """Tasklet class for instance migration.
6090
6091   @type live: boolean
6092   @ivar live: whether the migration will be done live or non-live;
6093       this variable is initalized only after CheckPrereq has run
6094
6095   """
6096   def __init__(self, lu, instance_name, cleanup):
6097     """Initializes this class.
6098
6099     """
6100     Tasklet.__init__(self, lu)
6101
6102     # Parameters
6103     self.instance_name = instance_name
6104     self.cleanup = cleanup
6105     self.live = False # will be overridden later
6106
6107   def CheckPrereq(self):
6108     """Check prerequisites.
6109
6110     This checks that the instance is in the cluster.
6111
6112     """
6113     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6114     instance = self.cfg.GetInstanceInfo(instance_name)
6115     assert instance is not None
6116
6117     if instance.disk_template != constants.DT_DRBD8:
6118       raise errors.OpPrereqError("Instance's disk layout is not"
6119                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6120
6121     secondary_nodes = instance.secondary_nodes
6122     if not secondary_nodes:
6123       raise errors.ConfigurationError("No secondary node but using"
6124                                       " drbd8 disk template")
6125
6126     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6127
6128     target_node = secondary_nodes[0]
6129     # check memory requirements on the secondary node
6130     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6131                          instance.name, i_be[constants.BE_MEMORY],
6132                          instance.hypervisor)
6133
6134     # check bridge existance
6135     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6136
6137     if not self.cleanup:
6138       _CheckNodeNotDrained(self.lu, target_node)
6139       result = self.rpc.call_instance_migratable(instance.primary_node,
6140                                                  instance)
6141       result.Raise("Can't migrate, please use failover",
6142                    prereq=True, ecode=errors.ECODE_STATE)
6143
6144     self.instance = instance
6145
6146     if self.lu.op.live is not None and self.lu.op.mode is not None:
6147       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6148                                  " parameters are accepted",
6149                                  errors.ECODE_INVAL)
6150     if self.lu.op.live is not None:
6151       if self.lu.op.live:
6152         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6153       else:
6154         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6155       # reset the 'live' parameter to None so that repeated
6156       # invocations of CheckPrereq do not raise an exception
6157       self.lu.op.live = None
6158     elif self.lu.op.mode is None:
6159       # read the default value from the hypervisor
6160       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6161       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6162
6163     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6164
6165   def _WaitUntilSync(self):
6166     """Poll with custom rpc for disk sync.
6167
6168     This uses our own step-based rpc call.
6169
6170     """
6171     self.feedback_fn("* wait until resync is done")
6172     all_done = False
6173     while not all_done:
6174       all_done = True
6175       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6176                                             self.nodes_ip,
6177                                             self.instance.disks)
6178       min_percent = 100
6179       for node, nres in result.items():
6180         nres.Raise("Cannot resync disks on node %s" % node)
6181         node_done, node_percent = nres.payload
6182         all_done = all_done and node_done
6183         if node_percent is not None:
6184           min_percent = min(min_percent, node_percent)
6185       if not all_done:
6186         if min_percent < 100:
6187           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6188         time.sleep(2)
6189
6190   def _EnsureSecondary(self, node):
6191     """Demote a node to secondary.
6192
6193     """
6194     self.feedback_fn("* switching node %s to secondary mode" % node)
6195
6196     for dev in self.instance.disks:
6197       self.cfg.SetDiskID(dev, node)
6198
6199     result = self.rpc.call_blockdev_close(node, self.instance.name,
6200                                           self.instance.disks)
6201     result.Raise("Cannot change disk to secondary on node %s" % node)
6202
6203   def _GoStandalone(self):
6204     """Disconnect from the network.
6205
6206     """
6207     self.feedback_fn("* changing into standalone mode")
6208     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6209                                                self.instance.disks)
6210     for node, nres in result.items():
6211       nres.Raise("Cannot disconnect disks node %s" % node)
6212
6213   def _GoReconnect(self, multimaster):
6214     """Reconnect to the network.
6215
6216     """
6217     if multimaster:
6218       msg = "dual-master"
6219     else:
6220       msg = "single-master"
6221     self.feedback_fn("* changing disks into %s mode" % msg)
6222     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6223                                            self.instance.disks,
6224                                            self.instance.name, multimaster)
6225     for node, nres in result.items():
6226       nres.Raise("Cannot change disks config on node %s" % node)
6227
6228   def _ExecCleanup(self):
6229     """Try to cleanup after a failed migration.
6230
6231     The cleanup is done by:
6232       - check that the instance is running only on one node
6233         (and update the config if needed)
6234       - change disks on its secondary node to secondary
6235       - wait until disks are fully synchronized
6236       - disconnect from the network
6237       - change disks into single-master mode
6238       - wait again until disks are fully synchronized
6239
6240     """
6241     instance = self.instance
6242     target_node = self.target_node
6243     source_node = self.source_node
6244
6245     # check running on only one node
6246     self.feedback_fn("* checking where the instance actually runs"
6247                      " (if this hangs, the hypervisor might be in"
6248                      " a bad state)")
6249     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6250     for node, result in ins_l.items():
6251       result.Raise("Can't contact node %s" % node)
6252
6253     runningon_source = instance.name in ins_l[source_node].payload
6254     runningon_target = instance.name in ins_l[target_node].payload
6255
6256     if runningon_source and runningon_target:
6257       raise errors.OpExecError("Instance seems to be running on two nodes,"
6258                                " or the hypervisor is confused. You will have"
6259                                " to ensure manually that it runs only on one"
6260                                " and restart this operation.")
6261
6262     if not (runningon_source or runningon_target):
6263       raise errors.OpExecError("Instance does not seem to be running at all."
6264                                " In this case, it's safer to repair by"
6265                                " running 'gnt-instance stop' to ensure disk"
6266                                " shutdown, and then restarting it.")
6267
6268     if runningon_target:
6269       # the migration has actually succeeded, we need to update the config
6270       self.feedback_fn("* instance running on secondary node (%s),"
6271                        " updating config" % target_node)
6272       instance.primary_node = target_node
6273       self.cfg.Update(instance, self.feedback_fn)
6274       demoted_node = source_node
6275     else:
6276       self.feedback_fn("* instance confirmed to be running on its"
6277                        " primary node (%s)" % source_node)
6278       demoted_node = target_node
6279
6280     self._EnsureSecondary(demoted_node)
6281     try:
6282       self._WaitUntilSync()
6283     except errors.OpExecError:
6284       # we ignore here errors, since if the device is standalone, it
6285       # won't be able to sync
6286       pass
6287     self._GoStandalone()
6288     self._GoReconnect(False)
6289     self._WaitUntilSync()
6290
6291     self.feedback_fn("* done")
6292
6293   def _RevertDiskStatus(self):
6294     """Try to revert the disk status after a failed migration.
6295
6296     """
6297     target_node = self.target_node
6298     try:
6299       self._EnsureSecondary(target_node)
6300       self._GoStandalone()
6301       self._GoReconnect(False)
6302       self._WaitUntilSync()
6303     except errors.OpExecError, err:
6304       self.lu.LogWarning("Migration failed and I can't reconnect the"
6305                          " drives: error '%s'\n"
6306                          "Please look and recover the instance status" %
6307                          str(err))
6308
6309   def _AbortMigration(self):
6310     """Call the hypervisor code to abort a started migration.
6311
6312     """
6313     instance = self.instance
6314     target_node = self.target_node
6315     migration_info = self.migration_info
6316
6317     abort_result = self.rpc.call_finalize_migration(target_node,
6318                                                     instance,
6319                                                     migration_info,
6320                                                     False)
6321     abort_msg = abort_result.fail_msg
6322     if abort_msg:
6323       logging.error("Aborting migration failed on target node %s: %s",
6324                     target_node, abort_msg)
6325       # Don't raise an exception here, as we stil have to try to revert the
6326       # disk status, even if this step failed.
6327
6328   def _ExecMigration(self):
6329     """Migrate an instance.
6330
6331     The migrate is done by:
6332       - change the disks into dual-master mode
6333       - wait until disks are fully synchronized again
6334       - migrate the instance
6335       - change disks on the new secondary node (the old primary) to secondary
6336       - wait until disks are fully synchronized
6337       - change disks into single-master mode
6338
6339     """
6340     instance = self.instance
6341     target_node = self.target_node
6342     source_node = self.source_node
6343
6344     self.feedback_fn("* checking disk consistency between source and target")
6345     for dev in instance.disks:
6346       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6347         raise errors.OpExecError("Disk %s is degraded or not fully"
6348                                  " synchronized on target node,"
6349                                  " aborting migrate." % dev.iv_name)
6350
6351     # First get the migration information from the remote node
6352     result = self.rpc.call_migration_info(source_node, instance)
6353     msg = result.fail_msg
6354     if msg:
6355       log_err = ("Failed fetching source migration information from %s: %s" %
6356                  (source_node, msg))
6357       logging.error(log_err)
6358       raise errors.OpExecError(log_err)
6359
6360     self.migration_info = migration_info = result.payload
6361
6362     # Then switch the disks to master/master mode
6363     self._EnsureSecondary(target_node)
6364     self._GoStandalone()
6365     self._GoReconnect(True)
6366     self._WaitUntilSync()
6367
6368     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6369     result = self.rpc.call_accept_instance(target_node,
6370                                            instance,
6371                                            migration_info,
6372                                            self.nodes_ip[target_node])
6373
6374     msg = result.fail_msg
6375     if msg:
6376       logging.error("Instance pre-migration failed, trying to revert"
6377                     " disk status: %s", msg)
6378       self.feedback_fn("Pre-migration failed, aborting")
6379       self._AbortMigration()
6380       self._RevertDiskStatus()
6381       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6382                                (instance.name, msg))
6383
6384     self.feedback_fn("* migrating instance to %s" % target_node)
6385     time.sleep(10)
6386     result = self.rpc.call_instance_migrate(source_node, instance,
6387                                             self.nodes_ip[target_node],
6388                                             self.live)
6389     msg = result.fail_msg
6390     if msg:
6391       logging.error("Instance migration failed, trying to revert"
6392                     " disk status: %s", msg)
6393       self.feedback_fn("Migration failed, aborting")
6394       self._AbortMigration()
6395       self._RevertDiskStatus()
6396       raise errors.OpExecError("Could not migrate instance %s: %s" %
6397                                (instance.name, msg))
6398     time.sleep(10)
6399
6400     instance.primary_node = target_node
6401     # distribute new instance config to the other nodes
6402     self.cfg.Update(instance, self.feedback_fn)
6403
6404     result = self.rpc.call_finalize_migration(target_node,
6405                                               instance,
6406                                               migration_info,
6407                                               True)
6408     msg = result.fail_msg
6409     if msg:
6410       logging.error("Instance migration succeeded, but finalization failed:"
6411                     " %s", msg)
6412       raise errors.OpExecError("Could not finalize instance migration: %s" %
6413                                msg)
6414
6415     self._EnsureSecondary(source_node)
6416     self._WaitUntilSync()
6417     self._GoStandalone()
6418     self._GoReconnect(False)
6419     self._WaitUntilSync()
6420
6421     self.feedback_fn("* done")
6422
6423   def Exec(self, feedback_fn):
6424     """Perform the migration.
6425
6426     """
6427     feedback_fn("Migrating instance %s" % self.instance.name)
6428
6429     self.feedback_fn = feedback_fn
6430
6431     self.source_node = self.instance.primary_node
6432     self.target_node = self.instance.secondary_nodes[0]
6433     self.all_nodes = [self.source_node, self.target_node]
6434     self.nodes_ip = {
6435       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6436       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6437       }
6438
6439     if self.cleanup:
6440       return self._ExecCleanup()
6441     else:
6442       return self._ExecMigration()
6443
6444
6445 def _CreateBlockDev(lu, node, instance, device, force_create,
6446                     info, force_open):
6447   """Create a tree of block devices on a given node.
6448
6449   If this device type has to be created on secondaries, create it and
6450   all its children.
6451
6452   If not, just recurse to children keeping the same 'force' value.
6453
6454   @param lu: the lu on whose behalf we execute
6455   @param node: the node on which to create the device
6456   @type instance: L{objects.Instance}
6457   @param instance: the instance which owns the device
6458   @type device: L{objects.Disk}
6459   @param device: the device to create
6460   @type force_create: boolean
6461   @param force_create: whether to force creation of this device; this
6462       will be change to True whenever we find a device which has
6463       CreateOnSecondary() attribute
6464   @param info: the extra 'metadata' we should attach to the device
6465       (this will be represented as a LVM tag)
6466   @type force_open: boolean
6467   @param force_open: this parameter will be passes to the
6468       L{backend.BlockdevCreate} function where it specifies
6469       whether we run on primary or not, and it affects both
6470       the child assembly and the device own Open() execution
6471
6472   """
6473   if device.CreateOnSecondary():
6474     force_create = True
6475
6476   if device.children:
6477     for child in device.children:
6478       _CreateBlockDev(lu, node, instance, child, force_create,
6479                       info, force_open)
6480
6481   if not force_create:
6482     return
6483
6484   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6485
6486
6487 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6488   """Create a single block device on a given node.
6489
6490   This will not recurse over children of the device, so they must be
6491   created in advance.
6492
6493   @param lu: the lu on whose behalf we execute
6494   @param node: the node on which to create the device
6495   @type instance: L{objects.Instance}
6496   @param instance: the instance which owns the device
6497   @type device: L{objects.Disk}
6498   @param device: the device to create
6499   @param info: the extra 'metadata' we should attach to the device
6500       (this will be represented as a LVM tag)
6501   @type force_open: boolean
6502   @param force_open: this parameter will be passes to the
6503       L{backend.BlockdevCreate} function where it specifies
6504       whether we run on primary or not, and it affects both
6505       the child assembly and the device own Open() execution
6506
6507   """
6508   lu.cfg.SetDiskID(device, node)
6509   result = lu.rpc.call_blockdev_create(node, device, device.size,
6510                                        instance.name, force_open, info)
6511   result.Raise("Can't create block device %s on"
6512                " node %s for instance %s" % (device, node, instance.name))
6513   if device.physical_id is None:
6514     device.physical_id = result.payload
6515
6516
6517 def _GenerateUniqueNames(lu, exts):
6518   """Generate a suitable LV name.
6519
6520   This will generate a logical volume name for the given instance.
6521
6522   """
6523   results = []
6524   for val in exts:
6525     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6526     results.append("%s%s" % (new_id, val))
6527   return results
6528
6529
6530 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
6531                          iv_name, p_minor, s_minor):
6532   """Generate a drbd8 device complete with its children.
6533
6534   """
6535   assert len(vgnames) == len(names) == 2
6536   port = lu.cfg.AllocatePort()
6537   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6538   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6539                           logical_id=(vgnames[0], names[0]))
6540   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6541                           logical_id=(vgnames[1], names[1]))
6542   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6543                           logical_id=(primary, secondary, port,
6544                                       p_minor, s_minor,
6545                                       shared_secret),
6546                           children=[dev_data, dev_meta],
6547                           iv_name=iv_name)
6548   return drbd_dev
6549
6550
6551 def _GenerateDiskTemplate(lu, template_name,
6552                           instance_name, primary_node,
6553                           secondary_nodes, disk_info,
6554                           file_storage_dir, file_driver,
6555                           base_index, feedback_fn):
6556   """Generate the entire disk layout for a given template type.
6557
6558   """
6559   #TODO: compute space requirements
6560
6561   vgname = lu.cfg.GetVGName()
6562   disk_count = len(disk_info)
6563   disks = []
6564   if template_name == constants.DT_DISKLESS:
6565     pass
6566   elif template_name == constants.DT_PLAIN:
6567     if len(secondary_nodes) != 0:
6568       raise errors.ProgrammerError("Wrong template configuration")
6569
6570     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6571                                       for i in range(disk_count)])
6572     for idx, disk in enumerate(disk_info):
6573       disk_index = idx + base_index
6574       vg = disk.get("vg", vgname)
6575       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6576       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6577                               logical_id=(vg, names[idx]),
6578                               iv_name="disk/%d" % disk_index,
6579                               mode=disk["mode"])
6580       disks.append(disk_dev)
6581   elif template_name == constants.DT_DRBD8:
6582     if len(secondary_nodes) != 1:
6583       raise errors.ProgrammerError("Wrong template configuration")
6584     remote_node = secondary_nodes[0]
6585     minors = lu.cfg.AllocateDRBDMinor(
6586       [primary_node, remote_node] * len(disk_info), instance_name)
6587
6588     names = []
6589     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6590                                                for i in range(disk_count)]):
6591       names.append(lv_prefix + "_data")
6592       names.append(lv_prefix + "_meta")
6593     for idx, disk in enumerate(disk_info):
6594       disk_index = idx + base_index
6595       vg = disk.get("vg", vgname)
6596       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6597                                       disk["size"], [vg, vg],
6598                                       names[idx*2:idx*2+2],
6599                                       "disk/%d" % disk_index,
6600                                       minors[idx*2], minors[idx*2+1])
6601       disk_dev.mode = disk["mode"]
6602       disks.append(disk_dev)
6603   elif template_name == constants.DT_FILE:
6604     if len(secondary_nodes) != 0:
6605       raise errors.ProgrammerError("Wrong template configuration")
6606
6607     opcodes.RequireFileStorage()
6608
6609     for idx, disk in enumerate(disk_info):
6610       disk_index = idx + base_index
6611       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6612                               iv_name="disk/%d" % disk_index,
6613                               logical_id=(file_driver,
6614                                           "%s/disk%d" % (file_storage_dir,
6615                                                          disk_index)),
6616                               mode=disk["mode"])
6617       disks.append(disk_dev)
6618   else:
6619     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6620   return disks
6621
6622
6623 def _GetInstanceInfoText(instance):
6624   """Compute that text that should be added to the disk's metadata.
6625
6626   """
6627   return "originstname+%s" % instance.name
6628
6629
6630 def _CalcEta(time_taken, written, total_size):
6631   """Calculates the ETA based on size written and total size.
6632
6633   @param time_taken: The time taken so far
6634   @param written: amount written so far
6635   @param total_size: The total size of data to be written
6636   @return: The remaining time in seconds
6637
6638   """
6639   avg_time = time_taken / float(written)
6640   return (total_size - written) * avg_time
6641
6642
6643 def _WipeDisks(lu, instance):
6644   """Wipes instance disks.
6645
6646   @type lu: L{LogicalUnit}
6647   @param lu: the logical unit on whose behalf we execute
6648   @type instance: L{objects.Instance}
6649   @param instance: the instance whose disks we should create
6650   @return: the success of the wipe
6651
6652   """
6653   node = instance.primary_node
6654
6655   for device in instance.disks:
6656     lu.cfg.SetDiskID(device, node)
6657
6658   logging.info("Pause sync of instance %s disks", instance.name)
6659   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6660
6661   for idx, success in enumerate(result.payload):
6662     if not success:
6663       logging.warn("pause-sync of instance %s for disks %d failed",
6664                    instance.name, idx)
6665
6666   try:
6667     for idx, device in enumerate(instance.disks):
6668       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6669       # MAX_WIPE_CHUNK at max
6670       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6671                             constants.MIN_WIPE_CHUNK_PERCENT)
6672       # we _must_ make this an int, otherwise rounding errors will
6673       # occur
6674       wipe_chunk_size = int(wipe_chunk_size)
6675
6676       lu.LogInfo("* Wiping disk %d", idx)
6677       logging.info("Wiping disk %d for instance %s, node %s using"
6678                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
6679
6680       offset = 0
6681       size = device.size
6682       last_output = 0
6683       start_time = time.time()
6684
6685       while offset < size:
6686         wipe_size = min(wipe_chunk_size, size - offset)
6687         logging.debug("Wiping disk %d, offset %s, chunk %s",
6688                       idx, offset, wipe_size)
6689         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6690         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6691                      (idx, offset, wipe_size))
6692         now = time.time()
6693         offset += wipe_size
6694         if now - last_output >= 60:
6695           eta = _CalcEta(now - start_time, offset, size)
6696           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6697                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6698           last_output = now
6699   finally:
6700     logging.info("Resume sync of instance %s disks", instance.name)
6701
6702     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6703
6704     for idx, success in enumerate(result.payload):
6705       if not success:
6706         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6707                       " look at the status and troubleshoot the issue.", idx)
6708         logging.warn("resume-sync of instance %s for disks %d failed",
6709                      instance.name, idx)
6710
6711
6712 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6713   """Create all disks for an instance.
6714
6715   This abstracts away some work from AddInstance.
6716
6717   @type lu: L{LogicalUnit}
6718   @param lu: the logical unit on whose behalf we execute
6719   @type instance: L{objects.Instance}
6720   @param instance: the instance whose disks we should create
6721   @type to_skip: list
6722   @param to_skip: list of indices to skip
6723   @type target_node: string
6724   @param target_node: if passed, overrides the target node for creation
6725   @rtype: boolean
6726   @return: the success of the creation
6727
6728   """
6729   info = _GetInstanceInfoText(instance)
6730   if target_node is None:
6731     pnode = instance.primary_node
6732     all_nodes = instance.all_nodes
6733   else:
6734     pnode = target_node
6735     all_nodes = [pnode]
6736
6737   if instance.disk_template == constants.DT_FILE:
6738     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6739     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6740
6741     result.Raise("Failed to create directory '%s' on"
6742                  " node %s" % (file_storage_dir, pnode))
6743
6744   # Note: this needs to be kept in sync with adding of disks in
6745   # LUInstanceSetParams
6746   for idx, device in enumerate(instance.disks):
6747     if to_skip and idx in to_skip:
6748       continue
6749     logging.info("Creating volume %s for instance %s",
6750                  device.iv_name, instance.name)
6751     #HARDCODE
6752     for node in all_nodes:
6753       f_create = node == pnode
6754       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6755
6756
6757 def _RemoveDisks(lu, instance, target_node=None):
6758   """Remove all disks for an instance.
6759
6760   This abstracts away some work from `AddInstance()` and
6761   `RemoveInstance()`. Note that in case some of the devices couldn't
6762   be removed, the removal will continue with the other ones (compare
6763   with `_CreateDisks()`).
6764
6765   @type lu: L{LogicalUnit}
6766   @param lu: the logical unit on whose behalf we execute
6767   @type instance: L{objects.Instance}
6768   @param instance: the instance whose disks we should remove
6769   @type target_node: string
6770   @param target_node: used to override the node on which to remove the disks
6771   @rtype: boolean
6772   @return: the success of the removal
6773
6774   """
6775   logging.info("Removing block devices for instance %s", instance.name)
6776
6777   all_result = True
6778   for device in instance.disks:
6779     if target_node:
6780       edata = [(target_node, device)]
6781     else:
6782       edata = device.ComputeNodeTree(instance.primary_node)
6783     for node, disk in edata:
6784       lu.cfg.SetDiskID(disk, node)
6785       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6786       if msg:
6787         lu.LogWarning("Could not remove block device %s on node %s,"
6788                       " continuing anyway: %s", device.iv_name, node, msg)
6789         all_result = False
6790
6791   if instance.disk_template == constants.DT_FILE:
6792     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6793     if target_node:
6794       tgt = target_node
6795     else:
6796       tgt = instance.primary_node
6797     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6798     if result.fail_msg:
6799       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6800                     file_storage_dir, instance.primary_node, result.fail_msg)
6801       all_result = False
6802
6803   return all_result
6804
6805
6806 def _ComputeDiskSizePerVG(disk_template, disks):
6807   """Compute disk size requirements in the volume group
6808
6809   """
6810   def _compute(disks, payload):
6811     """Universal algorithm
6812
6813     """
6814     vgs = {}
6815     for disk in disks:
6816       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6817
6818     return vgs
6819
6820   # Required free disk space as a function of disk and swap space
6821   req_size_dict = {
6822     constants.DT_DISKLESS: {},
6823     constants.DT_PLAIN: _compute(disks, 0),
6824     # 128 MB are added for drbd metadata for each disk
6825     constants.DT_DRBD8: _compute(disks, 128),
6826     constants.DT_FILE: {},
6827   }
6828
6829   if disk_template not in req_size_dict:
6830     raise errors.ProgrammerError("Disk template '%s' size requirement"
6831                                  " is unknown" %  disk_template)
6832
6833   return req_size_dict[disk_template]
6834
6835
6836 def _ComputeDiskSize(disk_template, disks):
6837   """Compute disk size requirements in the volume group
6838
6839   """
6840   # Required free disk space as a function of disk and swap space
6841   req_size_dict = {
6842     constants.DT_DISKLESS: None,
6843     constants.DT_PLAIN: sum(d["size"] for d in disks),
6844     # 128 MB are added for drbd metadata for each disk
6845     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6846     constants.DT_FILE: None,
6847   }
6848
6849   if disk_template not in req_size_dict:
6850     raise errors.ProgrammerError("Disk template '%s' size requirement"
6851                                  " is unknown" %  disk_template)
6852
6853   return req_size_dict[disk_template]
6854
6855
6856 def _FilterVmNodes(lu, nodenames):
6857   """Filters out non-vm_capable nodes from a list.
6858
6859   @type lu: L{LogicalUnit}
6860   @param lu: the logical unit for which we check
6861   @type nodenames: list
6862   @param nodenames: the list of nodes on which we should check
6863   @rtype: list
6864   @return: the list of vm-capable nodes
6865
6866   """
6867   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6868   return [name for name in nodenames if name not in vm_nodes]
6869
6870
6871 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6872   """Hypervisor parameter validation.
6873
6874   This function abstract the hypervisor parameter validation to be
6875   used in both instance create and instance modify.
6876
6877   @type lu: L{LogicalUnit}
6878   @param lu: the logical unit for which we check
6879   @type nodenames: list
6880   @param nodenames: the list of nodes on which we should check
6881   @type hvname: string
6882   @param hvname: the name of the hypervisor we should use
6883   @type hvparams: dict
6884   @param hvparams: the parameters which we need to check
6885   @raise errors.OpPrereqError: if the parameters are not valid
6886
6887   """
6888   nodenames = _FilterVmNodes(lu, nodenames)
6889   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6890                                                   hvname,
6891                                                   hvparams)
6892   for node in nodenames:
6893     info = hvinfo[node]
6894     if info.offline:
6895       continue
6896     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6897
6898
6899 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6900   """OS parameters validation.
6901
6902   @type lu: L{LogicalUnit}
6903   @param lu: the logical unit for which we check
6904   @type required: boolean
6905   @param required: whether the validation should fail if the OS is not
6906       found
6907   @type nodenames: list
6908   @param nodenames: the list of nodes on which we should check
6909   @type osname: string
6910   @param osname: the name of the hypervisor we should use
6911   @type osparams: dict
6912   @param osparams: the parameters which we need to check
6913   @raise errors.OpPrereqError: if the parameters are not valid
6914
6915   """
6916   nodenames = _FilterVmNodes(lu, nodenames)
6917   result = lu.rpc.call_os_validate(required, nodenames, osname,
6918                                    [constants.OS_VALIDATE_PARAMETERS],
6919                                    osparams)
6920   for node, nres in result.items():
6921     # we don't check for offline cases since this should be run only
6922     # against the master node and/or an instance's nodes
6923     nres.Raise("OS Parameters validation failed on node %s" % node)
6924     if not nres.payload:
6925       lu.LogInfo("OS %s not found on node %s, validation skipped",
6926                  osname, node)
6927
6928
6929 class LUInstanceCreate(LogicalUnit):
6930   """Create an instance.
6931
6932   """
6933   HPATH = "instance-add"
6934   HTYPE = constants.HTYPE_INSTANCE
6935   REQ_BGL = False
6936
6937   def CheckArguments(self):
6938     """Check arguments.
6939
6940     """
6941     # do not require name_check to ease forward/backward compatibility
6942     # for tools
6943     if self.op.no_install and self.op.start:
6944       self.LogInfo("No-installation mode selected, disabling startup")
6945       self.op.start = False
6946     # validate/normalize the instance name
6947     self.op.instance_name = \
6948       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6949
6950     if self.op.ip_check and not self.op.name_check:
6951       # TODO: make the ip check more flexible and not depend on the name check
6952       raise errors.OpPrereqError("Cannot do ip check without a name check",
6953                                  errors.ECODE_INVAL)
6954
6955     # check nics' parameter names
6956     for nic in self.op.nics:
6957       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6958
6959     # check disks. parameter names and consistent adopt/no-adopt strategy
6960     has_adopt = has_no_adopt = False
6961     for disk in self.op.disks:
6962       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6963       if "adopt" in disk:
6964         has_adopt = True
6965       else:
6966         has_no_adopt = True
6967     if has_adopt and has_no_adopt:
6968       raise errors.OpPrereqError("Either all disks are adopted or none is",
6969                                  errors.ECODE_INVAL)
6970     if has_adopt:
6971       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6972         raise errors.OpPrereqError("Disk adoption is not supported for the"
6973                                    " '%s' disk template" %
6974                                    self.op.disk_template,
6975                                    errors.ECODE_INVAL)
6976       if self.op.iallocator is not None:
6977         raise errors.OpPrereqError("Disk adoption not allowed with an"
6978                                    " iallocator script", errors.ECODE_INVAL)
6979       if self.op.mode == constants.INSTANCE_IMPORT:
6980         raise errors.OpPrereqError("Disk adoption not allowed for"
6981                                    " instance import", errors.ECODE_INVAL)
6982
6983     self.adopt_disks = has_adopt
6984
6985     # instance name verification
6986     if self.op.name_check:
6987       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6988       self.op.instance_name = self.hostname1.name
6989       # used in CheckPrereq for ip ping check
6990       self.check_ip = self.hostname1.ip
6991     else:
6992       self.check_ip = None
6993
6994     # file storage checks
6995     if (self.op.file_driver and
6996         not self.op.file_driver in constants.FILE_DRIVER):
6997       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6998                                  self.op.file_driver, errors.ECODE_INVAL)
6999
7000     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7001       raise errors.OpPrereqError("File storage directory path not absolute",
7002                                  errors.ECODE_INVAL)
7003
7004     ### Node/iallocator related checks
7005     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7006
7007     if self.op.pnode is not None:
7008       if self.op.disk_template in constants.DTS_NET_MIRROR:
7009         if self.op.snode is None:
7010           raise errors.OpPrereqError("The networked disk templates need"
7011                                      " a mirror node", errors.ECODE_INVAL)
7012       elif self.op.snode:
7013         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7014                         " template")
7015         self.op.snode = None
7016
7017     self._cds = _GetClusterDomainSecret()
7018
7019     if self.op.mode == constants.INSTANCE_IMPORT:
7020       # On import force_variant must be True, because if we forced it at
7021       # initial install, our only chance when importing it back is that it
7022       # works again!
7023       self.op.force_variant = True
7024
7025       if self.op.no_install:
7026         self.LogInfo("No-installation mode has no effect during import")
7027
7028     elif self.op.mode == constants.INSTANCE_CREATE:
7029       if self.op.os_type is None:
7030         raise errors.OpPrereqError("No guest OS specified",
7031                                    errors.ECODE_INVAL)
7032       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7033         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7034                                    " installation" % self.op.os_type,
7035                                    errors.ECODE_STATE)
7036       if self.op.disk_template is None:
7037         raise errors.OpPrereqError("No disk template specified",
7038                                    errors.ECODE_INVAL)
7039
7040     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7041       # Check handshake to ensure both clusters have the same domain secret
7042       src_handshake = self.op.source_handshake
7043       if not src_handshake:
7044         raise errors.OpPrereqError("Missing source handshake",
7045                                    errors.ECODE_INVAL)
7046
7047       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7048                                                            src_handshake)
7049       if errmsg:
7050         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7051                                    errors.ECODE_INVAL)
7052
7053       # Load and check source CA
7054       self.source_x509_ca_pem = self.op.source_x509_ca
7055       if not self.source_x509_ca_pem:
7056         raise errors.OpPrereqError("Missing source X509 CA",
7057                                    errors.ECODE_INVAL)
7058
7059       try:
7060         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7061                                                     self._cds)
7062       except OpenSSL.crypto.Error, err:
7063         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7064                                    (err, ), errors.ECODE_INVAL)
7065
7066       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7067       if errcode is not None:
7068         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7069                                    errors.ECODE_INVAL)
7070
7071       self.source_x509_ca = cert
7072
7073       src_instance_name = self.op.source_instance_name
7074       if not src_instance_name:
7075         raise errors.OpPrereqError("Missing source instance name",
7076                                    errors.ECODE_INVAL)
7077
7078       self.source_instance_name = \
7079           netutils.GetHostname(name=src_instance_name).name
7080
7081     else:
7082       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7083                                  self.op.mode, errors.ECODE_INVAL)
7084
7085   def ExpandNames(self):
7086     """ExpandNames for CreateInstance.
7087
7088     Figure out the right locks for instance creation.
7089
7090     """
7091     self.needed_locks = {}
7092
7093     instance_name = self.op.instance_name
7094     # this is just a preventive check, but someone might still add this
7095     # instance in the meantime, and creation will fail at lock-add time
7096     if instance_name in self.cfg.GetInstanceList():
7097       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7098                                  instance_name, errors.ECODE_EXISTS)
7099
7100     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7101
7102     if self.op.iallocator:
7103       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7104     else:
7105       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7106       nodelist = [self.op.pnode]
7107       if self.op.snode is not None:
7108         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7109         nodelist.append(self.op.snode)
7110       self.needed_locks[locking.LEVEL_NODE] = nodelist
7111
7112     # in case of import lock the source node too
7113     if self.op.mode == constants.INSTANCE_IMPORT:
7114       src_node = self.op.src_node
7115       src_path = self.op.src_path
7116
7117       if src_path is None:
7118         self.op.src_path = src_path = self.op.instance_name
7119
7120       if src_node is None:
7121         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7122         self.op.src_node = None
7123         if os.path.isabs(src_path):
7124           raise errors.OpPrereqError("Importing an instance from an absolute"
7125                                      " path requires a source node option.",
7126                                      errors.ECODE_INVAL)
7127       else:
7128         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7129         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7130           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7131         if not os.path.isabs(src_path):
7132           self.op.src_path = src_path = \
7133             utils.PathJoin(constants.EXPORT_DIR, src_path)
7134
7135   def _RunAllocator(self):
7136     """Run the allocator based on input opcode.
7137
7138     """
7139     nics = [n.ToDict() for n in self.nics]
7140     ial = IAllocator(self.cfg, self.rpc,
7141                      mode=constants.IALLOCATOR_MODE_ALLOC,
7142                      name=self.op.instance_name,
7143                      disk_template=self.op.disk_template,
7144                      tags=[],
7145                      os=self.op.os_type,
7146                      vcpus=self.be_full[constants.BE_VCPUS],
7147                      mem_size=self.be_full[constants.BE_MEMORY],
7148                      disks=self.disks,
7149                      nics=nics,
7150                      hypervisor=self.op.hypervisor,
7151                      )
7152
7153     ial.Run(self.op.iallocator)
7154
7155     if not ial.success:
7156       raise errors.OpPrereqError("Can't compute nodes using"
7157                                  " iallocator '%s': %s" %
7158                                  (self.op.iallocator, ial.info),
7159                                  errors.ECODE_NORES)
7160     if len(ial.result) != ial.required_nodes:
7161       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7162                                  " of nodes (%s), required %s" %
7163                                  (self.op.iallocator, len(ial.result),
7164                                   ial.required_nodes), errors.ECODE_FAULT)
7165     self.op.pnode = ial.result[0]
7166     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7167                  self.op.instance_name, self.op.iallocator,
7168                  utils.CommaJoin(ial.result))
7169     if ial.required_nodes == 2:
7170       self.op.snode = ial.result[1]
7171
7172   def BuildHooksEnv(self):
7173     """Build hooks env.
7174
7175     This runs on master, primary and secondary nodes of the instance.
7176
7177     """
7178     env = {
7179       "ADD_MODE": self.op.mode,
7180       }
7181     if self.op.mode == constants.INSTANCE_IMPORT:
7182       env["SRC_NODE"] = self.op.src_node
7183       env["SRC_PATH"] = self.op.src_path
7184       env["SRC_IMAGES"] = self.src_images
7185
7186     env.update(_BuildInstanceHookEnv(
7187       name=self.op.instance_name,
7188       primary_node=self.op.pnode,
7189       secondary_nodes=self.secondaries,
7190       status=self.op.start,
7191       os_type=self.op.os_type,
7192       memory=self.be_full[constants.BE_MEMORY],
7193       vcpus=self.be_full[constants.BE_VCPUS],
7194       nics=_NICListToTuple(self, self.nics),
7195       disk_template=self.op.disk_template,
7196       disks=[(d["size"], d["mode"]) for d in self.disks],
7197       bep=self.be_full,
7198       hvp=self.hv_full,
7199       hypervisor_name=self.op.hypervisor,
7200     ))
7201
7202     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7203           self.secondaries)
7204     return env, nl, nl
7205
7206   def _ReadExportInfo(self):
7207     """Reads the export information from disk.
7208
7209     It will override the opcode source node and path with the actual
7210     information, if these two were not specified before.
7211
7212     @return: the export information
7213
7214     """
7215     assert self.op.mode == constants.INSTANCE_IMPORT
7216
7217     src_node = self.op.src_node
7218     src_path = self.op.src_path
7219
7220     if src_node is None:
7221       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7222       exp_list = self.rpc.call_export_list(locked_nodes)
7223       found = False
7224       for node in exp_list:
7225         if exp_list[node].fail_msg:
7226           continue
7227         if src_path in exp_list[node].payload:
7228           found = True
7229           self.op.src_node = src_node = node
7230           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7231                                                        src_path)
7232           break
7233       if not found:
7234         raise errors.OpPrereqError("No export found for relative path %s" %
7235                                     src_path, errors.ECODE_INVAL)
7236
7237     _CheckNodeOnline(self, src_node)
7238     result = self.rpc.call_export_info(src_node, src_path)
7239     result.Raise("No export or invalid export found in dir %s" % src_path)
7240
7241     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7242     if not export_info.has_section(constants.INISECT_EXP):
7243       raise errors.ProgrammerError("Corrupted export config",
7244                                    errors.ECODE_ENVIRON)
7245
7246     ei_version = export_info.get(constants.INISECT_EXP, "version")
7247     if (int(ei_version) != constants.EXPORT_VERSION):
7248       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7249                                  (ei_version, constants.EXPORT_VERSION),
7250                                  errors.ECODE_ENVIRON)
7251     return export_info
7252
7253   def _ReadExportParams(self, einfo):
7254     """Use export parameters as defaults.
7255
7256     In case the opcode doesn't specify (as in override) some instance
7257     parameters, then try to use them from the export information, if
7258     that declares them.
7259
7260     """
7261     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7262
7263     if self.op.disk_template is None:
7264       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7265         self.op.disk_template = einfo.get(constants.INISECT_INS,
7266                                           "disk_template")
7267       else:
7268         raise errors.OpPrereqError("No disk template specified and the export"
7269                                    " is missing the disk_template information",
7270                                    errors.ECODE_INVAL)
7271
7272     if not self.op.disks:
7273       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7274         disks = []
7275         # TODO: import the disk iv_name too
7276         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7277           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7278           disks.append({"size": disk_sz})
7279         self.op.disks = disks
7280       else:
7281         raise errors.OpPrereqError("No disk info specified and the export"
7282                                    " is missing the disk information",
7283                                    errors.ECODE_INVAL)
7284
7285     if (not self.op.nics and
7286         einfo.has_option(constants.INISECT_INS, "nic_count")):
7287       nics = []
7288       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7289         ndict = {}
7290         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7291           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7292           ndict[name] = v
7293         nics.append(ndict)
7294       self.op.nics = nics
7295
7296     if (self.op.hypervisor is None and
7297         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7298       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7299     if einfo.has_section(constants.INISECT_HYP):
7300       # use the export parameters but do not override the ones
7301       # specified by the user
7302       for name, value in einfo.items(constants.INISECT_HYP):
7303         if name not in self.op.hvparams:
7304           self.op.hvparams[name] = value
7305
7306     if einfo.has_section(constants.INISECT_BEP):
7307       # use the parameters, without overriding
7308       for name, value in einfo.items(constants.INISECT_BEP):
7309         if name not in self.op.beparams:
7310           self.op.beparams[name] = value
7311     else:
7312       # try to read the parameters old style, from the main section
7313       for name in constants.BES_PARAMETERS:
7314         if (name not in self.op.beparams and
7315             einfo.has_option(constants.INISECT_INS, name)):
7316           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7317
7318     if einfo.has_section(constants.INISECT_OSP):
7319       # use the parameters, without overriding
7320       for name, value in einfo.items(constants.INISECT_OSP):
7321         if name not in self.op.osparams:
7322           self.op.osparams[name] = value
7323
7324   def _RevertToDefaults(self, cluster):
7325     """Revert the instance parameters to the default values.
7326
7327     """
7328     # hvparams
7329     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7330     for name in self.op.hvparams.keys():
7331       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7332         del self.op.hvparams[name]
7333     # beparams
7334     be_defs = cluster.SimpleFillBE({})
7335     for name in self.op.beparams.keys():
7336       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7337         del self.op.beparams[name]
7338     # nic params
7339     nic_defs = cluster.SimpleFillNIC({})
7340     for nic in self.op.nics:
7341       for name in constants.NICS_PARAMETERS:
7342         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7343           del nic[name]
7344     # osparams
7345     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7346     for name in self.op.osparams.keys():
7347       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7348         del self.op.osparams[name]
7349
7350   def CheckPrereq(self):
7351     """Check prerequisites.
7352
7353     """
7354     if self.op.mode == constants.INSTANCE_IMPORT:
7355       export_info = self._ReadExportInfo()
7356       self._ReadExportParams(export_info)
7357
7358     if (not self.cfg.GetVGName() and
7359         self.op.disk_template not in constants.DTS_NOT_LVM):
7360       raise errors.OpPrereqError("Cluster does not support lvm-based"
7361                                  " instances", errors.ECODE_STATE)
7362
7363     if self.op.hypervisor is None:
7364       self.op.hypervisor = self.cfg.GetHypervisorType()
7365
7366     cluster = self.cfg.GetClusterInfo()
7367     enabled_hvs = cluster.enabled_hypervisors
7368     if self.op.hypervisor not in enabled_hvs:
7369       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7370                                  " cluster (%s)" % (self.op.hypervisor,
7371                                   ",".join(enabled_hvs)),
7372                                  errors.ECODE_STATE)
7373
7374     # check hypervisor parameter syntax (locally)
7375     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7376     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7377                                       self.op.hvparams)
7378     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7379     hv_type.CheckParameterSyntax(filled_hvp)
7380     self.hv_full = filled_hvp
7381     # check that we don't specify global parameters on an instance
7382     _CheckGlobalHvParams(self.op.hvparams)
7383
7384     # fill and remember the beparams dict
7385     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7386     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7387
7388     # build os parameters
7389     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7390
7391     # now that hvp/bep are in final format, let's reset to defaults,
7392     # if told to do so
7393     if self.op.identify_defaults:
7394       self._RevertToDefaults(cluster)
7395
7396     # NIC buildup
7397     self.nics = []
7398     for idx, nic in enumerate(self.op.nics):
7399       nic_mode_req = nic.get("mode", None)
7400       nic_mode = nic_mode_req
7401       if nic_mode is None:
7402         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7403
7404       # in routed mode, for the first nic, the default ip is 'auto'
7405       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7406         default_ip_mode = constants.VALUE_AUTO
7407       else:
7408         default_ip_mode = constants.VALUE_NONE
7409
7410       # ip validity checks
7411       ip = nic.get("ip", default_ip_mode)
7412       if ip is None or ip.lower() == constants.VALUE_NONE:
7413         nic_ip = None
7414       elif ip.lower() == constants.VALUE_AUTO:
7415         if not self.op.name_check:
7416           raise errors.OpPrereqError("IP address set to auto but name checks"
7417                                      " have been skipped",
7418                                      errors.ECODE_INVAL)
7419         nic_ip = self.hostname1.ip
7420       else:
7421         if not netutils.IPAddress.IsValid(ip):
7422           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7423                                      errors.ECODE_INVAL)
7424         nic_ip = ip
7425
7426       # TODO: check the ip address for uniqueness
7427       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7428         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7429                                    errors.ECODE_INVAL)
7430
7431       # MAC address verification
7432       mac = nic.get("mac", constants.VALUE_AUTO)
7433       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7434         mac = utils.NormalizeAndValidateMac(mac)
7435
7436         try:
7437           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7438         except errors.ReservationError:
7439           raise errors.OpPrereqError("MAC address %s already in use"
7440                                      " in cluster" % mac,
7441                                      errors.ECODE_NOTUNIQUE)
7442
7443       # bridge verification
7444       bridge = nic.get("bridge", None)
7445       link = nic.get("link", None)
7446       if bridge and link:
7447         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7448                                    " at the same time", errors.ECODE_INVAL)
7449       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7450         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7451                                    errors.ECODE_INVAL)
7452       elif bridge:
7453         link = bridge
7454
7455       nicparams = {}
7456       if nic_mode_req:
7457         nicparams[constants.NIC_MODE] = nic_mode_req
7458       if link:
7459         nicparams[constants.NIC_LINK] = link
7460
7461       check_params = cluster.SimpleFillNIC(nicparams)
7462       objects.NIC.CheckParameterSyntax(check_params)
7463       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7464
7465     # disk checks/pre-build
7466     self.disks = []
7467     for disk in self.op.disks:
7468       mode = disk.get("mode", constants.DISK_RDWR)
7469       if mode not in constants.DISK_ACCESS_SET:
7470         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7471                                    mode, errors.ECODE_INVAL)
7472       size = disk.get("size", None)
7473       if size is None:
7474         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7475       try:
7476         size = int(size)
7477       except (TypeError, ValueError):
7478         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7479                                    errors.ECODE_INVAL)
7480       vg = disk.get("vg", self.cfg.GetVGName())
7481       new_disk = {"size": size, "mode": mode, "vg": vg}
7482       if "adopt" in disk:
7483         new_disk["adopt"] = disk["adopt"]
7484       self.disks.append(new_disk)
7485
7486     if self.op.mode == constants.INSTANCE_IMPORT:
7487
7488       # Check that the new instance doesn't have less disks than the export
7489       instance_disks = len(self.disks)
7490       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7491       if instance_disks < export_disks:
7492         raise errors.OpPrereqError("Not enough disks to import."
7493                                    " (instance: %d, export: %d)" %
7494                                    (instance_disks, export_disks),
7495                                    errors.ECODE_INVAL)
7496
7497       disk_images = []
7498       for idx in range(export_disks):
7499         option = 'disk%d_dump' % idx
7500         if export_info.has_option(constants.INISECT_INS, option):
7501           # FIXME: are the old os-es, disk sizes, etc. useful?
7502           export_name = export_info.get(constants.INISECT_INS, option)
7503           image = utils.PathJoin(self.op.src_path, export_name)
7504           disk_images.append(image)
7505         else:
7506           disk_images.append(False)
7507
7508       self.src_images = disk_images
7509
7510       old_name = export_info.get(constants.INISECT_INS, 'name')
7511       try:
7512         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7513       except (TypeError, ValueError), err:
7514         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7515                                    " an integer: %s" % str(err),
7516                                    errors.ECODE_STATE)
7517       if self.op.instance_name == old_name:
7518         for idx, nic in enumerate(self.nics):
7519           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7520             nic_mac_ini = 'nic%d_mac' % idx
7521             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7522
7523     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7524
7525     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7526     if self.op.ip_check:
7527       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7528         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7529                                    (self.check_ip, self.op.instance_name),
7530                                    errors.ECODE_NOTUNIQUE)
7531
7532     #### mac address generation
7533     # By generating here the mac address both the allocator and the hooks get
7534     # the real final mac address rather than the 'auto' or 'generate' value.
7535     # There is a race condition between the generation and the instance object
7536     # creation, which means that we know the mac is valid now, but we're not
7537     # sure it will be when we actually add the instance. If things go bad
7538     # adding the instance will abort because of a duplicate mac, and the
7539     # creation job will fail.
7540     for nic in self.nics:
7541       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7542         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7543
7544     #### allocator run
7545
7546     if self.op.iallocator is not None:
7547       self._RunAllocator()
7548
7549     #### node related checks
7550
7551     # check primary node
7552     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7553     assert self.pnode is not None, \
7554       "Cannot retrieve locked node %s" % self.op.pnode
7555     if pnode.offline:
7556       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7557                                  pnode.name, errors.ECODE_STATE)
7558     if pnode.drained:
7559       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7560                                  pnode.name, errors.ECODE_STATE)
7561     if not pnode.vm_capable:
7562       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7563                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7564
7565     self.secondaries = []
7566
7567     # mirror node verification
7568     if self.op.disk_template in constants.DTS_NET_MIRROR:
7569       if self.op.snode == pnode.name:
7570         raise errors.OpPrereqError("The secondary node cannot be the"
7571                                    " primary node.", errors.ECODE_INVAL)
7572       _CheckNodeOnline(self, self.op.snode)
7573       _CheckNodeNotDrained(self, self.op.snode)
7574       _CheckNodeVmCapable(self, self.op.snode)
7575       self.secondaries.append(self.op.snode)
7576
7577     nodenames = [pnode.name] + self.secondaries
7578
7579     if not self.adopt_disks:
7580       # Check lv size requirements, if not adopting
7581       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7582       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7583
7584     else: # instead, we must check the adoption data
7585       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7586       if len(all_lvs) != len(self.disks):
7587         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7588                                    errors.ECODE_INVAL)
7589       for lv_name in all_lvs:
7590         try:
7591           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7592           # to ReserveLV uses the same syntax
7593           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7594         except errors.ReservationError:
7595           raise errors.OpPrereqError("LV named %s used by another instance" %
7596                                      lv_name, errors.ECODE_NOTUNIQUE)
7597
7598       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7599       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7600
7601       node_lvs = self.rpc.call_lv_list([pnode.name],
7602                                        vg_names.payload.keys())[pnode.name]
7603       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7604       node_lvs = node_lvs.payload
7605
7606       delta = all_lvs.difference(node_lvs.keys())
7607       if delta:
7608         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7609                                    utils.CommaJoin(delta),
7610                                    errors.ECODE_INVAL)
7611       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7612       if online_lvs:
7613         raise errors.OpPrereqError("Online logical volumes found, cannot"
7614                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7615                                    errors.ECODE_STATE)
7616       # update the size of disk based on what is found
7617       for dsk in self.disks:
7618         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7619
7620     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7621
7622     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7623     # check OS parameters (remotely)
7624     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7625
7626     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7627
7628     # memory check on primary node
7629     if self.op.start:
7630       _CheckNodeFreeMemory(self, self.pnode.name,
7631                            "creating instance %s" % self.op.instance_name,
7632                            self.be_full[constants.BE_MEMORY],
7633                            self.op.hypervisor)
7634
7635     self.dry_run_result = list(nodenames)
7636
7637   def Exec(self, feedback_fn):
7638     """Create and add the instance to the cluster.
7639
7640     """
7641     instance = self.op.instance_name
7642     pnode_name = self.pnode.name
7643
7644     ht_kind = self.op.hypervisor
7645     if ht_kind in constants.HTS_REQ_PORT:
7646       network_port = self.cfg.AllocatePort()
7647     else:
7648       network_port = None
7649
7650     if constants.ENABLE_FILE_STORAGE:
7651       # this is needed because os.path.join does not accept None arguments
7652       if self.op.file_storage_dir is None:
7653         string_file_storage_dir = ""
7654       else:
7655         string_file_storage_dir = self.op.file_storage_dir
7656
7657       # build the full file storage dir path
7658       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7659                                         string_file_storage_dir, instance)
7660     else:
7661       file_storage_dir = ""
7662
7663     disks = _GenerateDiskTemplate(self,
7664                                   self.op.disk_template,
7665                                   instance, pnode_name,
7666                                   self.secondaries,
7667                                   self.disks,
7668                                   file_storage_dir,
7669                                   self.op.file_driver,
7670                                   0,
7671                                   feedback_fn)
7672
7673     iobj = objects.Instance(name=instance, os=self.op.os_type,
7674                             primary_node=pnode_name,
7675                             nics=self.nics, disks=disks,
7676                             disk_template=self.op.disk_template,
7677                             admin_up=False,
7678                             network_port=network_port,
7679                             beparams=self.op.beparams,
7680                             hvparams=self.op.hvparams,
7681                             hypervisor=self.op.hypervisor,
7682                             osparams=self.op.osparams,
7683                             )
7684
7685     if self.adopt_disks:
7686       # rename LVs to the newly-generated names; we need to construct
7687       # 'fake' LV disks with the old data, plus the new unique_id
7688       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7689       rename_to = []
7690       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7691         rename_to.append(t_dsk.logical_id)
7692         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7693         self.cfg.SetDiskID(t_dsk, pnode_name)
7694       result = self.rpc.call_blockdev_rename(pnode_name,
7695                                              zip(tmp_disks, rename_to))
7696       result.Raise("Failed to rename adoped LVs")
7697     else:
7698       feedback_fn("* creating instance disks...")
7699       try:
7700         _CreateDisks(self, iobj)
7701       except errors.OpExecError:
7702         self.LogWarning("Device creation failed, reverting...")
7703         try:
7704           _RemoveDisks(self, iobj)
7705         finally:
7706           self.cfg.ReleaseDRBDMinors(instance)
7707           raise
7708
7709     feedback_fn("adding instance %s to cluster config" % instance)
7710
7711     self.cfg.AddInstance(iobj, self.proc.GetECId())
7712
7713     # Declare that we don't want to remove the instance lock anymore, as we've
7714     # added the instance to the config
7715     del self.remove_locks[locking.LEVEL_INSTANCE]
7716     # Unlock all the nodes
7717     if self.op.mode == constants.INSTANCE_IMPORT:
7718       nodes_keep = [self.op.src_node]
7719       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7720                        if node != self.op.src_node]
7721       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7722       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7723     else:
7724       self.context.glm.release(locking.LEVEL_NODE)
7725       del self.acquired_locks[locking.LEVEL_NODE]
7726
7727     disk_abort = False
7728     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7729       feedback_fn("* wiping instance disks...")
7730       try:
7731         _WipeDisks(self, iobj)
7732       except errors.OpExecError, err:
7733         logging.exception("Wiping disks failed")
7734         self.LogWarning("Wiping instance disks failed (%s)", err)
7735         disk_abort = True
7736
7737     if disk_abort:
7738       # Something is already wrong with the disks, don't do anything else
7739       pass
7740     elif self.op.wait_for_sync:
7741       disk_abort = not _WaitForSync(self, iobj)
7742     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7743       # make sure the disks are not degraded (still sync-ing is ok)
7744       time.sleep(15)
7745       feedback_fn("* checking mirrors status")
7746       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7747     else:
7748       disk_abort = False
7749
7750     if disk_abort:
7751       _RemoveDisks(self, iobj)
7752       self.cfg.RemoveInstance(iobj.name)
7753       # Make sure the instance lock gets removed
7754       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7755       raise errors.OpExecError("There are some degraded disks for"
7756                                " this instance")
7757
7758     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7759       if self.op.mode == constants.INSTANCE_CREATE:
7760         if not self.op.no_install:
7761           feedback_fn("* running the instance OS create scripts...")
7762           # FIXME: pass debug option from opcode to backend
7763           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7764                                                  self.op.debug_level)
7765           result.Raise("Could not add os for instance %s"
7766                        " on node %s" % (instance, pnode_name))
7767
7768       elif self.op.mode == constants.INSTANCE_IMPORT:
7769         feedback_fn("* running the instance OS import scripts...")
7770
7771         transfers = []
7772
7773         for idx, image in enumerate(self.src_images):
7774           if not image:
7775             continue
7776
7777           # FIXME: pass debug option from opcode to backend
7778           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7779                                              constants.IEIO_FILE, (image, ),
7780                                              constants.IEIO_SCRIPT,
7781                                              (iobj.disks[idx], idx),
7782                                              None)
7783           transfers.append(dt)
7784
7785         import_result = \
7786           masterd.instance.TransferInstanceData(self, feedback_fn,
7787                                                 self.op.src_node, pnode_name,
7788                                                 self.pnode.secondary_ip,
7789                                                 iobj, transfers)
7790         if not compat.all(import_result):
7791           self.LogWarning("Some disks for instance %s on node %s were not"
7792                           " imported successfully" % (instance, pnode_name))
7793
7794       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7795         feedback_fn("* preparing remote import...")
7796         # The source cluster will stop the instance before attempting to make a
7797         # connection. In some cases stopping an instance can take a long time,
7798         # hence the shutdown timeout is added to the connection timeout.
7799         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7800                            self.op.source_shutdown_timeout)
7801         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7802
7803         assert iobj.primary_node == self.pnode.name
7804         disk_results = \
7805           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7806                                         self.source_x509_ca,
7807                                         self._cds, timeouts)
7808         if not compat.all(disk_results):
7809           # TODO: Should the instance still be started, even if some disks
7810           # failed to import (valid for local imports, too)?
7811           self.LogWarning("Some disks for instance %s on node %s were not"
7812                           " imported successfully" % (instance, pnode_name))
7813
7814         # Run rename script on newly imported instance
7815         assert iobj.name == instance
7816         feedback_fn("Running rename script for %s" % instance)
7817         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7818                                                    self.source_instance_name,
7819                                                    self.op.debug_level)
7820         if result.fail_msg:
7821           self.LogWarning("Failed to run rename script for %s on node"
7822                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7823
7824       else:
7825         # also checked in the prereq part
7826         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7827                                      % self.op.mode)
7828
7829     if self.op.start:
7830       iobj.admin_up = True
7831       self.cfg.Update(iobj, feedback_fn)
7832       logging.info("Starting instance %s on node %s", instance, pnode_name)
7833       feedback_fn("* starting instance...")
7834       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7835       result.Raise("Could not start instance")
7836
7837     return list(iobj.all_nodes)
7838
7839
7840 class LUInstanceConsole(NoHooksLU):
7841   """Connect to an instance's console.
7842
7843   This is somewhat special in that it returns the command line that
7844   you need to run on the master node in order to connect to the
7845   console.
7846
7847   """
7848   REQ_BGL = False
7849
7850   def ExpandNames(self):
7851     self._ExpandAndLockInstance()
7852
7853   def CheckPrereq(self):
7854     """Check prerequisites.
7855
7856     This checks that the instance is in the cluster.
7857
7858     """
7859     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7860     assert self.instance is not None, \
7861       "Cannot retrieve locked instance %s" % self.op.instance_name
7862     _CheckNodeOnline(self, self.instance.primary_node)
7863
7864   def Exec(self, feedback_fn):
7865     """Connect to the console of an instance
7866
7867     """
7868     instance = self.instance
7869     node = instance.primary_node
7870
7871     node_insts = self.rpc.call_instance_list([node],
7872                                              [instance.hypervisor])[node]
7873     node_insts.Raise("Can't get node information from %s" % node)
7874
7875     if instance.name not in node_insts.payload:
7876       if instance.admin_up:
7877         state = "ERROR_down"
7878       else:
7879         state = "ADMIN_down"
7880       raise errors.OpExecError("Instance %s is not running (state %s)" %
7881                                (instance.name, state))
7882
7883     logging.debug("Connecting to console of %s on %s", instance.name, node)
7884
7885     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7886
7887
7888 def _GetInstanceConsole(cluster, instance):
7889   """Returns console information for an instance.
7890
7891   @type cluster: L{objects.Cluster}
7892   @type instance: L{objects.Instance}
7893   @rtype: dict
7894
7895   """
7896   hyper = hypervisor.GetHypervisor(instance.hypervisor)
7897   # beparams and hvparams are passed separately, to avoid editing the
7898   # instance and then saving the defaults in the instance itself.
7899   hvparams = cluster.FillHV(instance)
7900   beparams = cluster.FillBE(instance)
7901   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7902
7903   assert console.instance == instance.name
7904   assert console.Validate()
7905
7906   return console.ToDict()
7907
7908
7909 class LUInstanceReplaceDisks(LogicalUnit):
7910   """Replace the disks of an instance.
7911
7912   """
7913   HPATH = "mirrors-replace"
7914   HTYPE = constants.HTYPE_INSTANCE
7915   REQ_BGL = False
7916
7917   def CheckArguments(self):
7918     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7919                                   self.op.iallocator)
7920
7921   def ExpandNames(self):
7922     self._ExpandAndLockInstance()
7923
7924     if self.op.iallocator is not None:
7925       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7926
7927     elif self.op.remote_node is not None:
7928       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7929       self.op.remote_node = remote_node
7930
7931       # Warning: do not remove the locking of the new secondary here
7932       # unless DRBD8.AddChildren is changed to work in parallel;
7933       # currently it doesn't since parallel invocations of
7934       # FindUnusedMinor will conflict
7935       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7936       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7937
7938     else:
7939       self.needed_locks[locking.LEVEL_NODE] = []
7940       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7941
7942     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7943                                    self.op.iallocator, self.op.remote_node,
7944                                    self.op.disks, False, self.op.early_release)
7945
7946     self.tasklets = [self.replacer]
7947
7948   def DeclareLocks(self, level):
7949     # If we're not already locking all nodes in the set we have to declare the
7950     # instance's primary/secondary nodes.
7951     if (level == locking.LEVEL_NODE and
7952         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7953       self._LockInstancesNodes()
7954
7955   def BuildHooksEnv(self):
7956     """Build hooks env.
7957
7958     This runs on the master, the primary and all the secondaries.
7959
7960     """
7961     instance = self.replacer.instance
7962     env = {
7963       "MODE": self.op.mode,
7964       "NEW_SECONDARY": self.op.remote_node,
7965       "OLD_SECONDARY": instance.secondary_nodes[0],
7966       }
7967     env.update(_BuildInstanceHookEnvByObject(self, instance))
7968     nl = [
7969       self.cfg.GetMasterNode(),
7970       instance.primary_node,
7971       ]
7972     if self.op.remote_node is not None:
7973       nl.append(self.op.remote_node)
7974     return env, nl, nl
7975
7976
7977 class TLReplaceDisks(Tasklet):
7978   """Replaces disks for an instance.
7979
7980   Note: Locking is not within the scope of this class.
7981
7982   """
7983   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7984                disks, delay_iallocator, early_release):
7985     """Initializes this class.
7986
7987     """
7988     Tasklet.__init__(self, lu)
7989
7990     # Parameters
7991     self.instance_name = instance_name
7992     self.mode = mode
7993     self.iallocator_name = iallocator_name
7994     self.remote_node = remote_node
7995     self.disks = disks
7996     self.delay_iallocator = delay_iallocator
7997     self.early_release = early_release
7998
7999     # Runtime data
8000     self.instance = None
8001     self.new_node = None
8002     self.target_node = None
8003     self.other_node = None
8004     self.remote_node_info = None
8005     self.node_secondary_ip = None
8006
8007   @staticmethod
8008   def CheckArguments(mode, remote_node, iallocator):
8009     """Helper function for users of this class.
8010
8011     """
8012     # check for valid parameter combination
8013     if mode == constants.REPLACE_DISK_CHG:
8014       if remote_node is None and iallocator is None:
8015         raise errors.OpPrereqError("When changing the secondary either an"
8016                                    " iallocator script must be used or the"
8017                                    " new node given", errors.ECODE_INVAL)
8018
8019       if remote_node is not None and iallocator is not None:
8020         raise errors.OpPrereqError("Give either the iallocator or the new"
8021                                    " secondary, not both", errors.ECODE_INVAL)
8022
8023     elif remote_node is not None or iallocator is not None:
8024       # Not replacing the secondary
8025       raise errors.OpPrereqError("The iallocator and new node options can"
8026                                  " only be used when changing the"
8027                                  " secondary node", errors.ECODE_INVAL)
8028
8029   @staticmethod
8030   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8031     """Compute a new secondary node using an IAllocator.
8032
8033     """
8034     ial = IAllocator(lu.cfg, lu.rpc,
8035                      mode=constants.IALLOCATOR_MODE_RELOC,
8036                      name=instance_name,
8037                      relocate_from=relocate_from)
8038
8039     ial.Run(iallocator_name)
8040
8041     if not ial.success:
8042       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8043                                  " %s" % (iallocator_name, ial.info),
8044                                  errors.ECODE_NORES)
8045
8046     if len(ial.result) != ial.required_nodes:
8047       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8048                                  " of nodes (%s), required %s" %
8049                                  (iallocator_name,
8050                                   len(ial.result), ial.required_nodes),
8051                                  errors.ECODE_FAULT)
8052
8053     remote_node_name = ial.result[0]
8054
8055     lu.LogInfo("Selected new secondary for instance '%s': %s",
8056                instance_name, remote_node_name)
8057
8058     return remote_node_name
8059
8060   def _FindFaultyDisks(self, node_name):
8061     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8062                                     node_name, True)
8063
8064   def CheckPrereq(self):
8065     """Check prerequisites.
8066
8067     This checks that the instance is in the cluster.
8068
8069     """
8070     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8071     assert instance is not None, \
8072       "Cannot retrieve locked instance %s" % self.instance_name
8073
8074     if instance.disk_template != constants.DT_DRBD8:
8075       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8076                                  " instances", errors.ECODE_INVAL)
8077
8078     if len(instance.secondary_nodes) != 1:
8079       raise errors.OpPrereqError("The instance has a strange layout,"
8080                                  " expected one secondary but found %d" %
8081                                  len(instance.secondary_nodes),
8082                                  errors.ECODE_FAULT)
8083
8084     if not self.delay_iallocator:
8085       self._CheckPrereq2()
8086
8087   def _CheckPrereq2(self):
8088     """Check prerequisites, second part.
8089
8090     This function should always be part of CheckPrereq. It was separated and is
8091     now called from Exec because during node evacuation iallocator was only
8092     called with an unmodified cluster model, not taking planned changes into
8093     account.
8094
8095     """
8096     instance = self.instance
8097     secondary_node = instance.secondary_nodes[0]
8098
8099     if self.iallocator_name is None:
8100       remote_node = self.remote_node
8101     else:
8102       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8103                                        instance.name, instance.secondary_nodes)
8104
8105     if remote_node is not None:
8106       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8107       assert self.remote_node_info is not None, \
8108         "Cannot retrieve locked node %s" % remote_node
8109     else:
8110       self.remote_node_info = None
8111
8112     if remote_node == self.instance.primary_node:
8113       raise errors.OpPrereqError("The specified node is the primary node of"
8114                                  " the instance.", errors.ECODE_INVAL)
8115
8116     if remote_node == secondary_node:
8117       raise errors.OpPrereqError("The specified node is already the"
8118                                  " secondary node of the instance.",
8119                                  errors.ECODE_INVAL)
8120
8121     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8122                                     constants.REPLACE_DISK_CHG):
8123       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8124                                  errors.ECODE_INVAL)
8125
8126     if self.mode == constants.REPLACE_DISK_AUTO:
8127       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8128       faulty_secondary = self._FindFaultyDisks(secondary_node)
8129
8130       if faulty_primary and faulty_secondary:
8131         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8132                                    " one node and can not be repaired"
8133                                    " automatically" % self.instance_name,
8134                                    errors.ECODE_STATE)
8135
8136       if faulty_primary:
8137         self.disks = faulty_primary
8138         self.target_node = instance.primary_node
8139         self.other_node = secondary_node
8140         check_nodes = [self.target_node, self.other_node]
8141       elif faulty_secondary:
8142         self.disks = faulty_secondary
8143         self.target_node = secondary_node
8144         self.other_node = instance.primary_node
8145         check_nodes = [self.target_node, self.other_node]
8146       else:
8147         self.disks = []
8148         check_nodes = []
8149
8150     else:
8151       # Non-automatic modes
8152       if self.mode == constants.REPLACE_DISK_PRI:
8153         self.target_node = instance.primary_node
8154         self.other_node = secondary_node
8155         check_nodes = [self.target_node, self.other_node]
8156
8157       elif self.mode == constants.REPLACE_DISK_SEC:
8158         self.target_node = secondary_node
8159         self.other_node = instance.primary_node
8160         check_nodes = [self.target_node, self.other_node]
8161
8162       elif self.mode == constants.REPLACE_DISK_CHG:
8163         self.new_node = remote_node
8164         self.other_node = instance.primary_node
8165         self.target_node = secondary_node
8166         check_nodes = [self.new_node, self.other_node]
8167
8168         _CheckNodeNotDrained(self.lu, remote_node)
8169         _CheckNodeVmCapable(self.lu, remote_node)
8170
8171         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8172         assert old_node_info is not None
8173         if old_node_info.offline and not self.early_release:
8174           # doesn't make sense to delay the release
8175           self.early_release = True
8176           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8177                           " early-release mode", secondary_node)
8178
8179       else:
8180         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8181                                      self.mode)
8182
8183       # If not specified all disks should be replaced
8184       if not self.disks:
8185         self.disks = range(len(self.instance.disks))
8186
8187     for node in check_nodes:
8188       _CheckNodeOnline(self.lu, node)
8189
8190     # Check whether disks are valid
8191     for disk_idx in self.disks:
8192       instance.FindDisk(disk_idx)
8193
8194     # Get secondary node IP addresses
8195     node_2nd_ip = {}
8196
8197     for node_name in [self.target_node, self.other_node, self.new_node]:
8198       if node_name is not None:
8199         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8200
8201     self.node_secondary_ip = node_2nd_ip
8202
8203   def Exec(self, feedback_fn):
8204     """Execute disk replacement.
8205
8206     This dispatches the disk replacement to the appropriate handler.
8207
8208     """
8209     if self.delay_iallocator:
8210       self._CheckPrereq2()
8211
8212     if not self.disks:
8213       feedback_fn("No disks need replacement")
8214       return
8215
8216     feedback_fn("Replacing disk(s) %s for %s" %
8217                 (utils.CommaJoin(self.disks), self.instance.name))
8218
8219     activate_disks = (not self.instance.admin_up)
8220
8221     # Activate the instance disks if we're replacing them on a down instance
8222     if activate_disks:
8223       _StartInstanceDisks(self.lu, self.instance, True)
8224
8225     try:
8226       # Should we replace the secondary node?
8227       if self.new_node is not None:
8228         fn = self._ExecDrbd8Secondary
8229       else:
8230         fn = self._ExecDrbd8DiskOnly
8231
8232       return fn(feedback_fn)
8233
8234     finally:
8235       # Deactivate the instance disks if we're replacing them on a
8236       # down instance
8237       if activate_disks:
8238         _SafeShutdownInstanceDisks(self.lu, self.instance)
8239
8240   def _CheckVolumeGroup(self, nodes):
8241     self.lu.LogInfo("Checking volume groups")
8242
8243     vgname = self.cfg.GetVGName()
8244
8245     # Make sure volume group exists on all involved nodes
8246     results = self.rpc.call_vg_list(nodes)
8247     if not results:
8248       raise errors.OpExecError("Can't list volume groups on the nodes")
8249
8250     for node in nodes:
8251       res = results[node]
8252       res.Raise("Error checking node %s" % node)
8253       if vgname not in res.payload:
8254         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8255                                  (vgname, node))
8256
8257   def _CheckDisksExistence(self, nodes):
8258     # Check disk existence
8259     for idx, dev in enumerate(self.instance.disks):
8260       if idx not in self.disks:
8261         continue
8262
8263       for node in nodes:
8264         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8265         self.cfg.SetDiskID(dev, node)
8266
8267         result = self.rpc.call_blockdev_find(node, dev)
8268
8269         msg = result.fail_msg
8270         if msg or not result.payload:
8271           if not msg:
8272             msg = "disk not found"
8273           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8274                                    (idx, node, msg))
8275
8276   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8277     for idx, dev in enumerate(self.instance.disks):
8278       if idx not in self.disks:
8279         continue
8280
8281       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8282                       (idx, node_name))
8283
8284       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8285                                    ldisk=ldisk):
8286         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8287                                  " replace disks for instance %s" %
8288                                  (node_name, self.instance.name))
8289
8290   def _CreateNewStorage(self, node_name):
8291     iv_names = {}
8292
8293     for idx, dev in enumerate(self.instance.disks):
8294       if idx not in self.disks:
8295         continue
8296
8297       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8298
8299       self.cfg.SetDiskID(dev, node_name)
8300
8301       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8302       names = _GenerateUniqueNames(self.lu, lv_names)
8303
8304       vg_data = dev.children[0].logical_id[0]
8305       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8306                              logical_id=(vg_data, names[0]))
8307       vg_meta = dev.children[1].logical_id[0]
8308       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8309                              logical_id=(vg_meta, names[1]))
8310
8311       new_lvs = [lv_data, lv_meta]
8312       old_lvs = dev.children
8313       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8314
8315       # we pass force_create=True to force the LVM creation
8316       for new_lv in new_lvs:
8317         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8318                         _GetInstanceInfoText(self.instance), False)
8319
8320     return iv_names
8321
8322   def _CheckDevices(self, node_name, iv_names):
8323     for name, (dev, _, _) in iv_names.iteritems():
8324       self.cfg.SetDiskID(dev, node_name)
8325
8326       result = self.rpc.call_blockdev_find(node_name, dev)
8327
8328       msg = result.fail_msg
8329       if msg or not result.payload:
8330         if not msg:
8331           msg = "disk not found"
8332         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8333                                  (name, msg))
8334
8335       if result.payload.is_degraded:
8336         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8337
8338   def _RemoveOldStorage(self, node_name, iv_names):
8339     for name, (_, old_lvs, _) in iv_names.iteritems():
8340       self.lu.LogInfo("Remove logical volumes for %s" % name)
8341
8342       for lv in old_lvs:
8343         self.cfg.SetDiskID(lv, node_name)
8344
8345         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8346         if msg:
8347           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8348                              hint="remove unused LVs manually")
8349
8350   def _ReleaseNodeLock(self, node_name):
8351     """Releases the lock for a given node."""
8352     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8353
8354   def _ExecDrbd8DiskOnly(self, feedback_fn):
8355     """Replace a disk on the primary or secondary for DRBD 8.
8356
8357     The algorithm for replace is quite complicated:
8358
8359       1. for each disk to be replaced:
8360
8361         1. create new LVs on the target node with unique names
8362         1. detach old LVs from the drbd device
8363         1. rename old LVs to name_replaced.<time_t>
8364         1. rename new LVs to old LVs
8365         1. attach the new LVs (with the old names now) to the drbd device
8366
8367       1. wait for sync across all devices
8368
8369       1. for each modified disk:
8370
8371         1. remove old LVs (which have the name name_replaces.<time_t>)
8372
8373     Failures are not very well handled.
8374
8375     """
8376     steps_total = 6
8377
8378     # Step: check device activation
8379     self.lu.LogStep(1, steps_total, "Check device existence")
8380     self._CheckDisksExistence([self.other_node, self.target_node])
8381     self._CheckVolumeGroup([self.target_node, self.other_node])
8382
8383     # Step: check other node consistency
8384     self.lu.LogStep(2, steps_total, "Check peer consistency")
8385     self._CheckDisksConsistency(self.other_node,
8386                                 self.other_node == self.instance.primary_node,
8387                                 False)
8388
8389     # Step: create new storage
8390     self.lu.LogStep(3, steps_total, "Allocate new storage")
8391     iv_names = self._CreateNewStorage(self.target_node)
8392
8393     # Step: for each lv, detach+rename*2+attach
8394     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8395     for dev, old_lvs, new_lvs in iv_names.itervalues():
8396       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8397
8398       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8399                                                      old_lvs)
8400       result.Raise("Can't detach drbd from local storage on node"
8401                    " %s for device %s" % (self.target_node, dev.iv_name))
8402       #dev.children = []
8403       #cfg.Update(instance)
8404
8405       # ok, we created the new LVs, so now we know we have the needed
8406       # storage; as such, we proceed on the target node to rename
8407       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8408       # using the assumption that logical_id == physical_id (which in
8409       # turn is the unique_id on that node)
8410
8411       # FIXME(iustin): use a better name for the replaced LVs
8412       temp_suffix = int(time.time())
8413       ren_fn = lambda d, suff: (d.physical_id[0],
8414                                 d.physical_id[1] + "_replaced-%s" % suff)
8415
8416       # Build the rename list based on what LVs exist on the node
8417       rename_old_to_new = []
8418       for to_ren in old_lvs:
8419         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8420         if not result.fail_msg and result.payload:
8421           # device exists
8422           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8423
8424       self.lu.LogInfo("Renaming the old LVs on the target node")
8425       result = self.rpc.call_blockdev_rename(self.target_node,
8426                                              rename_old_to_new)
8427       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8428
8429       # Now we rename the new LVs to the old LVs
8430       self.lu.LogInfo("Renaming the new LVs on the target node")
8431       rename_new_to_old = [(new, old.physical_id)
8432                            for old, new in zip(old_lvs, new_lvs)]
8433       result = self.rpc.call_blockdev_rename(self.target_node,
8434                                              rename_new_to_old)
8435       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8436
8437       for old, new in zip(old_lvs, new_lvs):
8438         new.logical_id = old.logical_id
8439         self.cfg.SetDiskID(new, self.target_node)
8440
8441       for disk in old_lvs:
8442         disk.logical_id = ren_fn(disk, temp_suffix)
8443         self.cfg.SetDiskID(disk, self.target_node)
8444
8445       # Now that the new lvs have the old name, we can add them to the device
8446       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8447       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8448                                                   new_lvs)
8449       msg = result.fail_msg
8450       if msg:
8451         for new_lv in new_lvs:
8452           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8453                                                new_lv).fail_msg
8454           if msg2:
8455             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8456                                hint=("cleanup manually the unused logical"
8457                                      "volumes"))
8458         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8459
8460       dev.children = new_lvs
8461
8462       self.cfg.Update(self.instance, feedback_fn)
8463
8464     cstep = 5
8465     if self.early_release:
8466       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8467       cstep += 1
8468       self._RemoveOldStorage(self.target_node, iv_names)
8469       # WARNING: we release both node locks here, do not do other RPCs
8470       # than WaitForSync to the primary node
8471       self._ReleaseNodeLock([self.target_node, self.other_node])
8472
8473     # Wait for sync
8474     # This can fail as the old devices are degraded and _WaitForSync
8475     # does a combined result over all disks, so we don't check its return value
8476     self.lu.LogStep(cstep, steps_total, "Sync devices")
8477     cstep += 1
8478     _WaitForSync(self.lu, self.instance)
8479
8480     # Check all devices manually
8481     self._CheckDevices(self.instance.primary_node, iv_names)
8482
8483     # Step: remove old storage
8484     if not self.early_release:
8485       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8486       cstep += 1
8487       self._RemoveOldStorage(self.target_node, iv_names)
8488
8489   def _ExecDrbd8Secondary(self, feedback_fn):
8490     """Replace the secondary node for DRBD 8.
8491
8492     The algorithm for replace is quite complicated:
8493       - for all disks of the instance:
8494         - create new LVs on the new node with same names
8495         - shutdown the drbd device on the old secondary
8496         - disconnect the drbd network on the primary
8497         - create the drbd device on the new secondary
8498         - network attach the drbd on the primary, using an artifice:
8499           the drbd code for Attach() will connect to the network if it
8500           finds a device which is connected to the good local disks but
8501           not network enabled
8502       - wait for sync across all devices
8503       - remove all disks from the old secondary
8504
8505     Failures are not very well handled.
8506
8507     """
8508     steps_total = 6
8509
8510     # Step: check device activation
8511     self.lu.LogStep(1, steps_total, "Check device existence")
8512     self._CheckDisksExistence([self.instance.primary_node])
8513     self._CheckVolumeGroup([self.instance.primary_node])
8514
8515     # Step: check other node consistency
8516     self.lu.LogStep(2, steps_total, "Check peer consistency")
8517     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8518
8519     # Step: create new storage
8520     self.lu.LogStep(3, steps_total, "Allocate new storage")
8521     for idx, dev in enumerate(self.instance.disks):
8522       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8523                       (self.new_node, idx))
8524       # we pass force_create=True to force LVM creation
8525       for new_lv in dev.children:
8526         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8527                         _GetInstanceInfoText(self.instance), False)
8528
8529     # Step 4: dbrd minors and drbd setups changes
8530     # after this, we must manually remove the drbd minors on both the
8531     # error and the success paths
8532     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8533     minors = self.cfg.AllocateDRBDMinor([self.new_node
8534                                          for dev in self.instance.disks],
8535                                         self.instance.name)
8536     logging.debug("Allocated minors %r", minors)
8537
8538     iv_names = {}
8539     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8540       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8541                       (self.new_node, idx))
8542       # create new devices on new_node; note that we create two IDs:
8543       # one without port, so the drbd will be activated without
8544       # networking information on the new node at this stage, and one
8545       # with network, for the latter activation in step 4
8546       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8547       if self.instance.primary_node == o_node1:
8548         p_minor = o_minor1
8549       else:
8550         assert self.instance.primary_node == o_node2, "Three-node instance?"
8551         p_minor = o_minor2
8552
8553       new_alone_id = (self.instance.primary_node, self.new_node, None,
8554                       p_minor, new_minor, o_secret)
8555       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8556                     p_minor, new_minor, o_secret)
8557
8558       iv_names[idx] = (dev, dev.children, new_net_id)
8559       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8560                     new_net_id)
8561       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8562                               logical_id=new_alone_id,
8563                               children=dev.children,
8564                               size=dev.size)
8565       try:
8566         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8567                               _GetInstanceInfoText(self.instance), False)
8568       except errors.GenericError:
8569         self.cfg.ReleaseDRBDMinors(self.instance.name)
8570         raise
8571
8572     # We have new devices, shutdown the drbd on the old secondary
8573     for idx, dev in enumerate(self.instance.disks):
8574       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8575       self.cfg.SetDiskID(dev, self.target_node)
8576       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8577       if msg:
8578         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8579                            "node: %s" % (idx, msg),
8580                            hint=("Please cleanup this device manually as"
8581                                  " soon as possible"))
8582
8583     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8584     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8585                                                self.node_secondary_ip,
8586                                                self.instance.disks)\
8587                                               [self.instance.primary_node]
8588
8589     msg = result.fail_msg
8590     if msg:
8591       # detaches didn't succeed (unlikely)
8592       self.cfg.ReleaseDRBDMinors(self.instance.name)
8593       raise errors.OpExecError("Can't detach the disks from the network on"
8594                                " old node: %s" % (msg,))
8595
8596     # if we managed to detach at least one, we update all the disks of
8597     # the instance to point to the new secondary
8598     self.lu.LogInfo("Updating instance configuration")
8599     for dev, _, new_logical_id in iv_names.itervalues():
8600       dev.logical_id = new_logical_id
8601       self.cfg.SetDiskID(dev, self.instance.primary_node)
8602
8603     self.cfg.Update(self.instance, feedback_fn)
8604
8605     # and now perform the drbd attach
8606     self.lu.LogInfo("Attaching primary drbds to new secondary"
8607                     " (standalone => connected)")
8608     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8609                                             self.new_node],
8610                                            self.node_secondary_ip,
8611                                            self.instance.disks,
8612                                            self.instance.name,
8613                                            False)
8614     for to_node, to_result in result.items():
8615       msg = to_result.fail_msg
8616       if msg:
8617         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8618                            to_node, msg,
8619                            hint=("please do a gnt-instance info to see the"
8620                                  " status of disks"))
8621     cstep = 5
8622     if self.early_release:
8623       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8624       cstep += 1
8625       self._RemoveOldStorage(self.target_node, iv_names)
8626       # WARNING: we release all node locks here, do not do other RPCs
8627       # than WaitForSync to the primary node
8628       self._ReleaseNodeLock([self.instance.primary_node,
8629                              self.target_node,
8630                              self.new_node])
8631
8632     # Wait for sync
8633     # This can fail as the old devices are degraded and _WaitForSync
8634     # does a combined result over all disks, so we don't check its return value
8635     self.lu.LogStep(cstep, steps_total, "Sync devices")
8636     cstep += 1
8637     _WaitForSync(self.lu, self.instance)
8638
8639     # Check all devices manually
8640     self._CheckDevices(self.instance.primary_node, iv_names)
8641
8642     # Step: remove old storage
8643     if not self.early_release:
8644       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8645       self._RemoveOldStorage(self.target_node, iv_names)
8646
8647
8648 class LURepairNodeStorage(NoHooksLU):
8649   """Repairs the volume group on a node.
8650
8651   """
8652   REQ_BGL = False
8653
8654   def CheckArguments(self):
8655     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8656
8657     storage_type = self.op.storage_type
8658
8659     if (constants.SO_FIX_CONSISTENCY not in
8660         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8661       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8662                                  " repaired" % storage_type,
8663                                  errors.ECODE_INVAL)
8664
8665   def ExpandNames(self):
8666     self.needed_locks = {
8667       locking.LEVEL_NODE: [self.op.node_name],
8668       }
8669
8670   def _CheckFaultyDisks(self, instance, node_name):
8671     """Ensure faulty disks abort the opcode or at least warn."""
8672     try:
8673       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8674                                   node_name, True):
8675         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8676                                    " node '%s'" % (instance.name, node_name),
8677                                    errors.ECODE_STATE)
8678     except errors.OpPrereqError, err:
8679       if self.op.ignore_consistency:
8680         self.proc.LogWarning(str(err.args[0]))
8681       else:
8682         raise
8683
8684   def CheckPrereq(self):
8685     """Check prerequisites.
8686
8687     """
8688     # Check whether any instance on this node has faulty disks
8689     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8690       if not inst.admin_up:
8691         continue
8692       check_nodes = set(inst.all_nodes)
8693       check_nodes.discard(self.op.node_name)
8694       for inst_node_name in check_nodes:
8695         self._CheckFaultyDisks(inst, inst_node_name)
8696
8697   def Exec(self, feedback_fn):
8698     feedback_fn("Repairing storage unit '%s' on %s ..." %
8699                 (self.op.name, self.op.node_name))
8700
8701     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8702     result = self.rpc.call_storage_execute(self.op.node_name,
8703                                            self.op.storage_type, st_args,
8704                                            self.op.name,
8705                                            constants.SO_FIX_CONSISTENCY)
8706     result.Raise("Failed to repair storage unit '%s' on %s" %
8707                  (self.op.name, self.op.node_name))
8708
8709
8710 class LUNodeEvacStrategy(NoHooksLU):
8711   """Computes the node evacuation strategy.
8712
8713   """
8714   REQ_BGL = False
8715
8716   def CheckArguments(self):
8717     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8718
8719   def ExpandNames(self):
8720     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8721     self.needed_locks = locks = {}
8722     if self.op.remote_node is None:
8723       locks[locking.LEVEL_NODE] = locking.ALL_SET
8724     else:
8725       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8726       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8727
8728   def Exec(self, feedback_fn):
8729     if self.op.remote_node is not None:
8730       instances = []
8731       for node in self.op.nodes:
8732         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8733       result = []
8734       for i in instances:
8735         if i.primary_node == self.op.remote_node:
8736           raise errors.OpPrereqError("Node %s is the primary node of"
8737                                      " instance %s, cannot use it as"
8738                                      " secondary" %
8739                                      (self.op.remote_node, i.name),
8740                                      errors.ECODE_INVAL)
8741         result.append([i.name, self.op.remote_node])
8742     else:
8743       ial = IAllocator(self.cfg, self.rpc,
8744                        mode=constants.IALLOCATOR_MODE_MEVAC,
8745                        evac_nodes=self.op.nodes)
8746       ial.Run(self.op.iallocator, validate=True)
8747       if not ial.success:
8748         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8749                                  errors.ECODE_NORES)
8750       result = ial.result
8751     return result
8752
8753
8754 class LUInstanceGrowDisk(LogicalUnit):
8755   """Grow a disk of an instance.
8756
8757   """
8758   HPATH = "disk-grow"
8759   HTYPE = constants.HTYPE_INSTANCE
8760   REQ_BGL = False
8761
8762   def ExpandNames(self):
8763     self._ExpandAndLockInstance()
8764     self.needed_locks[locking.LEVEL_NODE] = []
8765     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8766
8767   def DeclareLocks(self, level):
8768     if level == locking.LEVEL_NODE:
8769       self._LockInstancesNodes()
8770
8771   def BuildHooksEnv(self):
8772     """Build hooks env.
8773
8774     This runs on the master, the primary and all the secondaries.
8775
8776     """
8777     env = {
8778       "DISK": self.op.disk,
8779       "AMOUNT": self.op.amount,
8780       }
8781     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8782     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8783     return env, nl, nl
8784
8785   def CheckPrereq(self):
8786     """Check prerequisites.
8787
8788     This checks that the instance is in the cluster.
8789
8790     """
8791     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8792     assert instance is not None, \
8793       "Cannot retrieve locked instance %s" % self.op.instance_name
8794     nodenames = list(instance.all_nodes)
8795     for node in nodenames:
8796       _CheckNodeOnline(self, node)
8797
8798     self.instance = instance
8799
8800     if instance.disk_template not in constants.DTS_GROWABLE:
8801       raise errors.OpPrereqError("Instance's disk layout does not support"
8802                                  " growing.", errors.ECODE_INVAL)
8803
8804     self.disk = instance.FindDisk(self.op.disk)
8805
8806     if instance.disk_template != constants.DT_FILE:
8807       # TODO: check the free disk space for file, when that feature
8808       # will be supported
8809       _CheckNodesFreeDiskPerVG(self, nodenames,
8810                                self.disk.ComputeGrowth(self.op.amount))
8811
8812   def Exec(self, feedback_fn):
8813     """Execute disk grow.
8814
8815     """
8816     instance = self.instance
8817     disk = self.disk
8818
8819     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8820     if not disks_ok:
8821       raise errors.OpExecError("Cannot activate block device to grow")
8822
8823     for node in instance.all_nodes:
8824       self.cfg.SetDiskID(disk, node)
8825       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8826       result.Raise("Grow request failed to node %s" % node)
8827
8828       # TODO: Rewrite code to work properly
8829       # DRBD goes into sync mode for a short amount of time after executing the
8830       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8831       # calling "resize" in sync mode fails. Sleeping for a short amount of
8832       # time is a work-around.
8833       time.sleep(5)
8834
8835     disk.RecordGrow(self.op.amount)
8836     self.cfg.Update(instance, feedback_fn)
8837     if self.op.wait_for_sync:
8838       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8839       if disk_abort:
8840         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8841                              " status.\nPlease check the instance.")
8842       if not instance.admin_up:
8843         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8844     elif not instance.admin_up:
8845       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8846                            " not supposed to be running because no wait for"
8847                            " sync mode was requested.")
8848
8849
8850 class LUInstanceQueryData(NoHooksLU):
8851   """Query runtime instance data.
8852
8853   """
8854   REQ_BGL = False
8855
8856   def ExpandNames(self):
8857     self.needed_locks = {}
8858
8859     # Use locking if requested or when non-static information is wanted
8860     if not (self.op.static or self.op.use_locking):
8861       self.LogWarning("Non-static data requested, locks need to be acquired")
8862       self.op.use_locking = True
8863
8864     if self.op.instances or not self.op.use_locking:
8865       # Expand instance names right here
8866       self.wanted_names = _GetWantedInstances(self, self.op.instances)
8867     else:
8868       # Will use acquired locks
8869       self.wanted_names = None
8870
8871     if self.op.use_locking:
8872       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8873
8874       if self.wanted_names is None:
8875         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8876       else:
8877         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8878
8879       self.needed_locks[locking.LEVEL_NODE] = []
8880       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8881       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8882
8883   def DeclareLocks(self, level):
8884     if self.op.use_locking and level == locking.LEVEL_NODE:
8885       self._LockInstancesNodes()
8886
8887   def CheckPrereq(self):
8888     """Check prerequisites.
8889
8890     This only checks the optional instance list against the existing names.
8891
8892     """
8893     if self.wanted_names is None:
8894       assert self.op.use_locking, "Locking was not used"
8895       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8896
8897     self.wanted_instances = [self.cfg.GetInstanceInfo(name)
8898                              for name in self.wanted_names]
8899
8900   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8901     """Returns the status of a block device
8902
8903     """
8904     if self.op.static or not node:
8905       return None
8906
8907     self.cfg.SetDiskID(dev, node)
8908
8909     result = self.rpc.call_blockdev_find(node, dev)
8910     if result.offline:
8911       return None
8912
8913     result.Raise("Can't compute disk status for %s" % instance_name)
8914
8915     status = result.payload
8916     if status is None:
8917       return None
8918
8919     return (status.dev_path, status.major, status.minor,
8920             status.sync_percent, status.estimated_time,
8921             status.is_degraded, status.ldisk_status)
8922
8923   def _ComputeDiskStatus(self, instance, snode, dev):
8924     """Compute block device status.
8925
8926     """
8927     if dev.dev_type in constants.LDS_DRBD:
8928       # we change the snode then (otherwise we use the one passed in)
8929       if dev.logical_id[0] == instance.primary_node:
8930         snode = dev.logical_id[1]
8931       else:
8932         snode = dev.logical_id[0]
8933
8934     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8935                                               instance.name, dev)
8936     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8937
8938     if dev.children:
8939       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8940                       for child in dev.children]
8941     else:
8942       dev_children = []
8943
8944     return {
8945       "iv_name": dev.iv_name,
8946       "dev_type": dev.dev_type,
8947       "logical_id": dev.logical_id,
8948       "physical_id": dev.physical_id,
8949       "pstatus": dev_pstatus,
8950       "sstatus": dev_sstatus,
8951       "children": dev_children,
8952       "mode": dev.mode,
8953       "size": dev.size,
8954       }
8955
8956   def Exec(self, feedback_fn):
8957     """Gather and return data"""
8958     result = {}
8959
8960     cluster = self.cfg.GetClusterInfo()
8961
8962     for instance in self.wanted_instances:
8963       if not self.op.static:
8964         remote_info = self.rpc.call_instance_info(instance.primary_node,
8965                                                   instance.name,
8966                                                   instance.hypervisor)
8967         remote_info.Raise("Error checking node %s" % instance.primary_node)
8968         remote_info = remote_info.payload
8969         if remote_info and "state" in remote_info:
8970           remote_state = "up"
8971         else:
8972           remote_state = "down"
8973       else:
8974         remote_state = None
8975       if instance.admin_up:
8976         config_state = "up"
8977       else:
8978         config_state = "down"
8979
8980       disks = [self._ComputeDiskStatus(instance, None, device)
8981                for device in instance.disks]
8982
8983       result[instance.name] = {
8984         "name": instance.name,
8985         "config_state": config_state,
8986         "run_state": remote_state,
8987         "pnode": instance.primary_node,
8988         "snodes": instance.secondary_nodes,
8989         "os": instance.os,
8990         # this happens to be the same format used for hooks
8991         "nics": _NICListToTuple(self, instance.nics),
8992         "disk_template": instance.disk_template,
8993         "disks": disks,
8994         "hypervisor": instance.hypervisor,
8995         "network_port": instance.network_port,
8996         "hv_instance": instance.hvparams,
8997         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8998         "be_instance": instance.beparams,
8999         "be_actual": cluster.FillBE(instance),
9000         "os_instance": instance.osparams,
9001         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9002         "serial_no": instance.serial_no,
9003         "mtime": instance.mtime,
9004         "ctime": instance.ctime,
9005         "uuid": instance.uuid,
9006         }
9007
9008     return result
9009
9010
9011 class LUInstanceSetParams(LogicalUnit):
9012   """Modifies an instances's parameters.
9013
9014   """
9015   HPATH = "instance-modify"
9016   HTYPE = constants.HTYPE_INSTANCE
9017   REQ_BGL = False
9018
9019   def CheckArguments(self):
9020     if not (self.op.nics or self.op.disks or self.op.disk_template or
9021             self.op.hvparams or self.op.beparams or self.op.os_name):
9022       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9023
9024     if self.op.hvparams:
9025       _CheckGlobalHvParams(self.op.hvparams)
9026
9027     # Disk validation
9028     disk_addremove = 0
9029     for disk_op, disk_dict in self.op.disks:
9030       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9031       if disk_op == constants.DDM_REMOVE:
9032         disk_addremove += 1
9033         continue
9034       elif disk_op == constants.DDM_ADD:
9035         disk_addremove += 1
9036       else:
9037         if not isinstance(disk_op, int):
9038           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9039         if not isinstance(disk_dict, dict):
9040           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9041           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9042
9043       if disk_op == constants.DDM_ADD:
9044         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9045         if mode not in constants.DISK_ACCESS_SET:
9046           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9047                                      errors.ECODE_INVAL)
9048         size = disk_dict.get('size', None)
9049         if size is None:
9050           raise errors.OpPrereqError("Required disk parameter size missing",
9051                                      errors.ECODE_INVAL)
9052         try:
9053           size = int(size)
9054         except (TypeError, ValueError), err:
9055           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9056                                      str(err), errors.ECODE_INVAL)
9057         disk_dict['size'] = size
9058       else:
9059         # modification of disk
9060         if 'size' in disk_dict:
9061           raise errors.OpPrereqError("Disk size change not possible, use"
9062                                      " grow-disk", errors.ECODE_INVAL)
9063
9064     if disk_addremove > 1:
9065       raise errors.OpPrereqError("Only one disk add or remove operation"
9066                                  " supported at a time", errors.ECODE_INVAL)
9067
9068     if self.op.disks and self.op.disk_template is not None:
9069       raise errors.OpPrereqError("Disk template conversion and other disk"
9070                                  " changes not supported at the same time",
9071                                  errors.ECODE_INVAL)
9072
9073     if (self.op.disk_template and
9074         self.op.disk_template in constants.DTS_NET_MIRROR and
9075         self.op.remote_node is None):
9076       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9077                                  " one requires specifying a secondary node",
9078                                  errors.ECODE_INVAL)
9079
9080     # NIC validation
9081     nic_addremove = 0
9082     for nic_op, nic_dict in self.op.nics:
9083       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9084       if nic_op == constants.DDM_REMOVE:
9085         nic_addremove += 1
9086         continue
9087       elif nic_op == constants.DDM_ADD:
9088         nic_addremove += 1
9089       else:
9090         if not isinstance(nic_op, int):
9091           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9092         if not isinstance(nic_dict, dict):
9093           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9094           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9095
9096       # nic_dict should be a dict
9097       nic_ip = nic_dict.get('ip', None)
9098       if nic_ip is not None:
9099         if nic_ip.lower() == constants.VALUE_NONE:
9100           nic_dict['ip'] = None
9101         else:
9102           if not netutils.IPAddress.IsValid(nic_ip):
9103             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9104                                        errors.ECODE_INVAL)
9105
9106       nic_bridge = nic_dict.get('bridge', None)
9107       nic_link = nic_dict.get('link', None)
9108       if nic_bridge and nic_link:
9109         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9110                                    " at the same time", errors.ECODE_INVAL)
9111       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9112         nic_dict['bridge'] = None
9113       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9114         nic_dict['link'] = None
9115
9116       if nic_op == constants.DDM_ADD:
9117         nic_mac = nic_dict.get('mac', None)
9118         if nic_mac is None:
9119           nic_dict['mac'] = constants.VALUE_AUTO
9120
9121       if 'mac' in nic_dict:
9122         nic_mac = nic_dict['mac']
9123         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9124           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9125
9126         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9127           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9128                                      " modifying an existing nic",
9129                                      errors.ECODE_INVAL)
9130
9131     if nic_addremove > 1:
9132       raise errors.OpPrereqError("Only one NIC add or remove operation"
9133                                  " supported at a time", errors.ECODE_INVAL)
9134
9135   def ExpandNames(self):
9136     self._ExpandAndLockInstance()
9137     self.needed_locks[locking.LEVEL_NODE] = []
9138     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9139
9140   def DeclareLocks(self, level):
9141     if level == locking.LEVEL_NODE:
9142       self._LockInstancesNodes()
9143       if self.op.disk_template and self.op.remote_node:
9144         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9145         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9146
9147   def BuildHooksEnv(self):
9148     """Build hooks env.
9149
9150     This runs on the master, primary and secondaries.
9151
9152     """
9153     args = dict()
9154     if constants.BE_MEMORY in self.be_new:
9155       args['memory'] = self.be_new[constants.BE_MEMORY]
9156     if constants.BE_VCPUS in self.be_new:
9157       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9158     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9159     # information at all.
9160     if self.op.nics:
9161       args['nics'] = []
9162       nic_override = dict(self.op.nics)
9163       for idx, nic in enumerate(self.instance.nics):
9164         if idx in nic_override:
9165           this_nic_override = nic_override[idx]
9166         else:
9167           this_nic_override = {}
9168         if 'ip' in this_nic_override:
9169           ip = this_nic_override['ip']
9170         else:
9171           ip = nic.ip
9172         if 'mac' in this_nic_override:
9173           mac = this_nic_override['mac']
9174         else:
9175           mac = nic.mac
9176         if idx in self.nic_pnew:
9177           nicparams = self.nic_pnew[idx]
9178         else:
9179           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9180         mode = nicparams[constants.NIC_MODE]
9181         link = nicparams[constants.NIC_LINK]
9182         args['nics'].append((ip, mac, mode, link))
9183       if constants.DDM_ADD in nic_override:
9184         ip = nic_override[constants.DDM_ADD].get('ip', None)
9185         mac = nic_override[constants.DDM_ADD]['mac']
9186         nicparams = self.nic_pnew[constants.DDM_ADD]
9187         mode = nicparams[constants.NIC_MODE]
9188         link = nicparams[constants.NIC_LINK]
9189         args['nics'].append((ip, mac, mode, link))
9190       elif constants.DDM_REMOVE in nic_override:
9191         del args['nics'][-1]
9192
9193     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9194     if self.op.disk_template:
9195       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9196     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9197     return env, nl, nl
9198
9199   def CheckPrereq(self):
9200     """Check prerequisites.
9201
9202     This only checks the instance list against the existing names.
9203
9204     """
9205     # checking the new params on the primary/secondary nodes
9206
9207     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9208     cluster = self.cluster = self.cfg.GetClusterInfo()
9209     assert self.instance is not None, \
9210       "Cannot retrieve locked instance %s" % self.op.instance_name
9211     pnode = instance.primary_node
9212     nodelist = list(instance.all_nodes)
9213
9214     # OS change
9215     if self.op.os_name and not self.op.force:
9216       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9217                       self.op.force_variant)
9218       instance_os = self.op.os_name
9219     else:
9220       instance_os = instance.os
9221
9222     if self.op.disk_template:
9223       if instance.disk_template == self.op.disk_template:
9224         raise errors.OpPrereqError("Instance already has disk template %s" %
9225                                    instance.disk_template, errors.ECODE_INVAL)
9226
9227       if (instance.disk_template,
9228           self.op.disk_template) not in self._DISK_CONVERSIONS:
9229         raise errors.OpPrereqError("Unsupported disk template conversion from"
9230                                    " %s to %s" % (instance.disk_template,
9231                                                   self.op.disk_template),
9232                                    errors.ECODE_INVAL)
9233       _CheckInstanceDown(self, instance, "cannot change disk template")
9234       if self.op.disk_template in constants.DTS_NET_MIRROR:
9235         if self.op.remote_node == pnode:
9236           raise errors.OpPrereqError("Given new secondary node %s is the same"
9237                                      " as the primary node of the instance" %
9238                                      self.op.remote_node, errors.ECODE_STATE)
9239         _CheckNodeOnline(self, self.op.remote_node)
9240         _CheckNodeNotDrained(self, self.op.remote_node)
9241         # FIXME: here we assume that the old instance type is DT_PLAIN
9242         assert instance.disk_template == constants.DT_PLAIN
9243         disks = [{"size": d.size, "vg": d.logical_id[0]}
9244                  for d in instance.disks]
9245         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9246         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9247
9248     # hvparams processing
9249     if self.op.hvparams:
9250       hv_type = instance.hypervisor
9251       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9252       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9253       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9254
9255       # local check
9256       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9257       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9258       self.hv_new = hv_new # the new actual values
9259       self.hv_inst = i_hvdict # the new dict (without defaults)
9260     else:
9261       self.hv_new = self.hv_inst = {}
9262
9263     # beparams processing
9264     if self.op.beparams:
9265       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9266                                    use_none=True)
9267       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9268       be_new = cluster.SimpleFillBE(i_bedict)
9269       self.be_new = be_new # the new actual values
9270       self.be_inst = i_bedict # the new dict (without defaults)
9271     else:
9272       self.be_new = self.be_inst = {}
9273
9274     # osparams processing
9275     if self.op.osparams:
9276       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9277       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9278       self.os_inst = i_osdict # the new dict (without defaults)
9279     else:
9280       self.os_inst = {}
9281
9282     self.warn = []
9283
9284     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9285       mem_check_list = [pnode]
9286       if be_new[constants.BE_AUTO_BALANCE]:
9287         # either we changed auto_balance to yes or it was from before
9288         mem_check_list.extend(instance.secondary_nodes)
9289       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9290                                                   instance.hypervisor)
9291       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9292                                          instance.hypervisor)
9293       pninfo = nodeinfo[pnode]
9294       msg = pninfo.fail_msg
9295       if msg:
9296         # Assume the primary node is unreachable and go ahead
9297         self.warn.append("Can't get info from primary node %s: %s" %
9298                          (pnode,  msg))
9299       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9300         self.warn.append("Node data from primary node %s doesn't contain"
9301                          " free memory information" % pnode)
9302       elif instance_info.fail_msg:
9303         self.warn.append("Can't get instance runtime information: %s" %
9304                         instance_info.fail_msg)
9305       else:
9306         if instance_info.payload:
9307           current_mem = int(instance_info.payload['memory'])
9308         else:
9309           # Assume instance not running
9310           # (there is a slight race condition here, but it's not very probable,
9311           # and we have no other way to check)
9312           current_mem = 0
9313         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9314                     pninfo.payload['memory_free'])
9315         if miss_mem > 0:
9316           raise errors.OpPrereqError("This change will prevent the instance"
9317                                      " from starting, due to %d MB of memory"
9318                                      " missing on its primary node" % miss_mem,
9319                                      errors.ECODE_NORES)
9320
9321       if be_new[constants.BE_AUTO_BALANCE]:
9322         for node, nres in nodeinfo.items():
9323           if node not in instance.secondary_nodes:
9324             continue
9325           msg = nres.fail_msg
9326           if msg:
9327             self.warn.append("Can't get info from secondary node %s: %s" %
9328                              (node, msg))
9329           elif not isinstance(nres.payload.get('memory_free', None), int):
9330             self.warn.append("Secondary node %s didn't return free"
9331                              " memory information" % node)
9332           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9333             self.warn.append("Not enough memory to failover instance to"
9334                              " secondary node %s" % node)
9335
9336     # NIC processing
9337     self.nic_pnew = {}
9338     self.nic_pinst = {}
9339     for nic_op, nic_dict in self.op.nics:
9340       if nic_op == constants.DDM_REMOVE:
9341         if not instance.nics:
9342           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9343                                      errors.ECODE_INVAL)
9344         continue
9345       if nic_op != constants.DDM_ADD:
9346         # an existing nic
9347         if not instance.nics:
9348           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9349                                      " no NICs" % nic_op,
9350                                      errors.ECODE_INVAL)
9351         if nic_op < 0 or nic_op >= len(instance.nics):
9352           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9353                                      " are 0 to %d" %
9354                                      (nic_op, len(instance.nics) - 1),
9355                                      errors.ECODE_INVAL)
9356         old_nic_params = instance.nics[nic_op].nicparams
9357         old_nic_ip = instance.nics[nic_op].ip
9358       else:
9359         old_nic_params = {}
9360         old_nic_ip = None
9361
9362       update_params_dict = dict([(key, nic_dict[key])
9363                                  for key in constants.NICS_PARAMETERS
9364                                  if key in nic_dict])
9365
9366       if 'bridge' in nic_dict:
9367         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9368
9369       new_nic_params = _GetUpdatedParams(old_nic_params,
9370                                          update_params_dict)
9371       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9372       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9373       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9374       self.nic_pinst[nic_op] = new_nic_params
9375       self.nic_pnew[nic_op] = new_filled_nic_params
9376       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9377
9378       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9379         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9380         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9381         if msg:
9382           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9383           if self.op.force:
9384             self.warn.append(msg)
9385           else:
9386             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9387       if new_nic_mode == constants.NIC_MODE_ROUTED:
9388         if 'ip' in nic_dict:
9389           nic_ip = nic_dict['ip']
9390         else:
9391           nic_ip = old_nic_ip
9392         if nic_ip is None:
9393           raise errors.OpPrereqError('Cannot set the nic ip to None'
9394                                      ' on a routed nic', errors.ECODE_INVAL)
9395       if 'mac' in nic_dict:
9396         nic_mac = nic_dict['mac']
9397         if nic_mac is None:
9398           raise errors.OpPrereqError('Cannot set the nic mac to None',
9399                                      errors.ECODE_INVAL)
9400         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9401           # otherwise generate the mac
9402           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9403         else:
9404           # or validate/reserve the current one
9405           try:
9406             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9407           except errors.ReservationError:
9408             raise errors.OpPrereqError("MAC address %s already in use"
9409                                        " in cluster" % nic_mac,
9410                                        errors.ECODE_NOTUNIQUE)
9411
9412     # DISK processing
9413     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9414       raise errors.OpPrereqError("Disk operations not supported for"
9415                                  " diskless instances",
9416                                  errors.ECODE_INVAL)
9417     for disk_op, _ in self.op.disks:
9418       if disk_op == constants.DDM_REMOVE:
9419         if len(instance.disks) == 1:
9420           raise errors.OpPrereqError("Cannot remove the last disk of"
9421                                      " an instance", errors.ECODE_INVAL)
9422         _CheckInstanceDown(self, instance, "cannot remove disks")
9423
9424       if (disk_op == constants.DDM_ADD and
9425           len(instance.disks) >= constants.MAX_DISKS):
9426         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9427                                    " add more" % constants.MAX_DISKS,
9428                                    errors.ECODE_STATE)
9429       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9430         # an existing disk
9431         if disk_op < 0 or disk_op >= len(instance.disks):
9432           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9433                                      " are 0 to %d" %
9434                                      (disk_op, len(instance.disks)),
9435                                      errors.ECODE_INVAL)
9436
9437     return
9438
9439   def _ConvertPlainToDrbd(self, feedback_fn):
9440     """Converts an instance from plain to drbd.
9441
9442     """
9443     feedback_fn("Converting template to drbd")
9444     instance = self.instance
9445     pnode = instance.primary_node
9446     snode = self.op.remote_node
9447
9448     # create a fake disk info for _GenerateDiskTemplate
9449     disk_info = [{"size": d.size, "mode": d.mode,
9450                   "vg": d.logical_id[0]} for d in instance.disks]
9451     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9452                                       instance.name, pnode, [snode],
9453                                       disk_info, None, None, 0, feedback_fn)
9454     info = _GetInstanceInfoText(instance)
9455     feedback_fn("Creating aditional volumes...")
9456     # first, create the missing data and meta devices
9457     for disk in new_disks:
9458       # unfortunately this is... not too nice
9459       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9460                             info, True)
9461       for child in disk.children:
9462         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9463     # at this stage, all new LVs have been created, we can rename the
9464     # old ones
9465     feedback_fn("Renaming original volumes...")
9466     rename_list = [(o, n.children[0].logical_id)
9467                    for (o, n) in zip(instance.disks, new_disks)]
9468     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9469     result.Raise("Failed to rename original LVs")
9470
9471     feedback_fn("Initializing DRBD devices...")
9472     # all child devices are in place, we can now create the DRBD devices
9473     for disk in new_disks:
9474       for node in [pnode, snode]:
9475         f_create = node == pnode
9476         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9477
9478     # at this point, the instance has been modified
9479     instance.disk_template = constants.DT_DRBD8
9480     instance.disks = new_disks
9481     self.cfg.Update(instance, feedback_fn)
9482
9483     # disks are created, waiting for sync
9484     disk_abort = not _WaitForSync(self, instance)
9485     if disk_abort:
9486       raise errors.OpExecError("There are some degraded disks for"
9487                                " this instance, please cleanup manually")
9488
9489   def _ConvertDrbdToPlain(self, feedback_fn):
9490     """Converts an instance from drbd to plain.
9491
9492     """
9493     instance = self.instance
9494     assert len(instance.secondary_nodes) == 1
9495     pnode = instance.primary_node
9496     snode = instance.secondary_nodes[0]
9497     feedback_fn("Converting template to plain")
9498
9499     old_disks = instance.disks
9500     new_disks = [d.children[0] for d in old_disks]
9501
9502     # copy over size and mode
9503     for parent, child in zip(old_disks, new_disks):
9504       child.size = parent.size
9505       child.mode = parent.mode
9506
9507     # update instance structure
9508     instance.disks = new_disks
9509     instance.disk_template = constants.DT_PLAIN
9510     self.cfg.Update(instance, feedback_fn)
9511
9512     feedback_fn("Removing volumes on the secondary node...")
9513     for disk in old_disks:
9514       self.cfg.SetDiskID(disk, snode)
9515       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9516       if msg:
9517         self.LogWarning("Could not remove block device %s on node %s,"
9518                         " continuing anyway: %s", disk.iv_name, snode, msg)
9519
9520     feedback_fn("Removing unneeded volumes on the primary node...")
9521     for idx, disk in enumerate(old_disks):
9522       meta = disk.children[1]
9523       self.cfg.SetDiskID(meta, pnode)
9524       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9525       if msg:
9526         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9527                         " continuing anyway: %s", idx, pnode, msg)
9528
9529   def Exec(self, feedback_fn):
9530     """Modifies an instance.
9531
9532     All parameters take effect only at the next restart of the instance.
9533
9534     """
9535     # Process here the warnings from CheckPrereq, as we don't have a
9536     # feedback_fn there.
9537     for warn in self.warn:
9538       feedback_fn("WARNING: %s" % warn)
9539
9540     result = []
9541     instance = self.instance
9542     # disk changes
9543     for disk_op, disk_dict in self.op.disks:
9544       if disk_op == constants.DDM_REMOVE:
9545         # remove the last disk
9546         device = instance.disks.pop()
9547         device_idx = len(instance.disks)
9548         for node, disk in device.ComputeNodeTree(instance.primary_node):
9549           self.cfg.SetDiskID(disk, node)
9550           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9551           if msg:
9552             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9553                             " continuing anyway", device_idx, node, msg)
9554         result.append(("disk/%d" % device_idx, "remove"))
9555       elif disk_op == constants.DDM_ADD:
9556         # add a new disk
9557         if instance.disk_template == constants.DT_FILE:
9558           file_driver, file_path = instance.disks[0].logical_id
9559           file_path = os.path.dirname(file_path)
9560         else:
9561           file_driver = file_path = None
9562         disk_idx_base = len(instance.disks)
9563         new_disk = _GenerateDiskTemplate(self,
9564                                          instance.disk_template,
9565                                          instance.name, instance.primary_node,
9566                                          instance.secondary_nodes,
9567                                          [disk_dict],
9568                                          file_path,
9569                                          file_driver,
9570                                          disk_idx_base, feedback_fn)[0]
9571         instance.disks.append(new_disk)
9572         info = _GetInstanceInfoText(instance)
9573
9574         logging.info("Creating volume %s for instance %s",
9575                      new_disk.iv_name, instance.name)
9576         # Note: this needs to be kept in sync with _CreateDisks
9577         #HARDCODE
9578         for node in instance.all_nodes:
9579           f_create = node == instance.primary_node
9580           try:
9581             _CreateBlockDev(self, node, instance, new_disk,
9582                             f_create, info, f_create)
9583           except errors.OpExecError, err:
9584             self.LogWarning("Failed to create volume %s (%s) on"
9585                             " node %s: %s",
9586                             new_disk.iv_name, new_disk, node, err)
9587         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9588                        (new_disk.size, new_disk.mode)))
9589       else:
9590         # change a given disk
9591         instance.disks[disk_op].mode = disk_dict['mode']
9592         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9593
9594     if self.op.disk_template:
9595       r_shut = _ShutdownInstanceDisks(self, instance)
9596       if not r_shut:
9597         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9598                                  " proceed with disk template conversion")
9599       mode = (instance.disk_template, self.op.disk_template)
9600       try:
9601         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9602       except:
9603         self.cfg.ReleaseDRBDMinors(instance.name)
9604         raise
9605       result.append(("disk_template", self.op.disk_template))
9606
9607     # NIC changes
9608     for nic_op, nic_dict in self.op.nics:
9609       if nic_op == constants.DDM_REMOVE:
9610         # remove the last nic
9611         del instance.nics[-1]
9612         result.append(("nic.%d" % len(instance.nics), "remove"))
9613       elif nic_op == constants.DDM_ADD:
9614         # mac and bridge should be set, by now
9615         mac = nic_dict['mac']
9616         ip = nic_dict.get('ip', None)
9617         nicparams = self.nic_pinst[constants.DDM_ADD]
9618         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9619         instance.nics.append(new_nic)
9620         result.append(("nic.%d" % (len(instance.nics) - 1),
9621                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9622                        (new_nic.mac, new_nic.ip,
9623                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9624                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9625                        )))
9626       else:
9627         for key in 'mac', 'ip':
9628           if key in nic_dict:
9629             setattr(instance.nics[nic_op], key, nic_dict[key])
9630         if nic_op in self.nic_pinst:
9631           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9632         for key, val in nic_dict.iteritems():
9633           result.append(("nic.%s/%d" % (key, nic_op), val))
9634
9635     # hvparams changes
9636     if self.op.hvparams:
9637       instance.hvparams = self.hv_inst
9638       for key, val in self.op.hvparams.iteritems():
9639         result.append(("hv/%s" % key, val))
9640
9641     # beparams changes
9642     if self.op.beparams:
9643       instance.beparams = self.be_inst
9644       for key, val in self.op.beparams.iteritems():
9645         result.append(("be/%s" % key, val))
9646
9647     # OS change
9648     if self.op.os_name:
9649       instance.os = self.op.os_name
9650
9651     # osparams changes
9652     if self.op.osparams:
9653       instance.osparams = self.os_inst
9654       for key, val in self.op.osparams.iteritems():
9655         result.append(("os/%s" % key, val))
9656
9657     self.cfg.Update(instance, feedback_fn)
9658
9659     return result
9660
9661   _DISK_CONVERSIONS = {
9662     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9663     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9664     }
9665
9666
9667 class LUBackupQuery(NoHooksLU):
9668   """Query the exports list
9669
9670   """
9671   REQ_BGL = False
9672
9673   def ExpandNames(self):
9674     self.needed_locks = {}
9675     self.share_locks[locking.LEVEL_NODE] = 1
9676     if not self.op.nodes:
9677       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9678     else:
9679       self.needed_locks[locking.LEVEL_NODE] = \
9680         _GetWantedNodes(self, self.op.nodes)
9681
9682   def Exec(self, feedback_fn):
9683     """Compute the list of all the exported system images.
9684
9685     @rtype: dict
9686     @return: a dictionary with the structure node->(export-list)
9687         where export-list is a list of the instances exported on
9688         that node.
9689
9690     """
9691     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9692     rpcresult = self.rpc.call_export_list(self.nodes)
9693     result = {}
9694     for node in rpcresult:
9695       if rpcresult[node].fail_msg:
9696         result[node] = False
9697       else:
9698         result[node] = rpcresult[node].payload
9699
9700     return result
9701
9702
9703 class LUBackupPrepare(NoHooksLU):
9704   """Prepares an instance for an export and returns useful information.
9705
9706   """
9707   REQ_BGL = False
9708
9709   def ExpandNames(self):
9710     self._ExpandAndLockInstance()
9711
9712   def CheckPrereq(self):
9713     """Check prerequisites.
9714
9715     """
9716     instance_name = self.op.instance_name
9717
9718     self.instance = self.cfg.GetInstanceInfo(instance_name)
9719     assert self.instance is not None, \
9720           "Cannot retrieve locked instance %s" % self.op.instance_name
9721     _CheckNodeOnline(self, self.instance.primary_node)
9722
9723     self._cds = _GetClusterDomainSecret()
9724
9725   def Exec(self, feedback_fn):
9726     """Prepares an instance for an export.
9727
9728     """
9729     instance = self.instance
9730
9731     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9732       salt = utils.GenerateSecret(8)
9733
9734       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9735       result = self.rpc.call_x509_cert_create(instance.primary_node,
9736                                               constants.RIE_CERT_VALIDITY)
9737       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9738
9739       (name, cert_pem) = result.payload
9740
9741       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9742                                              cert_pem)
9743
9744       return {
9745         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9746         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9747                           salt),
9748         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9749         }
9750
9751     return None
9752
9753
9754 class LUBackupExport(LogicalUnit):
9755   """Export an instance to an image in the cluster.
9756
9757   """
9758   HPATH = "instance-export"
9759   HTYPE = constants.HTYPE_INSTANCE
9760   REQ_BGL = False
9761
9762   def CheckArguments(self):
9763     """Check the arguments.
9764
9765     """
9766     self.x509_key_name = self.op.x509_key_name
9767     self.dest_x509_ca_pem = self.op.destination_x509_ca
9768
9769     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9770       if not self.x509_key_name:
9771         raise errors.OpPrereqError("Missing X509 key name for encryption",
9772                                    errors.ECODE_INVAL)
9773
9774       if not self.dest_x509_ca_pem:
9775         raise errors.OpPrereqError("Missing destination X509 CA",
9776                                    errors.ECODE_INVAL)
9777
9778   def ExpandNames(self):
9779     self._ExpandAndLockInstance()
9780
9781     # Lock all nodes for local exports
9782     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9783       # FIXME: lock only instance primary and destination node
9784       #
9785       # Sad but true, for now we have do lock all nodes, as we don't know where
9786       # the previous export might be, and in this LU we search for it and
9787       # remove it from its current node. In the future we could fix this by:
9788       #  - making a tasklet to search (share-lock all), then create the
9789       #    new one, then one to remove, after
9790       #  - removing the removal operation altogether
9791       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9792
9793   def DeclareLocks(self, level):
9794     """Last minute lock declaration."""
9795     # All nodes are locked anyway, so nothing to do here.
9796
9797   def BuildHooksEnv(self):
9798     """Build hooks env.
9799
9800     This will run on the master, primary node and target node.
9801
9802     """
9803     env = {
9804       "EXPORT_MODE": self.op.mode,
9805       "EXPORT_NODE": self.op.target_node,
9806       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9807       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9808       # TODO: Generic function for boolean env variables
9809       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9810       }
9811
9812     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9813
9814     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9815
9816     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9817       nl.append(self.op.target_node)
9818
9819     return env, nl, nl
9820
9821   def CheckPrereq(self):
9822     """Check prerequisites.
9823
9824     This checks that the instance and node names are valid.
9825
9826     """
9827     instance_name = self.op.instance_name
9828
9829     self.instance = self.cfg.GetInstanceInfo(instance_name)
9830     assert self.instance is not None, \
9831           "Cannot retrieve locked instance %s" % self.op.instance_name
9832     _CheckNodeOnline(self, self.instance.primary_node)
9833
9834     if (self.op.remove_instance and self.instance.admin_up and
9835         not self.op.shutdown):
9836       raise errors.OpPrereqError("Can not remove instance without shutting it"
9837                                  " down before")
9838
9839     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9840       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9841       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9842       assert self.dst_node is not None
9843
9844       _CheckNodeOnline(self, self.dst_node.name)
9845       _CheckNodeNotDrained(self, self.dst_node.name)
9846
9847       self._cds = None
9848       self.dest_disk_info = None
9849       self.dest_x509_ca = None
9850
9851     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9852       self.dst_node = None
9853
9854       if len(self.op.target_node) != len(self.instance.disks):
9855         raise errors.OpPrereqError(("Received destination information for %s"
9856                                     " disks, but instance %s has %s disks") %
9857                                    (len(self.op.target_node), instance_name,
9858                                     len(self.instance.disks)),
9859                                    errors.ECODE_INVAL)
9860
9861       cds = _GetClusterDomainSecret()
9862
9863       # Check X509 key name
9864       try:
9865         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9866       except (TypeError, ValueError), err:
9867         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9868
9869       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9870         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9871                                    errors.ECODE_INVAL)
9872
9873       # Load and verify CA
9874       try:
9875         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9876       except OpenSSL.crypto.Error, err:
9877         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9878                                    (err, ), errors.ECODE_INVAL)
9879
9880       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9881       if errcode is not None:
9882         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9883                                    (msg, ), errors.ECODE_INVAL)
9884
9885       self.dest_x509_ca = cert
9886
9887       # Verify target information
9888       disk_info = []
9889       for idx, disk_data in enumerate(self.op.target_node):
9890         try:
9891           (host, port, magic) = \
9892             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9893         except errors.GenericError, err:
9894           raise errors.OpPrereqError("Target info for disk %s: %s" %
9895                                      (idx, err), errors.ECODE_INVAL)
9896
9897         disk_info.append((host, port, magic))
9898
9899       assert len(disk_info) == len(self.op.target_node)
9900       self.dest_disk_info = disk_info
9901
9902     else:
9903       raise errors.ProgrammerError("Unhandled export mode %r" %
9904                                    self.op.mode)
9905
9906     # instance disk type verification
9907     # TODO: Implement export support for file-based disks
9908     for disk in self.instance.disks:
9909       if disk.dev_type == constants.LD_FILE:
9910         raise errors.OpPrereqError("Export not supported for instances with"
9911                                    " file-based disks", errors.ECODE_INVAL)
9912
9913   def _CleanupExports(self, feedback_fn):
9914     """Removes exports of current instance from all other nodes.
9915
9916     If an instance in a cluster with nodes A..D was exported to node C, its
9917     exports will be removed from the nodes A, B and D.
9918
9919     """
9920     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9921
9922     nodelist = self.cfg.GetNodeList()
9923     nodelist.remove(self.dst_node.name)
9924
9925     # on one-node clusters nodelist will be empty after the removal
9926     # if we proceed the backup would be removed because OpBackupQuery
9927     # substitutes an empty list with the full cluster node list.
9928     iname = self.instance.name
9929     if nodelist:
9930       feedback_fn("Removing old exports for instance %s" % iname)
9931       exportlist = self.rpc.call_export_list(nodelist)
9932       for node in exportlist:
9933         if exportlist[node].fail_msg:
9934           continue
9935         if iname in exportlist[node].payload:
9936           msg = self.rpc.call_export_remove(node, iname).fail_msg
9937           if msg:
9938             self.LogWarning("Could not remove older export for instance %s"
9939                             " on node %s: %s", iname, node, msg)
9940
9941   def Exec(self, feedback_fn):
9942     """Export an instance to an image in the cluster.
9943
9944     """
9945     assert self.op.mode in constants.EXPORT_MODES
9946
9947     instance = self.instance
9948     src_node = instance.primary_node
9949
9950     if self.op.shutdown:
9951       # shutdown the instance, but not the disks
9952       feedback_fn("Shutting down instance %s" % instance.name)
9953       result = self.rpc.call_instance_shutdown(src_node, instance,
9954                                                self.op.shutdown_timeout)
9955       # TODO: Maybe ignore failures if ignore_remove_failures is set
9956       result.Raise("Could not shutdown instance %s on"
9957                    " node %s" % (instance.name, src_node))
9958
9959     # set the disks ID correctly since call_instance_start needs the
9960     # correct drbd minor to create the symlinks
9961     for disk in instance.disks:
9962       self.cfg.SetDiskID(disk, src_node)
9963
9964     activate_disks = (not instance.admin_up)
9965
9966     if activate_disks:
9967       # Activate the instance disks if we'exporting a stopped instance
9968       feedback_fn("Activating disks for %s" % instance.name)
9969       _StartInstanceDisks(self, instance, None)
9970
9971     try:
9972       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9973                                                      instance)
9974
9975       helper.CreateSnapshots()
9976       try:
9977         if (self.op.shutdown and instance.admin_up and
9978             not self.op.remove_instance):
9979           assert not activate_disks
9980           feedback_fn("Starting instance %s" % instance.name)
9981           result = self.rpc.call_instance_start(src_node, instance, None, None)
9982           msg = result.fail_msg
9983           if msg:
9984             feedback_fn("Failed to start instance: %s" % msg)
9985             _ShutdownInstanceDisks(self, instance)
9986             raise errors.OpExecError("Could not start instance: %s" % msg)
9987
9988         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9989           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9990         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9991           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9992           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9993
9994           (key_name, _, _) = self.x509_key_name
9995
9996           dest_ca_pem = \
9997             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9998                                             self.dest_x509_ca)
9999
10000           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10001                                                      key_name, dest_ca_pem,
10002                                                      timeouts)
10003       finally:
10004         helper.Cleanup()
10005
10006       # Check for backwards compatibility
10007       assert len(dresults) == len(instance.disks)
10008       assert compat.all(isinstance(i, bool) for i in dresults), \
10009              "Not all results are boolean: %r" % dresults
10010
10011     finally:
10012       if activate_disks:
10013         feedback_fn("Deactivating disks for %s" % instance.name)
10014         _ShutdownInstanceDisks(self, instance)
10015
10016     if not (compat.all(dresults) and fin_resu):
10017       failures = []
10018       if not fin_resu:
10019         failures.append("export finalization")
10020       if not compat.all(dresults):
10021         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10022                                if not dsk)
10023         failures.append("disk export: disk(s) %s" % fdsk)
10024
10025       raise errors.OpExecError("Export failed, errors in %s" %
10026                                utils.CommaJoin(failures))
10027
10028     # At this point, the export was successful, we can cleanup/finish
10029
10030     # Remove instance if requested
10031     if self.op.remove_instance:
10032       feedback_fn("Removing instance %s" % instance.name)
10033       _RemoveInstance(self, feedback_fn, instance,
10034                       self.op.ignore_remove_failures)
10035
10036     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10037       self._CleanupExports(feedback_fn)
10038
10039     return fin_resu, dresults
10040
10041
10042 class LUBackupRemove(NoHooksLU):
10043   """Remove exports related to the named instance.
10044
10045   """
10046   REQ_BGL = False
10047
10048   def ExpandNames(self):
10049     self.needed_locks = {}
10050     # We need all nodes to be locked in order for RemoveExport to work, but we
10051     # don't need to lock the instance itself, as nothing will happen to it (and
10052     # we can remove exports also for a removed instance)
10053     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10054
10055   def Exec(self, feedback_fn):
10056     """Remove any export.
10057
10058     """
10059     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10060     # If the instance was not found we'll try with the name that was passed in.
10061     # This will only work if it was an FQDN, though.
10062     fqdn_warn = False
10063     if not instance_name:
10064       fqdn_warn = True
10065       instance_name = self.op.instance_name
10066
10067     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10068     exportlist = self.rpc.call_export_list(locked_nodes)
10069     found = False
10070     for node in exportlist:
10071       msg = exportlist[node].fail_msg
10072       if msg:
10073         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10074         continue
10075       if instance_name in exportlist[node].payload:
10076         found = True
10077         result = self.rpc.call_export_remove(node, instance_name)
10078         msg = result.fail_msg
10079         if msg:
10080           logging.error("Could not remove export for instance %s"
10081                         " on node %s: %s", instance_name, node, msg)
10082
10083     if fqdn_warn and not found:
10084       feedback_fn("Export not found. If trying to remove an export belonging"
10085                   " to a deleted instance please use its Fully Qualified"
10086                   " Domain Name.")
10087
10088
10089 class LUGroupAdd(LogicalUnit):
10090   """Logical unit for creating node groups.
10091
10092   """
10093   HPATH = "group-add"
10094   HTYPE = constants.HTYPE_GROUP
10095   REQ_BGL = False
10096
10097   def ExpandNames(self):
10098     # We need the new group's UUID here so that we can create and acquire the
10099     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10100     # that it should not check whether the UUID exists in the configuration.
10101     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10102     self.needed_locks = {}
10103     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10104
10105   def CheckPrereq(self):
10106     """Check prerequisites.
10107
10108     This checks that the given group name is not an existing node group
10109     already.
10110
10111     """
10112     try:
10113       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10114     except errors.OpPrereqError:
10115       pass
10116     else:
10117       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10118                                  " node group (UUID: %s)" %
10119                                  (self.op.group_name, existing_uuid),
10120                                  errors.ECODE_EXISTS)
10121
10122     if self.op.ndparams:
10123       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10124
10125   def BuildHooksEnv(self):
10126     """Build hooks env.
10127
10128     """
10129     env = {
10130       "GROUP_NAME": self.op.group_name,
10131       }
10132     mn = self.cfg.GetMasterNode()
10133     return env, [mn], [mn]
10134
10135   def Exec(self, feedback_fn):
10136     """Add the node group to the cluster.
10137
10138     """
10139     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10140                                   uuid=self.group_uuid,
10141                                   alloc_policy=self.op.alloc_policy,
10142                                   ndparams=self.op.ndparams)
10143
10144     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10145     del self.remove_locks[locking.LEVEL_NODEGROUP]
10146
10147
10148 class LUGroupAssignNodes(NoHooksLU):
10149   """Logical unit for assigning nodes to groups.
10150
10151   """
10152   REQ_BGL = False
10153
10154   def ExpandNames(self):
10155     # These raise errors.OpPrereqError on their own:
10156     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10157     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10158
10159     # We want to lock all the affected nodes and groups. We have readily
10160     # available the list of nodes, and the *destination* group. To gather the
10161     # list of "source" groups, we need to fetch node information.
10162     self.node_data = self.cfg.GetAllNodesInfo()
10163     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10164     affected_groups.add(self.group_uuid)
10165
10166     self.needed_locks = {
10167       locking.LEVEL_NODEGROUP: list(affected_groups),
10168       locking.LEVEL_NODE: self.op.nodes,
10169       }
10170
10171   def CheckPrereq(self):
10172     """Check prerequisites.
10173
10174     """
10175     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10176     instance_data = self.cfg.GetAllInstancesInfo()
10177
10178     if self.group is None:
10179       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10180                                (self.op.group_name, self.group_uuid))
10181
10182     (new_splits, previous_splits) = \
10183       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10184                                              for node in self.op.nodes],
10185                                             self.node_data, instance_data)
10186
10187     if new_splits:
10188       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10189
10190       if not self.op.force:
10191         raise errors.OpExecError("The following instances get split by this"
10192                                  " change and --force was not given: %s" %
10193                                  fmt_new_splits)
10194       else:
10195         self.LogWarning("This operation will split the following instances: %s",
10196                         fmt_new_splits)
10197
10198         if previous_splits:
10199           self.LogWarning("In addition, these already-split instances continue"
10200                           " to be split across groups: %s",
10201                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10202
10203   def Exec(self, feedback_fn):
10204     """Assign nodes to a new group.
10205
10206     """
10207     for node in self.op.nodes:
10208       self.node_data[node].group = self.group_uuid
10209
10210     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10211
10212   @staticmethod
10213   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10214     """Check for split instances after a node assignment.
10215
10216     This method considers a series of node assignments as an atomic operation,
10217     and returns information about split instances after applying the set of
10218     changes.
10219
10220     In particular, it returns information about newly split instances, and
10221     instances that were already split, and remain so after the change.
10222
10223     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10224     considered.
10225
10226     @type changes: list of (node_name, new_group_uuid) pairs.
10227     @param changes: list of node assignments to consider.
10228     @param node_data: a dict with data for all nodes
10229     @param instance_data: a dict with all instances to consider
10230     @rtype: a two-tuple
10231     @return: a list of instances that were previously okay and result split as a
10232       consequence of this change, and a list of instances that were previously
10233       split and this change does not fix.
10234
10235     """
10236     changed_nodes = dict((node, group) for node, group in changes
10237                          if node_data[node].group != group)
10238
10239     all_split_instances = set()
10240     previously_split_instances = set()
10241
10242     def InstanceNodes(instance):
10243       return [instance.primary_node] + list(instance.secondary_nodes)
10244
10245     for inst in instance_data.values():
10246       if inst.disk_template not in constants.DTS_NET_MIRROR:
10247         continue
10248
10249       instance_nodes = InstanceNodes(inst)
10250
10251       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10252         previously_split_instances.add(inst.name)
10253
10254       if len(set(changed_nodes.get(node, node_data[node].group)
10255                  for node in instance_nodes)) > 1:
10256         all_split_instances.add(inst.name)
10257
10258     return (list(all_split_instances - previously_split_instances),
10259             list(previously_split_instances & all_split_instances))
10260
10261
10262 class _GroupQuery(_QueryBase):
10263
10264   FIELDS = query.GROUP_FIELDS
10265
10266   def ExpandNames(self, lu):
10267     lu.needed_locks = {}
10268
10269     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10270     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10271
10272     if not self.names:
10273       self.wanted = [name_to_uuid[name]
10274                      for name in utils.NiceSort(name_to_uuid.keys())]
10275     else:
10276       # Accept names to be either names or UUIDs.
10277       missing = []
10278       self.wanted = []
10279       all_uuid = frozenset(self._all_groups.keys())
10280
10281       for name in self.names:
10282         if name in all_uuid:
10283           self.wanted.append(name)
10284         elif name in name_to_uuid:
10285           self.wanted.append(name_to_uuid[name])
10286         else:
10287           missing.append(name)
10288
10289       if missing:
10290         raise errors.OpPrereqError("Some groups do not exist: %s" %
10291                                    utils.CommaJoin(missing),
10292                                    errors.ECODE_NOENT)
10293
10294   def DeclareLocks(self, lu, level):
10295     pass
10296
10297   def _GetQueryData(self, lu):
10298     """Computes the list of node groups and their attributes.
10299
10300     """
10301     do_nodes = query.GQ_NODE in self.requested_data
10302     do_instances = query.GQ_INST in self.requested_data
10303
10304     group_to_nodes = None
10305     group_to_instances = None
10306
10307     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10308     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10309     # latter GetAllInstancesInfo() is not enough, for we have to go through
10310     # instance->node. Hence, we will need to process nodes even if we only need
10311     # instance information.
10312     if do_nodes or do_instances:
10313       all_nodes = lu.cfg.GetAllNodesInfo()
10314       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10315       node_to_group = {}
10316
10317       for node in all_nodes.values():
10318         if node.group in group_to_nodes:
10319           group_to_nodes[node.group].append(node.name)
10320           node_to_group[node.name] = node.group
10321
10322       if do_instances:
10323         all_instances = lu.cfg.GetAllInstancesInfo()
10324         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10325
10326         for instance in all_instances.values():
10327           node = instance.primary_node
10328           if node in node_to_group:
10329             group_to_instances[node_to_group[node]].append(instance.name)
10330
10331         if not do_nodes:
10332           # Do not pass on node information if it was not requested.
10333           group_to_nodes = None
10334
10335     return query.GroupQueryData([self._all_groups[uuid]
10336                                  for uuid in self.wanted],
10337                                 group_to_nodes, group_to_instances)
10338
10339
10340 class LUGroupQuery(NoHooksLU):
10341   """Logical unit for querying node groups.
10342
10343   """
10344   REQ_BGL = False
10345
10346   def CheckArguments(self):
10347     self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10348
10349   def ExpandNames(self):
10350     self.gq.ExpandNames(self)
10351
10352   def Exec(self, feedback_fn):
10353     return self.gq.OldStyleQuery(self)
10354
10355
10356 class LUGroupSetParams(LogicalUnit):
10357   """Modifies the parameters of a node group.
10358
10359   """
10360   HPATH = "group-modify"
10361   HTYPE = constants.HTYPE_GROUP
10362   REQ_BGL = False
10363
10364   def CheckArguments(self):
10365     all_changes = [
10366       self.op.ndparams,
10367       self.op.alloc_policy,
10368       ]
10369
10370     if all_changes.count(None) == len(all_changes):
10371       raise errors.OpPrereqError("Please pass at least one modification",
10372                                  errors.ECODE_INVAL)
10373
10374   def ExpandNames(self):
10375     # This raises errors.OpPrereqError on its own:
10376     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10377
10378     self.needed_locks = {
10379       locking.LEVEL_NODEGROUP: [self.group_uuid],
10380       }
10381
10382   def CheckPrereq(self):
10383     """Check prerequisites.
10384
10385     """
10386     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10387
10388     if self.group is None:
10389       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10390                                (self.op.group_name, self.group_uuid))
10391
10392     if self.op.ndparams:
10393       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10394       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10395       self.new_ndparams = new_ndparams
10396
10397   def BuildHooksEnv(self):
10398     """Build hooks env.
10399
10400     """
10401     env = {
10402       "GROUP_NAME": self.op.group_name,
10403       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10404       }
10405     mn = self.cfg.GetMasterNode()
10406     return env, [mn], [mn]
10407
10408   def Exec(self, feedback_fn):
10409     """Modifies the node group.
10410
10411     """
10412     result = []
10413
10414     if self.op.ndparams:
10415       self.group.ndparams = self.new_ndparams
10416       result.append(("ndparams", str(self.group.ndparams)))
10417
10418     if self.op.alloc_policy:
10419       self.group.alloc_policy = self.op.alloc_policy
10420
10421     self.cfg.Update(self.group, feedback_fn)
10422     return result
10423
10424
10425
10426 class LUGroupRemove(LogicalUnit):
10427   HPATH = "group-remove"
10428   HTYPE = constants.HTYPE_GROUP
10429   REQ_BGL = False
10430
10431   def ExpandNames(self):
10432     # This will raises errors.OpPrereqError on its own:
10433     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10434     self.needed_locks = {
10435       locking.LEVEL_NODEGROUP: [self.group_uuid],
10436       }
10437
10438   def CheckPrereq(self):
10439     """Check prerequisites.
10440
10441     This checks that the given group name exists as a node group, that is
10442     empty (i.e., contains no nodes), and that is not the last group of the
10443     cluster.
10444
10445     """
10446     # Verify that the group is empty.
10447     group_nodes = [node.name
10448                    for node in self.cfg.GetAllNodesInfo().values()
10449                    if node.group == self.group_uuid]
10450
10451     if group_nodes:
10452       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10453                                  " nodes: %s" %
10454                                  (self.op.group_name,
10455                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10456                                  errors.ECODE_STATE)
10457
10458     # Verify the cluster would not be left group-less.
10459     if len(self.cfg.GetNodeGroupList()) == 1:
10460       raise errors.OpPrereqError("Group '%s' is the only group,"
10461                                  " cannot be removed" %
10462                                  self.op.group_name,
10463                                  errors.ECODE_STATE)
10464
10465   def BuildHooksEnv(self):
10466     """Build hooks env.
10467
10468     """
10469     env = {
10470       "GROUP_NAME": self.op.group_name,
10471       }
10472     mn = self.cfg.GetMasterNode()
10473     return env, [mn], [mn]
10474
10475   def Exec(self, feedback_fn):
10476     """Remove the node group.
10477
10478     """
10479     try:
10480       self.cfg.RemoveNodeGroup(self.group_uuid)
10481     except errors.ConfigurationError:
10482       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10483                                (self.op.group_name, self.group_uuid))
10484
10485     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10486
10487
10488 class LUGroupRename(LogicalUnit):
10489   HPATH = "group-rename"
10490   HTYPE = constants.HTYPE_GROUP
10491   REQ_BGL = False
10492
10493   def ExpandNames(self):
10494     # This raises errors.OpPrereqError on its own:
10495     self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10496
10497     self.needed_locks = {
10498       locking.LEVEL_NODEGROUP: [self.group_uuid],
10499       }
10500
10501   def CheckPrereq(self):
10502     """Check prerequisites.
10503
10504     This checks that the given old_name exists as a node group, and that
10505     new_name doesn't.
10506
10507     """
10508     try:
10509       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10510     except errors.OpPrereqError:
10511       pass
10512     else:
10513       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10514                                  " node group (UUID: %s)" %
10515                                  (self.op.new_name, new_name_uuid),
10516                                  errors.ECODE_EXISTS)
10517
10518   def BuildHooksEnv(self):
10519     """Build hooks env.
10520
10521     """
10522     env = {
10523       "OLD_NAME": self.op.old_name,
10524       "NEW_NAME": self.op.new_name,
10525       }
10526
10527     mn = self.cfg.GetMasterNode()
10528     all_nodes = self.cfg.GetAllNodesInfo()
10529     run_nodes = [mn]
10530     all_nodes.pop(mn, None)
10531
10532     for node in all_nodes.values():
10533       if node.group == self.group_uuid:
10534         run_nodes.append(node.name)
10535
10536     return env, run_nodes, run_nodes
10537
10538   def Exec(self, feedback_fn):
10539     """Rename the node group.
10540
10541     """
10542     group = self.cfg.GetNodeGroup(self.group_uuid)
10543
10544     if group is None:
10545       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10546                                (self.op.old_name, self.group_uuid))
10547
10548     group.name = self.op.new_name
10549     self.cfg.Update(group, feedback_fn)
10550
10551     return self.op.new_name
10552
10553
10554 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10555   """Generic tags LU.
10556
10557   This is an abstract class which is the parent of all the other tags LUs.
10558
10559   """
10560
10561   def ExpandNames(self):
10562     self.needed_locks = {}
10563     if self.op.kind == constants.TAG_NODE:
10564       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10565       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10566     elif self.op.kind == constants.TAG_INSTANCE:
10567       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10568       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10569
10570     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10571     # not possible to acquire the BGL based on opcode parameters)
10572
10573   def CheckPrereq(self):
10574     """Check prerequisites.
10575
10576     """
10577     if self.op.kind == constants.TAG_CLUSTER:
10578       self.target = self.cfg.GetClusterInfo()
10579     elif self.op.kind == constants.TAG_NODE:
10580       self.target = self.cfg.GetNodeInfo(self.op.name)
10581     elif self.op.kind == constants.TAG_INSTANCE:
10582       self.target = self.cfg.GetInstanceInfo(self.op.name)
10583     else:
10584       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10585                                  str(self.op.kind), errors.ECODE_INVAL)
10586
10587
10588 class LUTagsGet(TagsLU):
10589   """Returns the tags of a given object.
10590
10591   """
10592   REQ_BGL = False
10593
10594   def ExpandNames(self):
10595     TagsLU.ExpandNames(self)
10596
10597     # Share locks as this is only a read operation
10598     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10599
10600   def Exec(self, feedback_fn):
10601     """Returns the tag list.
10602
10603     """
10604     return list(self.target.GetTags())
10605
10606
10607 class LUTagsSearch(NoHooksLU):
10608   """Searches the tags for a given pattern.
10609
10610   """
10611   REQ_BGL = False
10612
10613   def ExpandNames(self):
10614     self.needed_locks = {}
10615
10616   def CheckPrereq(self):
10617     """Check prerequisites.
10618
10619     This checks the pattern passed for validity by compiling it.
10620
10621     """
10622     try:
10623       self.re = re.compile(self.op.pattern)
10624     except re.error, err:
10625       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10626                                  (self.op.pattern, err), errors.ECODE_INVAL)
10627
10628   def Exec(self, feedback_fn):
10629     """Returns the tag list.
10630
10631     """
10632     cfg = self.cfg
10633     tgts = [("/cluster", cfg.GetClusterInfo())]
10634     ilist = cfg.GetAllInstancesInfo().values()
10635     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10636     nlist = cfg.GetAllNodesInfo().values()
10637     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10638     results = []
10639     for path, target in tgts:
10640       for tag in target.GetTags():
10641         if self.re.search(tag):
10642           results.append((path, tag))
10643     return results
10644
10645
10646 class LUTagsSet(TagsLU):
10647   """Sets a tag on a given object.
10648
10649   """
10650   REQ_BGL = False
10651
10652   def CheckPrereq(self):
10653     """Check prerequisites.
10654
10655     This checks the type and length of the tag name and value.
10656
10657     """
10658     TagsLU.CheckPrereq(self)
10659     for tag in self.op.tags:
10660       objects.TaggableObject.ValidateTag(tag)
10661
10662   def Exec(self, feedback_fn):
10663     """Sets the tag.
10664
10665     """
10666     try:
10667       for tag in self.op.tags:
10668         self.target.AddTag(tag)
10669     except errors.TagError, err:
10670       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10671     self.cfg.Update(self.target, feedback_fn)
10672
10673
10674 class LUTagsDel(TagsLU):
10675   """Delete a list of tags from a given object.
10676
10677   """
10678   REQ_BGL = False
10679
10680   def CheckPrereq(self):
10681     """Check prerequisites.
10682
10683     This checks that we have the given tag.
10684
10685     """
10686     TagsLU.CheckPrereq(self)
10687     for tag in self.op.tags:
10688       objects.TaggableObject.ValidateTag(tag)
10689     del_tags = frozenset(self.op.tags)
10690     cur_tags = self.target.GetTags()
10691
10692     diff_tags = del_tags - cur_tags
10693     if diff_tags:
10694       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10695       raise errors.OpPrereqError("Tag(s) %s not found" %
10696                                  (utils.CommaJoin(diff_names), ),
10697                                  errors.ECODE_NOENT)
10698
10699   def Exec(self, feedback_fn):
10700     """Remove the tag from the object.
10701
10702     """
10703     for tag in self.op.tags:
10704       self.target.RemoveTag(tag)
10705     self.cfg.Update(self.target, feedback_fn)
10706
10707
10708 class LUTestDelay(NoHooksLU):
10709   """Sleep for a specified amount of time.
10710
10711   This LU sleeps on the master and/or nodes for a specified amount of
10712   time.
10713
10714   """
10715   REQ_BGL = False
10716
10717   def ExpandNames(self):
10718     """Expand names and set required locks.
10719
10720     This expands the node list, if any.
10721
10722     """
10723     self.needed_locks = {}
10724     if self.op.on_nodes:
10725       # _GetWantedNodes can be used here, but is not always appropriate to use
10726       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10727       # more information.
10728       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10729       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10730
10731   def _TestDelay(self):
10732     """Do the actual sleep.
10733
10734     """
10735     if self.op.on_master:
10736       if not utils.TestDelay(self.op.duration):
10737         raise errors.OpExecError("Error during master delay test")
10738     if self.op.on_nodes:
10739       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10740       for node, node_result in result.items():
10741         node_result.Raise("Failure during rpc call to node %s" % node)
10742
10743   def Exec(self, feedback_fn):
10744     """Execute the test delay opcode, with the wanted repetitions.
10745
10746     """
10747     if self.op.repeat == 0:
10748       self._TestDelay()
10749     else:
10750       top_value = self.op.repeat - 1
10751       for i in range(self.op.repeat):
10752         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10753         self._TestDelay()
10754
10755
10756 class LUTestJqueue(NoHooksLU):
10757   """Utility LU to test some aspects of the job queue.
10758
10759   """
10760   REQ_BGL = False
10761
10762   # Must be lower than default timeout for WaitForJobChange to see whether it
10763   # notices changed jobs
10764   _CLIENT_CONNECT_TIMEOUT = 20.0
10765   _CLIENT_CONFIRM_TIMEOUT = 60.0
10766
10767   @classmethod
10768   def _NotifyUsingSocket(cls, cb, errcls):
10769     """Opens a Unix socket and waits for another program to connect.
10770
10771     @type cb: callable
10772     @param cb: Callback to send socket name to client
10773     @type errcls: class
10774     @param errcls: Exception class to use for errors
10775
10776     """
10777     # Using a temporary directory as there's no easy way to create temporary
10778     # sockets without writing a custom loop around tempfile.mktemp and
10779     # socket.bind
10780     tmpdir = tempfile.mkdtemp()
10781     try:
10782       tmpsock = utils.PathJoin(tmpdir, "sock")
10783
10784       logging.debug("Creating temporary socket at %s", tmpsock)
10785       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10786       try:
10787         sock.bind(tmpsock)
10788         sock.listen(1)
10789
10790         # Send details to client
10791         cb(tmpsock)
10792
10793         # Wait for client to connect before continuing
10794         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10795         try:
10796           (conn, _) = sock.accept()
10797         except socket.error, err:
10798           raise errcls("Client didn't connect in time (%s)" % err)
10799       finally:
10800         sock.close()
10801     finally:
10802       # Remove as soon as client is connected
10803       shutil.rmtree(tmpdir)
10804
10805     # Wait for client to close
10806     try:
10807       try:
10808         # pylint: disable-msg=E1101
10809         # Instance of '_socketobject' has no ... member
10810         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10811         conn.recv(1)
10812       except socket.error, err:
10813         raise errcls("Client failed to confirm notification (%s)" % err)
10814     finally:
10815       conn.close()
10816
10817   def _SendNotification(self, test, arg, sockname):
10818     """Sends a notification to the client.
10819
10820     @type test: string
10821     @param test: Test name
10822     @param arg: Test argument (depends on test)
10823     @type sockname: string
10824     @param sockname: Socket path
10825
10826     """
10827     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10828
10829   def _Notify(self, prereq, test, arg):
10830     """Notifies the client of a test.
10831
10832     @type prereq: bool
10833     @param prereq: Whether this is a prereq-phase test
10834     @type test: string
10835     @param test: Test name
10836     @param arg: Test argument (depends on test)
10837
10838     """
10839     if prereq:
10840       errcls = errors.OpPrereqError
10841     else:
10842       errcls = errors.OpExecError
10843
10844     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10845                                                   test, arg),
10846                                    errcls)
10847
10848   def CheckArguments(self):
10849     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10850     self.expandnames_calls = 0
10851
10852   def ExpandNames(self):
10853     checkargs_calls = getattr(self, "checkargs_calls", 0)
10854     if checkargs_calls < 1:
10855       raise errors.ProgrammerError("CheckArguments was not called")
10856
10857     self.expandnames_calls += 1
10858
10859     if self.op.notify_waitlock:
10860       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10861
10862     self.LogInfo("Expanding names")
10863
10864     # Get lock on master node (just to get a lock, not for a particular reason)
10865     self.needed_locks = {
10866       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10867       }
10868
10869   def Exec(self, feedback_fn):
10870     if self.expandnames_calls < 1:
10871       raise errors.ProgrammerError("ExpandNames was not called")
10872
10873     if self.op.notify_exec:
10874       self._Notify(False, constants.JQT_EXEC, None)
10875
10876     self.LogInfo("Executing")
10877
10878     if self.op.log_messages:
10879       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10880       for idx, msg in enumerate(self.op.log_messages):
10881         self.LogInfo("Sending log message %s", idx + 1)
10882         feedback_fn(constants.JQT_MSGPREFIX + msg)
10883         # Report how many test messages have been sent
10884         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10885
10886     if self.op.fail:
10887       raise errors.OpExecError("Opcode failure was requested")
10888
10889     return True
10890
10891
10892 class IAllocator(object):
10893   """IAllocator framework.
10894
10895   An IAllocator instance has three sets of attributes:
10896     - cfg that is needed to query the cluster
10897     - input data (all members of the _KEYS class attribute are required)
10898     - four buffer attributes (in|out_data|text), that represent the
10899       input (to the external script) in text and data structure format,
10900       and the output from it, again in two formats
10901     - the result variables from the script (success, info, nodes) for
10902       easy usage
10903
10904   """
10905   # pylint: disable-msg=R0902
10906   # lots of instance attributes
10907   _ALLO_KEYS = [
10908     "name", "mem_size", "disks", "disk_template",
10909     "os", "tags", "nics", "vcpus", "hypervisor",
10910     ]
10911   _RELO_KEYS = [
10912     "name", "relocate_from",
10913     ]
10914   _EVAC_KEYS = [
10915     "evac_nodes",
10916     ]
10917
10918   def __init__(self, cfg, rpc, mode, **kwargs):
10919     self.cfg = cfg
10920     self.rpc = rpc
10921     # init buffer variables
10922     self.in_text = self.out_text = self.in_data = self.out_data = None
10923     # init all input fields so that pylint is happy
10924     self.mode = mode
10925     self.mem_size = self.disks = self.disk_template = None
10926     self.os = self.tags = self.nics = self.vcpus = None
10927     self.hypervisor = None
10928     self.relocate_from = None
10929     self.name = None
10930     self.evac_nodes = None
10931     # computed fields
10932     self.required_nodes = None
10933     # init result fields
10934     self.success = self.info = self.result = None
10935     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10936       keyset = self._ALLO_KEYS
10937       fn = self._AddNewInstance
10938     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10939       keyset = self._RELO_KEYS
10940       fn = self._AddRelocateInstance
10941     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10942       keyset = self._EVAC_KEYS
10943       fn = self._AddEvacuateNodes
10944     else:
10945       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10946                                    " IAllocator" % self.mode)
10947     for key in kwargs:
10948       if key not in keyset:
10949         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10950                                      " IAllocator" % key)
10951       setattr(self, key, kwargs[key])
10952
10953     for key in keyset:
10954       if key not in kwargs:
10955         raise errors.ProgrammerError("Missing input parameter '%s' to"
10956                                      " IAllocator" % key)
10957     self._BuildInputData(fn)
10958
10959   def _ComputeClusterData(self):
10960     """Compute the generic allocator input data.
10961
10962     This is the data that is independent of the actual operation.
10963
10964     """
10965     cfg = self.cfg
10966     cluster_info = cfg.GetClusterInfo()
10967     # cluster data
10968     data = {
10969       "version": constants.IALLOCATOR_VERSION,
10970       "cluster_name": cfg.GetClusterName(),
10971       "cluster_tags": list(cluster_info.GetTags()),
10972       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10973       # we don't have job IDs
10974       }
10975     ninfo = cfg.GetAllNodesInfo()
10976     iinfo = cfg.GetAllInstancesInfo().values()
10977     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10978
10979     # node data
10980     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10981
10982     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10983       hypervisor_name = self.hypervisor
10984     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10985       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10986     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10987       hypervisor_name = cluster_info.enabled_hypervisors[0]
10988
10989     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10990                                         hypervisor_name)
10991     node_iinfo = \
10992       self.rpc.call_all_instances_info(node_list,
10993                                        cluster_info.enabled_hypervisors)
10994
10995     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10996
10997     config_ndata = self._ComputeBasicNodeData(ninfo)
10998     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10999                                                  i_list, config_ndata)
11000     assert len(data["nodes"]) == len(ninfo), \
11001         "Incomplete node data computed"
11002
11003     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11004
11005     self.in_data = data
11006
11007   @staticmethod
11008   def _ComputeNodeGroupData(cfg):
11009     """Compute node groups data.
11010
11011     """
11012     ng = {}
11013     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11014       ng[guuid] = {
11015         "name": gdata.name,
11016         "alloc_policy": gdata.alloc_policy,
11017         }
11018     return ng
11019
11020   @staticmethod
11021   def _ComputeBasicNodeData(node_cfg):
11022     """Compute global node data.
11023
11024     @rtype: dict
11025     @returns: a dict of name: (node dict, node config)
11026
11027     """
11028     node_results = {}
11029     for ninfo in node_cfg.values():
11030       # fill in static (config-based) values
11031       pnr = {
11032         "tags": list(ninfo.GetTags()),
11033         "primary_ip": ninfo.primary_ip,
11034         "secondary_ip": ninfo.secondary_ip,
11035         "offline": ninfo.offline,
11036         "drained": ninfo.drained,
11037         "master_candidate": ninfo.master_candidate,
11038         "group": ninfo.group,
11039         "master_capable": ninfo.master_capable,
11040         "vm_capable": ninfo.vm_capable,
11041         }
11042
11043       node_results[ninfo.name] = pnr
11044
11045     return node_results
11046
11047   @staticmethod
11048   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11049                               node_results):
11050     """Compute global node data.
11051
11052     @param node_results: the basic node structures as filled from the config
11053
11054     """
11055     # make a copy of the current dict
11056     node_results = dict(node_results)
11057     for nname, nresult in node_data.items():
11058       assert nname in node_results, "Missing basic data for node %s" % nname
11059       ninfo = node_cfg[nname]
11060
11061       if not (ninfo.offline or ninfo.drained):
11062         nresult.Raise("Can't get data for node %s" % nname)
11063         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11064                                 nname)
11065         remote_info = nresult.payload
11066
11067         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11068                      'vg_size', 'vg_free', 'cpu_total']:
11069           if attr not in remote_info:
11070             raise errors.OpExecError("Node '%s' didn't return attribute"
11071                                      " '%s'" % (nname, attr))
11072           if not isinstance(remote_info[attr], int):
11073             raise errors.OpExecError("Node '%s' returned invalid value"
11074                                      " for '%s': %s" %
11075                                      (nname, attr, remote_info[attr]))
11076         # compute memory used by primary instances
11077         i_p_mem = i_p_up_mem = 0
11078         for iinfo, beinfo in i_list:
11079           if iinfo.primary_node == nname:
11080             i_p_mem += beinfo[constants.BE_MEMORY]
11081             if iinfo.name not in node_iinfo[nname].payload:
11082               i_used_mem = 0
11083             else:
11084               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11085             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11086             remote_info['memory_free'] -= max(0, i_mem_diff)
11087
11088             if iinfo.admin_up:
11089               i_p_up_mem += beinfo[constants.BE_MEMORY]
11090
11091         # compute memory used by instances
11092         pnr_dyn = {
11093           "total_memory": remote_info['memory_total'],
11094           "reserved_memory": remote_info['memory_dom0'],
11095           "free_memory": remote_info['memory_free'],
11096           "total_disk": remote_info['vg_size'],
11097           "free_disk": remote_info['vg_free'],
11098           "total_cpus": remote_info['cpu_total'],
11099           "i_pri_memory": i_p_mem,
11100           "i_pri_up_memory": i_p_up_mem,
11101           }
11102         pnr_dyn.update(node_results[nname])
11103         node_results[nname] = pnr_dyn
11104
11105     return node_results
11106
11107   @staticmethod
11108   def _ComputeInstanceData(cluster_info, i_list):
11109     """Compute global instance data.
11110
11111     """
11112     instance_data = {}
11113     for iinfo, beinfo in i_list:
11114       nic_data = []
11115       for nic in iinfo.nics:
11116         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11117         nic_dict = {"mac": nic.mac,
11118                     "ip": nic.ip,
11119                     "mode": filled_params[constants.NIC_MODE],
11120                     "link": filled_params[constants.NIC_LINK],
11121                    }
11122         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11123           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11124         nic_data.append(nic_dict)
11125       pir = {
11126         "tags": list(iinfo.GetTags()),
11127         "admin_up": iinfo.admin_up,
11128         "vcpus": beinfo[constants.BE_VCPUS],
11129         "memory": beinfo[constants.BE_MEMORY],
11130         "os": iinfo.os,
11131         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11132         "nics": nic_data,
11133         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11134         "disk_template": iinfo.disk_template,
11135         "hypervisor": iinfo.hypervisor,
11136         }
11137       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11138                                                  pir["disks"])
11139       instance_data[iinfo.name] = pir
11140
11141     return instance_data
11142
11143   def _AddNewInstance(self):
11144     """Add new instance data to allocator structure.
11145
11146     This in combination with _AllocatorGetClusterData will create the
11147     correct structure needed as input for the allocator.
11148
11149     The checks for the completeness of the opcode must have already been
11150     done.
11151
11152     """
11153     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11154
11155     if self.disk_template in constants.DTS_NET_MIRROR:
11156       self.required_nodes = 2
11157     else:
11158       self.required_nodes = 1
11159     request = {
11160       "name": self.name,
11161       "disk_template": self.disk_template,
11162       "tags": self.tags,
11163       "os": self.os,
11164       "vcpus": self.vcpus,
11165       "memory": self.mem_size,
11166       "disks": self.disks,
11167       "disk_space_total": disk_space,
11168       "nics": self.nics,
11169       "required_nodes": self.required_nodes,
11170       }
11171     return request
11172
11173   def _AddRelocateInstance(self):
11174     """Add relocate instance data to allocator structure.
11175
11176     This in combination with _IAllocatorGetClusterData will create the
11177     correct structure needed as input for the allocator.
11178
11179     The checks for the completeness of the opcode must have already been
11180     done.
11181
11182     """
11183     instance = self.cfg.GetInstanceInfo(self.name)
11184     if instance is None:
11185       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11186                                    " IAllocator" % self.name)
11187
11188     if instance.disk_template not in constants.DTS_NET_MIRROR:
11189       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11190                                  errors.ECODE_INVAL)
11191
11192     if len(instance.secondary_nodes) != 1:
11193       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11194                                  errors.ECODE_STATE)
11195
11196     self.required_nodes = 1
11197     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11198     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11199
11200     request = {
11201       "name": self.name,
11202       "disk_space_total": disk_space,
11203       "required_nodes": self.required_nodes,
11204       "relocate_from": self.relocate_from,
11205       }
11206     return request
11207
11208   def _AddEvacuateNodes(self):
11209     """Add evacuate nodes data to allocator structure.
11210
11211     """
11212     request = {
11213       "evac_nodes": self.evac_nodes
11214       }
11215     return request
11216
11217   def _BuildInputData(self, fn):
11218     """Build input data structures.
11219
11220     """
11221     self._ComputeClusterData()
11222
11223     request = fn()
11224     request["type"] = self.mode
11225     self.in_data["request"] = request
11226
11227     self.in_text = serializer.Dump(self.in_data)
11228
11229   def Run(self, name, validate=True, call_fn=None):
11230     """Run an instance allocator and return the results.
11231
11232     """
11233     if call_fn is None:
11234       call_fn = self.rpc.call_iallocator_runner
11235
11236     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11237     result.Raise("Failure while running the iallocator script")
11238
11239     self.out_text = result.payload
11240     if validate:
11241       self._ValidateResult()
11242
11243   def _ValidateResult(self):
11244     """Process the allocator results.
11245
11246     This will process and if successful save the result in
11247     self.out_data and the other parameters.
11248
11249     """
11250     try:
11251       rdict = serializer.Load(self.out_text)
11252     except Exception, err:
11253       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11254
11255     if not isinstance(rdict, dict):
11256       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11257
11258     # TODO: remove backwards compatiblity in later versions
11259     if "nodes" in rdict and "result" not in rdict:
11260       rdict["result"] = rdict["nodes"]
11261       del rdict["nodes"]
11262
11263     for key in "success", "info", "result":
11264       if key not in rdict:
11265         raise errors.OpExecError("Can't parse iallocator results:"
11266                                  " missing key '%s'" % key)
11267       setattr(self, key, rdict[key])
11268
11269     if not isinstance(rdict["result"], list):
11270       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11271                                " is not a list")
11272     self.out_data = rdict
11273
11274
11275 class LUTestAllocator(NoHooksLU):
11276   """Run allocator tests.
11277
11278   This LU runs the allocator tests
11279
11280   """
11281   def CheckPrereq(self):
11282     """Check prerequisites.
11283
11284     This checks the opcode parameters depending on the director and mode test.
11285
11286     """
11287     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11288       for attr in ["mem_size", "disks", "disk_template",
11289                    "os", "tags", "nics", "vcpus"]:
11290         if not hasattr(self.op, attr):
11291           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11292                                      attr, errors.ECODE_INVAL)
11293       iname = self.cfg.ExpandInstanceName(self.op.name)
11294       if iname is not None:
11295         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11296                                    iname, errors.ECODE_EXISTS)
11297       if not isinstance(self.op.nics, list):
11298         raise errors.OpPrereqError("Invalid parameter 'nics'",
11299                                    errors.ECODE_INVAL)
11300       if not isinstance(self.op.disks, list):
11301         raise errors.OpPrereqError("Invalid parameter 'disks'",
11302                                    errors.ECODE_INVAL)
11303       for row in self.op.disks:
11304         if (not isinstance(row, dict) or
11305             "size" not in row or
11306             not isinstance(row["size"], int) or
11307             "mode" not in row or
11308             row["mode"] not in ['r', 'w']):
11309           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11310                                      " parameter", errors.ECODE_INVAL)
11311       if self.op.hypervisor is None:
11312         self.op.hypervisor = self.cfg.GetHypervisorType()
11313     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11314       fname = _ExpandInstanceName(self.cfg, self.op.name)
11315       self.op.name = fname
11316       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11317     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11318       if not hasattr(self.op, "evac_nodes"):
11319         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11320                                    " opcode input", errors.ECODE_INVAL)
11321     else:
11322       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11323                                  self.op.mode, errors.ECODE_INVAL)
11324
11325     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11326       if self.op.allocator is None:
11327         raise errors.OpPrereqError("Missing allocator name",
11328                                    errors.ECODE_INVAL)
11329     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11330       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11331                                  self.op.direction, errors.ECODE_INVAL)
11332
11333   def Exec(self, feedback_fn):
11334     """Run the allocator test.
11335
11336     """
11337     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11338       ial = IAllocator(self.cfg, self.rpc,
11339                        mode=self.op.mode,
11340                        name=self.op.name,
11341                        mem_size=self.op.mem_size,
11342                        disks=self.op.disks,
11343                        disk_template=self.op.disk_template,
11344                        os=self.op.os,
11345                        tags=self.op.tags,
11346                        nics=self.op.nics,
11347                        vcpus=self.op.vcpus,
11348                        hypervisor=self.op.hypervisor,
11349                        )
11350     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11351       ial = IAllocator(self.cfg, self.rpc,
11352                        mode=self.op.mode,
11353                        name=self.op.name,
11354                        relocate_from=list(self.relocate_from),
11355                        )
11356     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11357       ial = IAllocator(self.cfg, self.rpc,
11358                        mode=self.op.mode,
11359                        evac_nodes=self.op.evac_nodes)
11360     else:
11361       raise errors.ProgrammerError("Uncatched mode %s in"
11362                                    " LUTestAllocator.Exec", self.op.mode)
11363
11364     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11365       result = ial.in_text
11366     else:
11367       ial.Run(self.op.allocator, validate=False)
11368       result = ial.out_text
11369     return result
11370
11371
11372 #: Query type implementations
11373 _QUERY_IMPL = {
11374   constants.QR_INSTANCE: _InstanceQuery,
11375   constants.QR_NODE: _NodeQuery,
11376   constants.QR_GROUP: _GroupQuery,
11377   }
11378
11379
11380 def _GetQueryImplementation(name):
11381   """Returns the implemtnation for a query type.
11382
11383   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11384
11385   """
11386   try:
11387     return _QUERY_IMPL[name]
11388   except KeyError:
11389     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11390                                errors.ECODE_INVAL)