code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
1007 def _GetNodeInstances(cfg, node_name):
1008   """Returns a list of all primary and secondary instances on a node.
1009
1010   """
1011
1012   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013
1014
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016   """Returns primary instances on a node.
1017
1018   """
1019   return _GetNodeInstancesInner(cfg,
1020                                 lambda inst: node_name == inst.primary_node)
1021
1022
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024   """Returns secondary instances on a node.
1025
1026   """
1027   return _GetNodeInstancesInner(cfg,
1028                                 lambda inst: node_name in inst.secondary_nodes)
1029
1030
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032   """Returns the arguments for a storage type.
1033
1034   """
1035   # Special case for file storage
1036   if storage_type == constants.ST_FILE:
1037     # storage.FileStorage wants a list of storage directories
1038     return [[cfg.GetFileStorageDir()]]
1039
1040   return []
1041
1042
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044   faulty = []
1045
1046   for dev in instance.disks:
1047     cfg.SetDiskID(dev, node_name)
1048
1049   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050   result.Raise("Failed to get disk status from node %s" % node_name,
1051                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052
1053   for idx, bdev_status in enumerate(result.payload):
1054     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055       faulty.append(idx)
1056
1057   return faulty
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
1092 class LUClusterPostInit(LogicalUnit):
1093   """Logical unit for running hooks after cluster initialization.
1094
1095   """
1096   HPATH = "cluster-init"
1097   HTYPE = constants.HTYPE_CLUSTER
1098
1099   def BuildHooksEnv(self):
1100     """Build hooks env.
1101
1102     """
1103     env = {"OP_TARGET": self.cfg.GetClusterName()}
1104     mn = self.cfg.GetMasterNode()
1105     return env, [], [mn]
1106
1107   def Exec(self, feedback_fn):
1108     """Nothing to do.
1109
1110     """
1111     return True
1112
1113
1114 class LUClusterDestroy(LogicalUnit):
1115   """Logical unit for destroying the cluster.
1116
1117   """
1118   HPATH = "cluster-destroy"
1119   HTYPE = constants.HTYPE_CLUSTER
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     return env, [], []
1127
1128   def CheckPrereq(self):
1129     """Check prerequisites.
1130
1131     This checks whether the cluster is empty.
1132
1133     Any errors are signaled by raising errors.OpPrereqError.
1134
1135     """
1136     master = self.cfg.GetMasterNode()
1137
1138     nodelist = self.cfg.GetNodeList()
1139     if len(nodelist) != 1 or nodelist[0] != master:
1140       raise errors.OpPrereqError("There are still %d node(s) in"
1141                                  " this cluster." % (len(nodelist) - 1),
1142                                  errors.ECODE_INVAL)
1143     instancelist = self.cfg.GetInstanceList()
1144     if instancelist:
1145       raise errors.OpPrereqError("There are still %d instance(s) in"
1146                                  " this cluster." % len(instancelist),
1147                                  errors.ECODE_INVAL)
1148
1149   def Exec(self, feedback_fn):
1150     """Destroys the cluster.
1151
1152     """
1153     master = self.cfg.GetMasterNode()
1154
1155     # Run post hooks on master node before it's removed
1156     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157     try:
1158       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159     except:
1160       # pylint: disable-msg=W0702
1161       self.LogWarning("Errors occurred running hooks on %s" % master)
1162
1163     result = self.rpc.call_node_stop_master(master, False)
1164     result.Raise("Could not disable the master role")
1165
1166     return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
1202 class LUClusterVerify(LogicalUnit):
1203   """Verifies the cluster status.
1204
1205   """
1206   HPATH = "cluster-verify"
1207   HTYPE = constants.HTYPE_CLUSTER
1208   REQ_BGL = False
1209
1210   TCLUSTER = "cluster"
1211   TNODE = "node"
1212   TINSTANCE = "instance"
1213
1214   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223   ENODEDRBD = (TNODE, "ENODEDRBD")
1224   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227   ENODEHV = (TNODE, "ENODEHV")
1228   ENODELVM = (TNODE, "ENODELVM")
1229   ENODEN1 = (TNODE, "ENODEN1")
1230   ENODENET = (TNODE, "ENODENET")
1231   ENODEOS = (TNODE, "ENODEOS")
1232   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234   ENODERPC = (TNODE, "ENODERPC")
1235   ENODESSH = (TNODE, "ENODESSH")
1236   ENODEVERSION = (TNODE, "ENODEVERSION")
1237   ENODESETUP = (TNODE, "ENODESETUP")
1238   ENODETIME = (TNODE, "ENODETIME")
1239   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240
1241   ETYPE_FIELD = "code"
1242   ETYPE_ERROR = "ERROR"
1243   ETYPE_WARNING = "WARNING"
1244
1245   _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeNetwork(self, ninfo, nresult):
1484     """Check the node time.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489
1490     """
1491     node = ninfo.name
1492     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1493
1494     test = constants.NV_NODELIST not in nresult
1495     _ErrorIf(test, self.ENODESSH, node,
1496              "node hasn't returned node ssh connectivity data")
1497     if not test:
1498       if nresult[constants.NV_NODELIST]:
1499         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500           _ErrorIf(True, self.ENODESSH, node,
1501                    "ssh communication with node '%s': %s", a_node, a_msg)
1502
1503     test = constants.NV_NODENETTEST not in nresult
1504     _ErrorIf(test, self.ENODENET, node,
1505              "node hasn't returned node tcp connectivity data")
1506     if not test:
1507       if nresult[constants.NV_NODENETTEST]:
1508         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1509         for anode in nlist:
1510           _ErrorIf(True, self.ENODENET, node,
1511                    "tcp communication with node '%s': %s",
1512                    anode, nresult[constants.NV_NODENETTEST][anode])
1513
1514     test = constants.NV_MASTERIP not in nresult
1515     _ErrorIf(test, self.ENODENET, node,
1516              "node hasn't returned node master IP reachability data")
1517     if not test:
1518       if not nresult[constants.NV_MASTERIP]:
1519         if node == self.master_node:
1520           msg = "the master node cannot reach the master IP (not configured?)"
1521         else:
1522           msg = "cannot reach the master IP"
1523         _ErrorIf(True, self.ENODENET, node, msg)
1524
1525   def _VerifyInstance(self, instance, instanceconfig, node_image,
1526                       diskstatus):
1527     """Verify an instance.
1528
1529     This function checks to see if the required block devices are
1530     available on the instance's node.
1531
1532     """
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534     node_current = instanceconfig.primary_node
1535
1536     node_vol_should = {}
1537     instanceconfig.MapLVsByNode(node_vol_should)
1538
1539     for node in node_vol_should:
1540       n_img = node_image[node]
1541       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542         # ignore missing volumes on offline or broken nodes
1543         continue
1544       for volume in node_vol_should[node]:
1545         test = volume not in n_img.volumes
1546         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547                  "volume %s missing on node %s", volume, node)
1548
1549     if instanceconfig.admin_up:
1550       pri_img = node_image[node_current]
1551       test = instance not in pri_img.instances and not pri_img.offline
1552       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553                "instance not running on its primary node %s",
1554                node_current)
1555
1556     for node, n_img in node_image.items():
1557       if node != node_current:
1558         test = instance in n_img.instances
1559         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560                  "instance should not run on node %s", node)
1561
1562     diskdata = [(nname, success, status, idx)
1563                 for (nname, disks) in diskstatus.items()
1564                 for idx, (success, status) in enumerate(disks)]
1565
1566     for nname, success, bdev_status, idx in diskdata:
1567       # the 'ghost node' construction in Exec() ensures that we have a
1568       # node here
1569       snode = node_image[nname]
1570       bad_snode = snode.ghost or snode.offline
1571       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1572                self.EINSTANCEFAULTYDISK, instance,
1573                "couldn't retrieve status for disk/%s on %s: %s",
1574                idx, nname, bdev_status)
1575       _ErrorIf((instanceconfig.admin_up and success and
1576                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1577                self.EINSTANCEFAULTYDISK, instance,
1578                "disk/%s on %s is faulty", idx, nname)
1579
1580   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1581     """Verify if there are any unknown volumes in the cluster.
1582
1583     The .os, .swap and backup volumes are ignored. All other volumes are
1584     reported as unknown.
1585
1586     @type reserved: L{ganeti.utils.FieldSet}
1587     @param reserved: a FieldSet of reserved volume names
1588
1589     """
1590     for node, n_img in node_image.items():
1591       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1592         # skip non-healthy nodes
1593         continue
1594       for volume in n_img.volumes:
1595         test = ((node not in node_vol_should or
1596                 volume not in node_vol_should[node]) and
1597                 not reserved.Matches(volume))
1598         self._ErrorIf(test, self.ENODEORPHANLV, node,
1599                       "volume %s is unknown", volume)
1600
1601   def _VerifyOrphanInstances(self, instancelist, node_image):
1602     """Verify the list of running instances.
1603
1604     This checks what instances are running but unknown to the cluster.
1605
1606     """
1607     for node, n_img in node_image.items():
1608       for o_inst in n_img.instances:
1609         test = o_inst not in instancelist
1610         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1611                       "instance %s on node %s should not exist", o_inst, node)
1612
1613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1614     """Verify N+1 Memory Resilience.
1615
1616     Check that if one single node dies we can still start all the
1617     instances it was primary for.
1618
1619     """
1620     for node, n_img in node_image.items():
1621       # This code checks that every node which is now listed as
1622       # secondary has enough memory to host all instances it is
1623       # supposed to should a single other node in the cluster fail.
1624       # FIXME: not ready for failover to an arbitrary node
1625       # FIXME: does not support file-backed instances
1626       # WARNING: we currently take into account down instances as well
1627       # as up ones, considering that even if they're down someone
1628       # might want to start them even in the event of a node failure.
1629       if n_img.offline:
1630         # we're skipping offline nodes from the N+1 warning, since
1631         # most likely we don't have good memory infromation from them;
1632         # we already list instances living on such nodes, and that's
1633         # enough warning
1634         continue
1635       for prinode, instances in n_img.sbp.items():
1636         needed_mem = 0
1637         for instance in instances:
1638           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1639           if bep[constants.BE_AUTO_BALANCE]:
1640             needed_mem += bep[constants.BE_MEMORY]
1641         test = n_img.mfree < needed_mem
1642         self._ErrorIf(test, self.ENODEN1, node,
1643                       "not enough memory to accomodate instance failovers"
1644                       " should node %s fail", prinode)
1645
1646   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1647                        master_files):
1648     """Verifies and computes the node required file checksums.
1649
1650     @type ninfo: L{objects.Node}
1651     @param ninfo: the node to check
1652     @param nresult: the remote results for the node
1653     @param file_list: required list of files
1654     @param local_cksum: dictionary of local files and their checksums
1655     @param master_files: list of files that only masters should have
1656
1657     """
1658     node = ninfo.name
1659     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660
1661     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1662     test = not isinstance(remote_cksum, dict)
1663     _ErrorIf(test, self.ENODEFILECHECK, node,
1664              "node hasn't returned file checksum data")
1665     if test:
1666       return
1667
1668     for file_name in file_list:
1669       node_is_mc = ninfo.master_candidate
1670       must_have = (file_name not in master_files) or node_is_mc
1671       # missing
1672       test1 = file_name not in remote_cksum
1673       # invalid checksum
1674       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1675       # existing and good
1676       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1677       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1678                "file '%s' missing", file_name)
1679       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1680                "file '%s' has wrong checksum", file_name)
1681       # not candidate and this is not a must-have file
1682       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1683                "file '%s' should not exist on non master"
1684                " candidates (and the file is outdated)", file_name)
1685       # all good, except non-master/non-must have combination
1686       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1687                "file '%s' should not exist"
1688                " on non master candidates", file_name)
1689
1690   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1691                       drbd_map):
1692     """Verifies and the node DRBD status.
1693
1694     @type ninfo: L{objects.Node}
1695     @param ninfo: the node to check
1696     @param nresult: the remote results for the node
1697     @param instanceinfo: the dict of instances
1698     @param drbd_helper: the configured DRBD usermode helper
1699     @param drbd_map: the DRBD map as returned by
1700         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1701
1702     """
1703     node = ninfo.name
1704     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1705
1706     if drbd_helper:
1707       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1708       test = (helper_result == None)
1709       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1710                "no drbd usermode helper returned")
1711       if helper_result:
1712         status, payload = helper_result
1713         test = not status
1714         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1715                  "drbd usermode helper check unsuccessful: %s", payload)
1716         test = status and (payload != drbd_helper)
1717         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1718                  "wrong drbd usermode helper: %s", payload)
1719
1720     # compute the DRBD minors
1721     node_drbd = {}
1722     for minor, instance in drbd_map[node].items():
1723       test = instance not in instanceinfo
1724       _ErrorIf(test, self.ECLUSTERCFG, None,
1725                "ghost instance '%s' in temporary DRBD map", instance)
1726         # ghost instance should not be running, but otherwise we
1727         # don't give double warnings (both ghost instance and
1728         # unallocated minor in use)
1729       if test:
1730         node_drbd[minor] = (instance, False)
1731       else:
1732         instance = instanceinfo[instance]
1733         node_drbd[minor] = (instance.name, instance.admin_up)
1734
1735     # and now check them
1736     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1737     test = not isinstance(used_minors, (tuple, list))
1738     _ErrorIf(test, self.ENODEDRBD, node,
1739              "cannot parse drbd status file: %s", str(used_minors))
1740     if test:
1741       # we cannot check drbd status
1742       return
1743
1744     for minor, (iname, must_exist) in node_drbd.items():
1745       test = minor not in used_minors and must_exist
1746       _ErrorIf(test, self.ENODEDRBD, node,
1747                "drbd minor %d of instance %s is not active", minor, iname)
1748     for minor in used_minors:
1749       test = minor not in node_drbd
1750       _ErrorIf(test, self.ENODEDRBD, node,
1751                "unallocated drbd minor %d is in use", minor)
1752
1753   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1754     """Builds the node OS structures.
1755
1756     @type ninfo: L{objects.Node}
1757     @param ninfo: the node to check
1758     @param nresult: the remote results for the node
1759     @param nimg: the node image object
1760
1761     """
1762     node = ninfo.name
1763     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1764
1765     remote_os = nresult.get(constants.NV_OSLIST, None)
1766     test = (not isinstance(remote_os, list) or
1767             not compat.all(isinstance(v, list) and len(v) == 7
1768                            for v in remote_os))
1769
1770     _ErrorIf(test, self.ENODEOS, node,
1771              "node hasn't returned valid OS data")
1772
1773     nimg.os_fail = test
1774
1775     if test:
1776       return
1777
1778     os_dict = {}
1779
1780     for (name, os_path, status, diagnose,
1781          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1782
1783       if name not in os_dict:
1784         os_dict[name] = []
1785
1786       # parameters is a list of lists instead of list of tuples due to
1787       # JSON lacking a real tuple type, fix it:
1788       parameters = [tuple(v) for v in parameters]
1789       os_dict[name].append((os_path, status, diagnose,
1790                             set(variants), set(parameters), set(api_ver)))
1791
1792     nimg.oslist = os_dict
1793
1794   def _VerifyNodeOS(self, ninfo, nimg, base):
1795     """Verifies the node OS list.
1796
1797     @type ninfo: L{objects.Node}
1798     @param ninfo: the node to check
1799     @param nimg: the node image object
1800     @param base: the 'template' node we match against (e.g. from the master)
1801
1802     """
1803     node = ninfo.name
1804     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1805
1806     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1807
1808     for os_name, os_data in nimg.oslist.items():
1809       assert os_data, "Empty OS status for OS %s?!" % os_name
1810       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1811       _ErrorIf(not f_status, self.ENODEOS, node,
1812                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1813       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1814                "OS '%s' has multiple entries (first one shadows the rest): %s",
1815                os_name, utils.CommaJoin([v[0] for v in os_data]))
1816       # this will catched in backend too
1817       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1818                and not f_var, self.ENODEOS, node,
1819                "OS %s with API at least %d does not declare any variant",
1820                os_name, constants.OS_API_V15)
1821       # comparisons with the 'base' image
1822       test = os_name not in base.oslist
1823       _ErrorIf(test, self.ENODEOS, node,
1824                "Extra OS %s not present on reference node (%s)",
1825                os_name, base.name)
1826       if test:
1827         continue
1828       assert base.oslist[os_name], "Base node has empty OS status?"
1829       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1830       if not b_status:
1831         # base OS is invalid, skipping
1832         continue
1833       for kind, a, b in [("API version", f_api, b_api),
1834                          ("variants list", f_var, b_var),
1835                          ("parameters", f_param, b_param)]:
1836         _ErrorIf(a != b, self.ENODEOS, node,
1837                  "OS %s %s differs from reference node %s: %s vs. %s",
1838                  kind, os_name, base.name,
1839                  utils.CommaJoin(a), utils.CommaJoin(b))
1840
1841     # check any missing OSes
1842     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1843     _ErrorIf(missing, self.ENODEOS, node,
1844              "OSes present on reference node %s but missing on this node: %s",
1845              base.name, utils.CommaJoin(missing))
1846
1847   def _VerifyOob(self, ninfo, nresult):
1848     """Verifies out of band functionality of a node.
1849
1850     @type ninfo: L{objects.Node}
1851     @param ninfo: the node to check
1852     @param nresult: the remote results for the node
1853
1854     """
1855     node = ninfo.name
1856     # We just have to verify the paths on master and/or master candidates
1857     # as the oob helper is invoked on the master
1858     if ((ninfo.master_candidate or ninfo.master_capable) and
1859         constants.NV_OOB_PATHS in nresult):
1860       for path_result in nresult[constants.NV_OOB_PATHS]:
1861         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1862
1863   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1864     """Verifies and updates the node volume data.
1865
1866     This function will update a L{NodeImage}'s internal structures
1867     with data from the remote call.
1868
1869     @type ninfo: L{objects.Node}
1870     @param ninfo: the node to check
1871     @param nresult: the remote results for the node
1872     @param nimg: the node image object
1873     @param vg_name: the configured VG name
1874
1875     """
1876     node = ninfo.name
1877     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1878
1879     nimg.lvm_fail = True
1880     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1881     if vg_name is None:
1882       pass
1883     elif isinstance(lvdata, basestring):
1884       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1885                utils.SafeEncode(lvdata))
1886     elif not isinstance(lvdata, dict):
1887       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1888     else:
1889       nimg.volumes = lvdata
1890       nimg.lvm_fail = False
1891
1892   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1893     """Verifies and updates the node instance list.
1894
1895     If the listing was successful, then updates this node's instance
1896     list. Otherwise, it marks the RPC call as failed for the instance
1897     list key.
1898
1899     @type ninfo: L{objects.Node}
1900     @param ninfo: the node to check
1901     @param nresult: the remote results for the node
1902     @param nimg: the node image object
1903
1904     """
1905     idata = nresult.get(constants.NV_INSTANCELIST, None)
1906     test = not isinstance(idata, list)
1907     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1908                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1909     if test:
1910       nimg.hyp_fail = True
1911     else:
1912       nimg.instances = idata
1913
1914   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1915     """Verifies and computes a node information map
1916
1917     @type ninfo: L{objects.Node}
1918     @param ninfo: the node to check
1919     @param nresult: the remote results for the node
1920     @param nimg: the node image object
1921     @param vg_name: the configured VG name
1922
1923     """
1924     node = ninfo.name
1925     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1926
1927     # try to read free memory (from the hypervisor)
1928     hv_info = nresult.get(constants.NV_HVINFO, None)
1929     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1930     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1931     if not test:
1932       try:
1933         nimg.mfree = int(hv_info["memory_free"])
1934       except (ValueError, TypeError):
1935         _ErrorIf(True, self.ENODERPC, node,
1936                  "node returned invalid nodeinfo, check hypervisor")
1937
1938     # FIXME: devise a free space model for file based instances as well
1939     if vg_name is not None:
1940       test = (constants.NV_VGLIST not in nresult or
1941               vg_name not in nresult[constants.NV_VGLIST])
1942       _ErrorIf(test, self.ENODELVM, node,
1943                "node didn't return data for the volume group '%s'"
1944                " - it is either missing or broken", vg_name)
1945       if not test:
1946         try:
1947           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1948         except (ValueError, TypeError):
1949           _ErrorIf(True, self.ENODERPC, node,
1950                    "node returned invalid LVM info, check LVM status")
1951
1952   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1953     """Gets per-disk status information for all instances.
1954
1955     @type nodelist: list of strings
1956     @param nodelist: Node names
1957     @type node_image: dict of (name, L{objects.Node})
1958     @param node_image: Node objects
1959     @type instanceinfo: dict of (name, L{objects.Instance})
1960     @param instanceinfo: Instance objects
1961     @rtype: {instance: {node: [(succes, payload)]}}
1962     @return: a dictionary of per-instance dictionaries with nodes as
1963         keys and disk information as values; the disk information is a
1964         list of tuples (success, payload)
1965
1966     """
1967     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1968
1969     node_disks = {}
1970     node_disks_devonly = {}
1971     diskless_instances = set()
1972     diskless = constants.DT_DISKLESS
1973
1974     for nname in nodelist:
1975       node_instances = list(itertools.chain(node_image[nname].pinst,
1976                                             node_image[nname].sinst))
1977       diskless_instances.update(inst for inst in node_instances
1978                                 if instanceinfo[inst].disk_template == diskless)
1979       disks = [(inst, disk)
1980                for inst in node_instances
1981                for disk in instanceinfo[inst].disks]
1982
1983       if not disks:
1984         # No need to collect data
1985         continue
1986
1987       node_disks[nname] = disks
1988
1989       # Creating copies as SetDiskID below will modify the objects and that can
1990       # lead to incorrect data returned from nodes
1991       devonly = [dev.Copy() for (_, dev) in disks]
1992
1993       for dev in devonly:
1994         self.cfg.SetDiskID(dev, nname)
1995
1996       node_disks_devonly[nname] = devonly
1997
1998     assert len(node_disks) == len(node_disks_devonly)
1999
2000     # Collect data from all nodes with disks
2001     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2002                                                           node_disks_devonly)
2003
2004     assert len(result) == len(node_disks)
2005
2006     instdisk = {}
2007
2008     for (nname, nres) in result.items():
2009       disks = node_disks[nname]
2010
2011       if nres.offline:
2012         # No data from this node
2013         data = len(disks) * [(False, "node offline")]
2014       else:
2015         msg = nres.fail_msg
2016         _ErrorIf(msg, self.ENODERPC, nname,
2017                  "while getting disk information: %s", msg)
2018         if msg:
2019           # No data from this node
2020           data = len(disks) * [(False, msg)]
2021         else:
2022           data = []
2023           for idx, i in enumerate(nres.payload):
2024             if isinstance(i, (tuple, list)) and len(i) == 2:
2025               data.append(i)
2026             else:
2027               logging.warning("Invalid result from node %s, entry %d: %s",
2028                               nname, idx, i)
2029               data.append((False, "Invalid result from the remote node"))
2030
2031       for ((inst, _), status) in zip(disks, data):
2032         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2033
2034     # Add empty entries for diskless instances.
2035     for inst in diskless_instances:
2036       assert inst not in instdisk
2037       instdisk[inst] = {}
2038
2039     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2040                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2041                       compat.all(isinstance(s, (tuple, list)) and
2042                                  len(s) == 2 for s in statuses)
2043                       for inst, nnames in instdisk.items()
2044                       for nname, statuses in nnames.items())
2045     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2046
2047     return instdisk
2048
2049   def _VerifyHVP(self, hvp_data):
2050     """Verifies locally the syntax of the hypervisor parameters.
2051
2052     """
2053     for item, hv_name, hv_params in hvp_data:
2054       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2055              (item, hv_name))
2056       try:
2057         hv_class = hypervisor.GetHypervisor(hv_name)
2058         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2059         hv_class.CheckParameterSyntax(hv_params)
2060       except errors.GenericError, err:
2061         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2062
2063
2064   def BuildHooksEnv(self):
2065     """Build hooks env.
2066
2067     Cluster-Verify hooks just ran in the post phase and their failure makes
2068     the output be logged in the verify output and the verification to fail.
2069
2070     """
2071     all_nodes = self.cfg.GetNodeList()
2072     env = {
2073       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2074       }
2075     for node in self.cfg.GetAllNodesInfo().values():
2076       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2077
2078     return env, [], all_nodes
2079
2080   def Exec(self, feedback_fn):
2081     """Verify integrity of cluster, performing various test on nodes.
2082
2083     """
2084     # This method has too many local variables. pylint: disable-msg=R0914
2085     self.bad = False
2086     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2087     verbose = self.op.verbose
2088     self._feedback_fn = feedback_fn
2089     feedback_fn("* Verifying global settings")
2090     for msg in self.cfg.VerifyConfig():
2091       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2092
2093     # Check the cluster certificates
2094     for cert_filename in constants.ALL_CERT_FILES:
2095       (errcode, msg) = _VerifyCertificate(cert_filename)
2096       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2097
2098     vg_name = self.cfg.GetVGName()
2099     drbd_helper = self.cfg.GetDRBDHelper()
2100     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2101     cluster = self.cfg.GetClusterInfo()
2102     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2103     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2104     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2105     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2106     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2107                         for iname in instancelist)
2108     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2109     i_non_redundant = [] # Non redundant instances
2110     i_non_a_balanced = [] # Non auto-balanced instances
2111     n_offline = 0 # Count of offline nodes
2112     n_drained = 0 # Count of nodes being drained
2113     node_vol_should = {}
2114
2115     # FIXME: verify OS list
2116     # do local checksums
2117     master_files = [constants.CLUSTER_CONF_FILE]
2118     master_node = self.master_node = self.cfg.GetMasterNode()
2119     master_ip = self.cfg.GetMasterIP()
2120
2121     file_names = ssconf.SimpleStore().GetFileList()
2122     file_names.extend(constants.ALL_CERT_FILES)
2123     file_names.extend(master_files)
2124     if cluster.modify_etc_hosts:
2125       file_names.append(constants.ETC_HOSTS)
2126
2127     local_checksums = utils.FingerprintFiles(file_names)
2128
2129     # Compute the set of hypervisor parameters
2130     hvp_data = []
2131     for hv_name in hypervisors:
2132       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2133     for os_name, os_hvp in cluster.os_hvp.items():
2134       for hv_name, hv_params in os_hvp.items():
2135         if not hv_params:
2136           continue
2137         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2138         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2139     # TODO: collapse identical parameter values in a single one
2140     for instance in instanceinfo.values():
2141       if not instance.hvparams:
2142         continue
2143       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2144                        cluster.FillHV(instance)))
2145     # and verify them locally
2146     self._VerifyHVP(hvp_data)
2147
2148     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2149     node_verify_param = {
2150       constants.NV_FILELIST: file_names,
2151       constants.NV_NODELIST: [node.name for node in nodeinfo
2152                               if not node.offline],
2153       constants.NV_HYPERVISOR: hypervisors,
2154       constants.NV_HVPARAMS: hvp_data,
2155       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2156                                   node.secondary_ip) for node in nodeinfo
2157                                  if not node.offline],
2158       constants.NV_INSTANCELIST: hypervisors,
2159       constants.NV_VERSION: None,
2160       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2161       constants.NV_NODESETUP: None,
2162       constants.NV_TIME: None,
2163       constants.NV_MASTERIP: (master_node, master_ip),
2164       constants.NV_OSLIST: None,
2165       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2166       }
2167
2168     if vg_name is not None:
2169       node_verify_param[constants.NV_VGLIST] = None
2170       node_verify_param[constants.NV_LVLIST] = vg_name
2171       node_verify_param[constants.NV_PVLIST] = [vg_name]
2172       node_verify_param[constants.NV_DRBDLIST] = None
2173
2174     if drbd_helper:
2175       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2176
2177     # Build our expected cluster state
2178     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2179                                                  name=node.name,
2180                                                  vm_capable=node.vm_capable))
2181                       for node in nodeinfo)
2182
2183     # Gather OOB paths
2184     oob_paths = []
2185     for node in nodeinfo:
2186       path = _SupportsOob(self.cfg, node)
2187       if path and path not in oob_paths:
2188         oob_paths.append(path)
2189
2190     if oob_paths:
2191       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2192
2193     for instance in instancelist:
2194       inst_config = instanceinfo[instance]
2195
2196       for nname in inst_config.all_nodes:
2197         if nname not in node_image:
2198           # ghost node
2199           gnode = self.NodeImage(name=nname)
2200           gnode.ghost = True
2201           node_image[nname] = gnode
2202
2203       inst_config.MapLVsByNode(node_vol_should)
2204
2205       pnode = inst_config.primary_node
2206       node_image[pnode].pinst.append(instance)
2207
2208       for snode in inst_config.secondary_nodes:
2209         nimg = node_image[snode]
2210         nimg.sinst.append(instance)
2211         if pnode not in nimg.sbp:
2212           nimg.sbp[pnode] = []
2213         nimg.sbp[pnode].append(instance)
2214
2215     # At this point, we have the in-memory data structures complete,
2216     # except for the runtime information, which we'll gather next
2217
2218     # Due to the way our RPC system works, exact response times cannot be
2219     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2220     # time before and after executing the request, we can at least have a time
2221     # window.
2222     nvinfo_starttime = time.time()
2223     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2224                                            self.cfg.GetClusterName())
2225     nvinfo_endtime = time.time()
2226
2227     all_drbd_map = self.cfg.ComputeDRBDMap()
2228
2229     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2230     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2231
2232     feedback_fn("* Verifying node status")
2233
2234     refos_img = None
2235
2236     for node_i in nodeinfo:
2237       node = node_i.name
2238       nimg = node_image[node]
2239
2240       if node_i.offline:
2241         if verbose:
2242           feedback_fn("* Skipping offline node %s" % (node,))
2243         n_offline += 1
2244         continue
2245
2246       if node == master_node:
2247         ntype = "master"
2248       elif node_i.master_candidate:
2249         ntype = "master candidate"
2250       elif node_i.drained:
2251         ntype = "drained"
2252         n_drained += 1
2253       else:
2254         ntype = "regular"
2255       if verbose:
2256         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2257
2258       msg = all_nvinfo[node].fail_msg
2259       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2260       if msg:
2261         nimg.rpc_fail = True
2262         continue
2263
2264       nresult = all_nvinfo[node].payload
2265
2266       nimg.call_ok = self._VerifyNode(node_i, nresult)
2267       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2268       self._VerifyNodeNetwork(node_i, nresult)
2269       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2270                             master_files)
2271
2272       self._VerifyOob(node_i, nresult)
2273
2274       if nimg.vm_capable:
2275         self._VerifyNodeLVM(node_i, nresult, vg_name)
2276         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2277                              all_drbd_map)
2278
2279         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2280         self._UpdateNodeInstances(node_i, nresult, nimg)
2281         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2282         self._UpdateNodeOS(node_i, nresult, nimg)
2283         if not nimg.os_fail:
2284           if refos_img is None:
2285             refos_img = nimg
2286           self._VerifyNodeOS(node_i, nimg, refos_img)
2287
2288     feedback_fn("* Verifying instance status")
2289     for instance in instancelist:
2290       if verbose:
2291         feedback_fn("* Verifying instance %s" % instance)
2292       inst_config = instanceinfo[instance]
2293       self._VerifyInstance(instance, inst_config, node_image,
2294                            instdisk[instance])
2295       inst_nodes_offline = []
2296
2297       pnode = inst_config.primary_node
2298       pnode_img = node_image[pnode]
2299       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2300                self.ENODERPC, pnode, "instance %s, connection to"
2301                " primary node failed", instance)
2302
2303       _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2304                "instance lives on offline node %s", inst_config.primary_node)
2305
2306       # If the instance is non-redundant we cannot survive losing its primary
2307       # node, so we are not N+1 compliant. On the other hand we have no disk
2308       # templates with more than one secondary so that situation is not well
2309       # supported either.
2310       # FIXME: does not support file-backed instances
2311       if not inst_config.secondary_nodes:
2312         i_non_redundant.append(instance)
2313
2314       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2315                instance, "instance has multiple secondary nodes: %s",
2316                utils.CommaJoin(inst_config.secondary_nodes),
2317                code=self.ETYPE_WARNING)
2318
2319       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2320         pnode = inst_config.primary_node
2321         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2322         instance_groups = {}
2323
2324         for node in instance_nodes:
2325           instance_groups.setdefault(nodeinfo_byname[node].group,
2326                                      []).append(node)
2327
2328         pretty_list = [
2329           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2330           # Sort so that we always list the primary node first.
2331           for group, nodes in sorted(instance_groups.items(),
2332                                      key=lambda (_, nodes): pnode in nodes,
2333                                      reverse=True)]
2334
2335         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2336                       instance, "instance has primary and secondary nodes in"
2337                       " different groups: %s", utils.CommaJoin(pretty_list),
2338                       code=self.ETYPE_WARNING)
2339
2340       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2341         i_non_a_balanced.append(instance)
2342
2343       for snode in inst_config.secondary_nodes:
2344         s_img = node_image[snode]
2345         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2346                  "instance %s, connection to secondary node failed", instance)
2347
2348         if s_img.offline:
2349           inst_nodes_offline.append(snode)
2350
2351       # warn that the instance lives on offline nodes
2352       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2353                "instance has offline secondary node(s) %s",
2354                utils.CommaJoin(inst_nodes_offline))
2355       # ... or ghost/non-vm_capable nodes
2356       for node in inst_config.all_nodes:
2357         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2358                  "instance lives on ghost node %s", node)
2359         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2360                  instance, "instance lives on non-vm_capable node %s", node)
2361
2362     feedback_fn("* Verifying orphan volumes")
2363     reserved = utils.FieldSet(*cluster.reserved_lvs)
2364     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2365
2366     feedback_fn("* Verifying orphan instances")
2367     self._VerifyOrphanInstances(instancelist, node_image)
2368
2369     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2370       feedback_fn("* Verifying N+1 Memory redundancy")
2371       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2372
2373     feedback_fn("* Other Notes")
2374     if i_non_redundant:
2375       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2376                   % len(i_non_redundant))
2377
2378     if i_non_a_balanced:
2379       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2380                   % len(i_non_a_balanced))
2381
2382     if n_offline:
2383       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2384
2385     if n_drained:
2386       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2387
2388     return not self.bad
2389
2390   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2391     """Analyze the post-hooks' result
2392
2393     This method analyses the hook result, handles it, and sends some
2394     nicely-formatted feedback back to the user.
2395
2396     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2397         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2398     @param hooks_results: the results of the multi-node hooks rpc call
2399     @param feedback_fn: function used send feedback back to the caller
2400     @param lu_result: previous Exec result
2401     @return: the new Exec result, based on the previous result
2402         and hook results
2403
2404     """
2405     # We only really run POST phase hooks, and are only interested in
2406     # their results
2407     if phase == constants.HOOKS_PHASE_POST:
2408       # Used to change hooks' output to proper indentation
2409       feedback_fn("* Hooks Results")
2410       assert hooks_results, "invalid result from hooks"
2411
2412       for node_name in hooks_results:
2413         res = hooks_results[node_name]
2414         msg = res.fail_msg
2415         test = msg and not res.offline
2416         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2417                       "Communication failure in hooks execution: %s", msg)
2418         if res.offline or msg:
2419           # No need to investigate payload if node is offline or gave an error.
2420           # override manually lu_result here as _ErrorIf only
2421           # overrides self.bad
2422           lu_result = 1
2423           continue
2424         for script, hkr, output in res.payload:
2425           test = hkr == constants.HKR_FAIL
2426           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2427                         "Script %s failed, output:", script)
2428           if test:
2429             output = self._HOOKS_INDENT_RE.sub('      ', output)
2430             feedback_fn("%s" % output)
2431             lu_result = 0
2432
2433       return lu_result
2434
2435
2436 class LUClusterVerifyDisks(NoHooksLU):
2437   """Verifies the cluster disks status.
2438
2439   """
2440   REQ_BGL = False
2441
2442   def ExpandNames(self):
2443     self.needed_locks = {
2444       locking.LEVEL_NODE: locking.ALL_SET,
2445       locking.LEVEL_INSTANCE: locking.ALL_SET,
2446     }
2447     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2448
2449   def Exec(self, feedback_fn):
2450     """Verify integrity of cluster disks.
2451
2452     @rtype: tuple of three items
2453     @return: a tuple of (dict of node-to-node_error, list of instances
2454         which need activate-disks, dict of instance: (node, volume) for
2455         missing volumes
2456
2457     """
2458     result = res_nodes, res_instances, res_missing = {}, [], {}
2459
2460     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2461     instances = self.cfg.GetAllInstancesInfo().values()
2462
2463     nv_dict = {}
2464     for inst in instances:
2465       inst_lvs = {}
2466       if not inst.admin_up:
2467         continue
2468       inst.MapLVsByNode(inst_lvs)
2469       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2470       for node, vol_list in inst_lvs.iteritems():
2471         for vol in vol_list:
2472           nv_dict[(node, vol)] = inst
2473
2474     if not nv_dict:
2475       return result
2476
2477     node_lvs = self.rpc.call_lv_list(nodes, [])
2478     for node, node_res in node_lvs.items():
2479       if node_res.offline:
2480         continue
2481       msg = node_res.fail_msg
2482       if msg:
2483         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2484         res_nodes[node] = msg
2485         continue
2486
2487       lvs = node_res.payload
2488       for lv_name, (_, _, lv_online) in lvs.items():
2489         inst = nv_dict.pop((node, lv_name), None)
2490         if (not lv_online and inst is not None
2491             and inst.name not in res_instances):
2492           res_instances.append(inst.name)
2493
2494     # any leftover items in nv_dict are missing LVs, let's arrange the
2495     # data better
2496     for key, inst in nv_dict.iteritems():
2497       if inst.name not in res_missing:
2498         res_missing[inst.name] = []
2499       res_missing[inst.name].append(key)
2500
2501     return result
2502
2503
2504 class LUClusterRepairDiskSizes(NoHooksLU):
2505   """Verifies the cluster disks sizes.
2506
2507   """
2508   REQ_BGL = False
2509
2510   def ExpandNames(self):
2511     if self.op.instances:
2512       self.wanted_names = []
2513       for name in self.op.instances:
2514         full_name = _ExpandInstanceName(self.cfg, name)
2515         self.wanted_names.append(full_name)
2516       self.needed_locks = {
2517         locking.LEVEL_NODE: [],
2518         locking.LEVEL_INSTANCE: self.wanted_names,
2519         }
2520       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2521     else:
2522       self.wanted_names = None
2523       self.needed_locks = {
2524         locking.LEVEL_NODE: locking.ALL_SET,
2525         locking.LEVEL_INSTANCE: locking.ALL_SET,
2526         }
2527     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2528
2529   def DeclareLocks(self, level):
2530     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2531       self._LockInstancesNodes(primary_only=True)
2532
2533   def CheckPrereq(self):
2534     """Check prerequisites.
2535
2536     This only checks the optional instance list against the existing names.
2537
2538     """
2539     if self.wanted_names is None:
2540       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2541
2542     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2543                              in self.wanted_names]
2544
2545   def _EnsureChildSizes(self, disk):
2546     """Ensure children of the disk have the needed disk size.
2547
2548     This is valid mainly for DRBD8 and fixes an issue where the
2549     children have smaller disk size.
2550
2551     @param disk: an L{ganeti.objects.Disk} object
2552
2553     """
2554     if disk.dev_type == constants.LD_DRBD8:
2555       assert disk.children, "Empty children for DRBD8?"
2556       fchild = disk.children[0]
2557       mismatch = fchild.size < disk.size
2558       if mismatch:
2559         self.LogInfo("Child disk has size %d, parent %d, fixing",
2560                      fchild.size, disk.size)
2561         fchild.size = disk.size
2562
2563       # and we recurse on this child only, not on the metadev
2564       return self._EnsureChildSizes(fchild) or mismatch
2565     else:
2566       return False
2567
2568   def Exec(self, feedback_fn):
2569     """Verify the size of cluster disks.
2570
2571     """
2572     # TODO: check child disks too
2573     # TODO: check differences in size between primary/secondary nodes
2574     per_node_disks = {}
2575     for instance in self.wanted_instances:
2576       pnode = instance.primary_node
2577       if pnode not in per_node_disks:
2578         per_node_disks[pnode] = []
2579       for idx, disk in enumerate(instance.disks):
2580         per_node_disks[pnode].append((instance, idx, disk))
2581
2582     changed = []
2583     for node, dskl in per_node_disks.items():
2584       newl = [v[2].Copy() for v in dskl]
2585       for dsk in newl:
2586         self.cfg.SetDiskID(dsk, node)
2587       result = self.rpc.call_blockdev_getsize(node, newl)
2588       if result.fail_msg:
2589         self.LogWarning("Failure in blockdev_getsize call to node"
2590                         " %s, ignoring", node)
2591         continue
2592       if len(result.payload) != len(dskl):
2593         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2594                         " result.payload=%s", node, len(dskl), result.payload)
2595         self.LogWarning("Invalid result from node %s, ignoring node results",
2596                         node)
2597         continue
2598       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2599         if size is None:
2600           self.LogWarning("Disk %d of instance %s did not return size"
2601                           " information, ignoring", idx, instance.name)
2602           continue
2603         if not isinstance(size, (int, long)):
2604           self.LogWarning("Disk %d of instance %s did not return valid"
2605                           " size information, ignoring", idx, instance.name)
2606           continue
2607         size = size >> 20
2608         if size != disk.size:
2609           self.LogInfo("Disk %d of instance %s has mismatched size,"
2610                        " correcting: recorded %d, actual %d", idx,
2611                        instance.name, disk.size, size)
2612           disk.size = size
2613           self.cfg.Update(instance, feedback_fn)
2614           changed.append((instance.name, idx, size))
2615         if self._EnsureChildSizes(disk):
2616           self.cfg.Update(instance, feedback_fn)
2617           changed.append((instance.name, idx, disk.size))
2618     return changed
2619
2620
2621 class LUClusterRename(LogicalUnit):
2622   """Rename the cluster.
2623
2624   """
2625   HPATH = "cluster-rename"
2626   HTYPE = constants.HTYPE_CLUSTER
2627
2628   def BuildHooksEnv(self):
2629     """Build hooks env.
2630
2631     """
2632     env = {
2633       "OP_TARGET": self.cfg.GetClusterName(),
2634       "NEW_NAME": self.op.name,
2635       }
2636     mn = self.cfg.GetMasterNode()
2637     all_nodes = self.cfg.GetNodeList()
2638     return env, [mn], all_nodes
2639
2640   def CheckPrereq(self):
2641     """Verify that the passed name is a valid one.
2642
2643     """
2644     hostname = netutils.GetHostname(name=self.op.name,
2645                                     family=self.cfg.GetPrimaryIPFamily())
2646
2647     new_name = hostname.name
2648     self.ip = new_ip = hostname.ip
2649     old_name = self.cfg.GetClusterName()
2650     old_ip = self.cfg.GetMasterIP()
2651     if new_name == old_name and new_ip == old_ip:
2652       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2653                                  " cluster has changed",
2654                                  errors.ECODE_INVAL)
2655     if new_ip != old_ip:
2656       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2657         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2658                                    " reachable on the network" %
2659                                    new_ip, errors.ECODE_NOTUNIQUE)
2660
2661     self.op.name = new_name
2662
2663   def Exec(self, feedback_fn):
2664     """Rename the cluster.
2665
2666     """
2667     clustername = self.op.name
2668     ip = self.ip
2669
2670     # shutdown the master IP
2671     master = self.cfg.GetMasterNode()
2672     result = self.rpc.call_node_stop_master(master, False)
2673     result.Raise("Could not disable the master role")
2674
2675     try:
2676       cluster = self.cfg.GetClusterInfo()
2677       cluster.cluster_name = clustername
2678       cluster.master_ip = ip
2679       self.cfg.Update(cluster, feedback_fn)
2680
2681       # update the known hosts file
2682       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2683       node_list = self.cfg.GetOnlineNodeList()
2684       try:
2685         node_list.remove(master)
2686       except ValueError:
2687         pass
2688       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2689     finally:
2690       result = self.rpc.call_node_start_master(master, False, False)
2691       msg = result.fail_msg
2692       if msg:
2693         self.LogWarning("Could not re-enable the master role on"
2694                         " the master, please restart manually: %s", msg)
2695
2696     return clustername
2697
2698
2699 class LUClusterSetParams(LogicalUnit):
2700   """Change the parameters of the cluster.
2701
2702   """
2703   HPATH = "cluster-modify"
2704   HTYPE = constants.HTYPE_CLUSTER
2705   REQ_BGL = False
2706
2707   def CheckArguments(self):
2708     """Check parameters
2709
2710     """
2711     if self.op.uid_pool:
2712       uidpool.CheckUidPool(self.op.uid_pool)
2713
2714     if self.op.add_uids:
2715       uidpool.CheckUidPool(self.op.add_uids)
2716
2717     if self.op.remove_uids:
2718       uidpool.CheckUidPool(self.op.remove_uids)
2719
2720   def ExpandNames(self):
2721     # FIXME: in the future maybe other cluster params won't require checking on
2722     # all nodes to be modified.
2723     self.needed_locks = {
2724       locking.LEVEL_NODE: locking.ALL_SET,
2725     }
2726     self.share_locks[locking.LEVEL_NODE] = 1
2727
2728   def BuildHooksEnv(self):
2729     """Build hooks env.
2730
2731     """
2732     env = {
2733       "OP_TARGET": self.cfg.GetClusterName(),
2734       "NEW_VG_NAME": self.op.vg_name,
2735       }
2736     mn = self.cfg.GetMasterNode()
2737     return env, [mn], [mn]
2738
2739   def CheckPrereq(self):
2740     """Check prerequisites.
2741
2742     This checks whether the given params don't conflict and
2743     if the given volume group is valid.
2744
2745     """
2746     if self.op.vg_name is not None and not self.op.vg_name:
2747       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2748         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2749                                    " instances exist", errors.ECODE_INVAL)
2750
2751     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2752       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2753         raise errors.OpPrereqError("Cannot disable drbd helper while"
2754                                    " drbd-based instances exist",
2755                                    errors.ECODE_INVAL)
2756
2757     node_list = self.acquired_locks[locking.LEVEL_NODE]
2758
2759     # if vg_name not None, checks given volume group on all nodes
2760     if self.op.vg_name:
2761       vglist = self.rpc.call_vg_list(node_list)
2762       for node in node_list:
2763         msg = vglist[node].fail_msg
2764         if msg:
2765           # ignoring down node
2766           self.LogWarning("Error while gathering data on node %s"
2767                           " (ignoring node): %s", node, msg)
2768           continue
2769         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2770                                               self.op.vg_name,
2771                                               constants.MIN_VG_SIZE)
2772         if vgstatus:
2773           raise errors.OpPrereqError("Error on node '%s': %s" %
2774                                      (node, vgstatus), errors.ECODE_ENVIRON)
2775
2776     if self.op.drbd_helper:
2777       # checks given drbd helper on all nodes
2778       helpers = self.rpc.call_drbd_helper(node_list)
2779       for node in node_list:
2780         ninfo = self.cfg.GetNodeInfo(node)
2781         if ninfo.offline:
2782           self.LogInfo("Not checking drbd helper on offline node %s", node)
2783           continue
2784         msg = helpers[node].fail_msg
2785         if msg:
2786           raise errors.OpPrereqError("Error checking drbd helper on node"
2787                                      " '%s': %s" % (node, msg),
2788                                      errors.ECODE_ENVIRON)
2789         node_helper = helpers[node].payload
2790         if node_helper != self.op.drbd_helper:
2791           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2792                                      (node, node_helper), errors.ECODE_ENVIRON)
2793
2794     self.cluster = cluster = self.cfg.GetClusterInfo()
2795     # validate params changes
2796     if self.op.beparams:
2797       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2798       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2799
2800     if self.op.ndparams:
2801       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2802       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2803
2804       # TODO: we need a more general way to handle resetting
2805       # cluster-level parameters to default values
2806       if self.new_ndparams["oob_program"] == "":
2807         self.new_ndparams["oob_program"] = \
2808             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2809
2810     if self.op.nicparams:
2811       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2812       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2813       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2814       nic_errors = []
2815
2816       # check all instances for consistency
2817       for instance in self.cfg.GetAllInstancesInfo().values():
2818         for nic_idx, nic in enumerate(instance.nics):
2819           params_copy = copy.deepcopy(nic.nicparams)
2820           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2821
2822           # check parameter syntax
2823           try:
2824             objects.NIC.CheckParameterSyntax(params_filled)
2825           except errors.ConfigurationError, err:
2826             nic_errors.append("Instance %s, nic/%d: %s" %
2827                               (instance.name, nic_idx, err))
2828
2829           # if we're moving instances to routed, check that they have an ip
2830           target_mode = params_filled[constants.NIC_MODE]
2831           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2832             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2833                               (instance.name, nic_idx))
2834       if nic_errors:
2835         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2836                                    "\n".join(nic_errors))
2837
2838     # hypervisor list/parameters
2839     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2840     if self.op.hvparams:
2841       for hv_name, hv_dict in self.op.hvparams.items():
2842         if hv_name not in self.new_hvparams:
2843           self.new_hvparams[hv_name] = hv_dict
2844         else:
2845           self.new_hvparams[hv_name].update(hv_dict)
2846
2847     # os hypervisor parameters
2848     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2849     if self.op.os_hvp:
2850       for os_name, hvs in self.op.os_hvp.items():
2851         if os_name not in self.new_os_hvp:
2852           self.new_os_hvp[os_name] = hvs
2853         else:
2854           for hv_name, hv_dict in hvs.items():
2855             if hv_name not in self.new_os_hvp[os_name]:
2856               self.new_os_hvp[os_name][hv_name] = hv_dict
2857             else:
2858               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2859
2860     # os parameters
2861     self.new_osp = objects.FillDict(cluster.osparams, {})
2862     if self.op.osparams:
2863       for os_name, osp in self.op.osparams.items():
2864         if os_name not in self.new_osp:
2865           self.new_osp[os_name] = {}
2866
2867         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2868                                                   use_none=True)
2869
2870         if not self.new_osp[os_name]:
2871           # we removed all parameters
2872           del self.new_osp[os_name]
2873         else:
2874           # check the parameter validity (remote check)
2875           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2876                          os_name, self.new_osp[os_name])
2877
2878     # changes to the hypervisor list
2879     if self.op.enabled_hypervisors is not None:
2880       self.hv_list = self.op.enabled_hypervisors
2881       for hv in self.hv_list:
2882         # if the hypervisor doesn't already exist in the cluster
2883         # hvparams, we initialize it to empty, and then (in both
2884         # cases) we make sure to fill the defaults, as we might not
2885         # have a complete defaults list if the hypervisor wasn't
2886         # enabled before
2887         if hv not in new_hvp:
2888           new_hvp[hv] = {}
2889         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2890         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2891     else:
2892       self.hv_list = cluster.enabled_hypervisors
2893
2894     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2895       # either the enabled list has changed, or the parameters have, validate
2896       for hv_name, hv_params in self.new_hvparams.items():
2897         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2898             (self.op.enabled_hypervisors and
2899              hv_name in self.op.enabled_hypervisors)):
2900           # either this is a new hypervisor, or its parameters have changed
2901           hv_class = hypervisor.GetHypervisor(hv_name)
2902           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2903           hv_class.CheckParameterSyntax(hv_params)
2904           _CheckHVParams(self, node_list, hv_name, hv_params)
2905
2906     if self.op.os_hvp:
2907       # no need to check any newly-enabled hypervisors, since the
2908       # defaults have already been checked in the above code-block
2909       for os_name, os_hvp in self.new_os_hvp.items():
2910         for hv_name, hv_params in os_hvp.items():
2911           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2912           # we need to fill in the new os_hvp on top of the actual hv_p
2913           cluster_defaults = self.new_hvparams.get(hv_name, {})
2914           new_osp = objects.FillDict(cluster_defaults, hv_params)
2915           hv_class = hypervisor.GetHypervisor(hv_name)
2916           hv_class.CheckParameterSyntax(new_osp)
2917           _CheckHVParams(self, node_list, hv_name, new_osp)
2918
2919     if self.op.default_iallocator:
2920       alloc_script = utils.FindFile(self.op.default_iallocator,
2921                                     constants.IALLOCATOR_SEARCH_PATH,
2922                                     os.path.isfile)
2923       if alloc_script is None:
2924         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2925                                    " specified" % self.op.default_iallocator,
2926                                    errors.ECODE_INVAL)
2927
2928   def Exec(self, feedback_fn):
2929     """Change the parameters of the cluster.
2930
2931     """
2932     if self.op.vg_name is not None:
2933       new_volume = self.op.vg_name
2934       if not new_volume:
2935         new_volume = None
2936       if new_volume != self.cfg.GetVGName():
2937         self.cfg.SetVGName(new_volume)
2938       else:
2939         feedback_fn("Cluster LVM configuration already in desired"
2940                     " state, not changing")
2941     if self.op.drbd_helper is not None:
2942       new_helper = self.op.drbd_helper
2943       if not new_helper:
2944         new_helper = None
2945       if new_helper != self.cfg.GetDRBDHelper():
2946         self.cfg.SetDRBDHelper(new_helper)
2947       else:
2948         feedback_fn("Cluster DRBD helper already in desired state,"
2949                     " not changing")
2950     if self.op.hvparams:
2951       self.cluster.hvparams = self.new_hvparams
2952     if self.op.os_hvp:
2953       self.cluster.os_hvp = self.new_os_hvp
2954     if self.op.enabled_hypervisors is not None:
2955       self.cluster.hvparams = self.new_hvparams
2956       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2957     if self.op.beparams:
2958       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2959     if self.op.nicparams:
2960       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2961     if self.op.osparams:
2962       self.cluster.osparams = self.new_osp
2963     if self.op.ndparams:
2964       self.cluster.ndparams = self.new_ndparams
2965
2966     if self.op.candidate_pool_size is not None:
2967       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2968       # we need to update the pool size here, otherwise the save will fail
2969       _AdjustCandidatePool(self, [])
2970
2971     if self.op.maintain_node_health is not None:
2972       self.cluster.maintain_node_health = self.op.maintain_node_health
2973
2974     if self.op.prealloc_wipe_disks is not None:
2975       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2976
2977     if self.op.add_uids is not None:
2978       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2979
2980     if self.op.remove_uids is not None:
2981       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2982
2983     if self.op.uid_pool is not None:
2984       self.cluster.uid_pool = self.op.uid_pool
2985
2986     if self.op.default_iallocator is not None:
2987       self.cluster.default_iallocator = self.op.default_iallocator
2988
2989     if self.op.reserved_lvs is not None:
2990       self.cluster.reserved_lvs = self.op.reserved_lvs
2991
2992     def helper_os(aname, mods, desc):
2993       desc += " OS list"
2994       lst = getattr(self.cluster, aname)
2995       for key, val in mods:
2996         if key == constants.DDM_ADD:
2997           if val in lst:
2998             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2999           else:
3000             lst.append(val)
3001         elif key == constants.DDM_REMOVE:
3002           if val in lst:
3003             lst.remove(val)
3004           else:
3005             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3006         else:
3007           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3008
3009     if self.op.hidden_os:
3010       helper_os("hidden_os", self.op.hidden_os, "hidden")
3011
3012     if self.op.blacklisted_os:
3013       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3014
3015     if self.op.master_netdev:
3016       master = self.cfg.GetMasterNode()
3017       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3018                   self.cluster.master_netdev)
3019       result = self.rpc.call_node_stop_master(master, False)
3020       result.Raise("Could not disable the master ip")
3021       feedback_fn("Changing master_netdev from %s to %s" %
3022                   (self.cluster.master_netdev, self.op.master_netdev))
3023       self.cluster.master_netdev = self.op.master_netdev
3024
3025     self.cfg.Update(self.cluster, feedback_fn)
3026
3027     if self.op.master_netdev:
3028       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3029                   self.op.master_netdev)
3030       result = self.rpc.call_node_start_master(master, False, False)
3031       if result.fail_msg:
3032         self.LogWarning("Could not re-enable the master ip on"
3033                         " the master, please restart manually: %s",
3034                         result.fail_msg)
3035
3036
3037 def _UploadHelper(lu, nodes, fname):
3038   """Helper for uploading a file and showing warnings.
3039
3040   """
3041   if os.path.exists(fname):
3042     result = lu.rpc.call_upload_file(nodes, fname)
3043     for to_node, to_result in result.items():
3044       msg = to_result.fail_msg
3045       if msg:
3046         msg = ("Copy of file %s to node %s failed: %s" %
3047                (fname, to_node, msg))
3048         lu.proc.LogWarning(msg)
3049
3050
3051 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3052   """Distribute additional files which are part of the cluster configuration.
3053
3054   ConfigWriter takes care of distributing the config and ssconf files, but
3055   there are more files which should be distributed to all nodes. This function
3056   makes sure those are copied.
3057
3058   @param lu: calling logical unit
3059   @param additional_nodes: list of nodes not in the config to distribute to
3060   @type additional_vm: boolean
3061   @param additional_vm: whether the additional nodes are vm-capable or not
3062
3063   """
3064   # 1. Gather target nodes
3065   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3066   dist_nodes = lu.cfg.GetOnlineNodeList()
3067   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3068   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3069   if additional_nodes is not None:
3070     dist_nodes.extend(additional_nodes)
3071     if additional_vm:
3072       vm_nodes.extend(additional_nodes)
3073   if myself.name in dist_nodes:
3074     dist_nodes.remove(myself.name)
3075   if myself.name in vm_nodes:
3076     vm_nodes.remove(myself.name)
3077
3078   # 2. Gather files to distribute
3079   dist_files = set([constants.ETC_HOSTS,
3080                     constants.SSH_KNOWN_HOSTS_FILE,
3081                     constants.RAPI_CERT_FILE,
3082                     constants.RAPI_USERS_FILE,
3083                     constants.CONFD_HMAC_KEY,
3084                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3085                    ])
3086
3087   vm_files = set()
3088   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3089   for hv_name in enabled_hypervisors:
3090     hv_class = hypervisor.GetHypervisor(hv_name)
3091     vm_files.update(hv_class.GetAncillaryFiles())
3092
3093   # 3. Perform the files upload
3094   for fname in dist_files:
3095     _UploadHelper(lu, dist_nodes, fname)
3096   for fname in vm_files:
3097     _UploadHelper(lu, vm_nodes, fname)
3098
3099
3100 class LUClusterRedistConf(NoHooksLU):
3101   """Force the redistribution of cluster configuration.
3102
3103   This is a very simple LU.
3104
3105   """
3106   REQ_BGL = False
3107
3108   def ExpandNames(self):
3109     self.needed_locks = {
3110       locking.LEVEL_NODE: locking.ALL_SET,
3111     }
3112     self.share_locks[locking.LEVEL_NODE] = 1
3113
3114   def Exec(self, feedback_fn):
3115     """Redistribute the configuration.
3116
3117     """
3118     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3119     _RedistributeAncillaryFiles(self)
3120
3121
3122 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3123   """Sleep and poll for an instance's disk to sync.
3124
3125   """
3126   if not instance.disks or disks is not None and not disks:
3127     return True
3128
3129   disks = _ExpandCheckDisks(instance, disks)
3130
3131   if not oneshot:
3132     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3133
3134   node = instance.primary_node
3135
3136   for dev in disks:
3137     lu.cfg.SetDiskID(dev, node)
3138
3139   # TODO: Convert to utils.Retry
3140
3141   retries = 0
3142   degr_retries = 10 # in seconds, as we sleep 1 second each time
3143   while True:
3144     max_time = 0
3145     done = True
3146     cumul_degraded = False
3147     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3148     msg = rstats.fail_msg
3149     if msg:
3150       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3151       retries += 1
3152       if retries >= 10:
3153         raise errors.RemoteError("Can't contact node %s for mirror data,"
3154                                  " aborting." % node)
3155       time.sleep(6)
3156       continue
3157     rstats = rstats.payload
3158     retries = 0
3159     for i, mstat in enumerate(rstats):
3160       if mstat is None:
3161         lu.LogWarning("Can't compute data for node %s/%s",
3162                            node, disks[i].iv_name)
3163         continue
3164
3165       cumul_degraded = (cumul_degraded or
3166                         (mstat.is_degraded and mstat.sync_percent is None))
3167       if mstat.sync_percent is not None:
3168         done = False
3169         if mstat.estimated_time is not None:
3170           rem_time = ("%s remaining (estimated)" %
3171                       utils.FormatSeconds(mstat.estimated_time))
3172           max_time = mstat.estimated_time
3173         else:
3174           rem_time = "no time estimate"
3175         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3176                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3177
3178     # if we're done but degraded, let's do a few small retries, to
3179     # make sure we see a stable and not transient situation; therefore
3180     # we force restart of the loop
3181     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3182       logging.info("Degraded disks found, %d retries left", degr_retries)
3183       degr_retries -= 1
3184       time.sleep(1)
3185       continue
3186
3187     if done or oneshot:
3188       break
3189
3190     time.sleep(min(60, max_time))
3191
3192   if done:
3193     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3194   return not cumul_degraded
3195
3196
3197 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3198   """Check that mirrors are not degraded.
3199
3200   The ldisk parameter, if True, will change the test from the
3201   is_degraded attribute (which represents overall non-ok status for
3202   the device(s)) to the ldisk (representing the local storage status).
3203
3204   """
3205   lu.cfg.SetDiskID(dev, node)
3206
3207   result = True
3208
3209   if on_primary or dev.AssembleOnSecondary():
3210     rstats = lu.rpc.call_blockdev_find(node, dev)
3211     msg = rstats.fail_msg
3212     if msg:
3213       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3214       result = False
3215     elif not rstats.payload:
3216       lu.LogWarning("Can't find disk on node %s", node)
3217       result = False
3218     else:
3219       if ldisk:
3220         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3221       else:
3222         result = result and not rstats.payload.is_degraded
3223
3224   if dev.children:
3225     for child in dev.children:
3226       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3227
3228   return result
3229
3230
3231 class LUOobCommand(NoHooksLU):
3232   """Logical unit for OOB handling.
3233
3234   """
3235   REG_BGL = False
3236
3237   def CheckPrereq(self):
3238     """Check prerequisites.
3239
3240     This checks:
3241      - the node exists in the configuration
3242      - OOB is supported
3243
3244     Any errors are signaled by raising errors.OpPrereqError.
3245
3246     """
3247     self.nodes = []
3248     for node_name in self.op.node_names:
3249       node = self.cfg.GetNodeInfo(node_name)
3250
3251       if node is None:
3252         raise errors.OpPrereqError("Node %s not found" % node_name,
3253                                    errors.ECODE_NOENT)
3254       else:
3255         self.nodes.append(node)
3256
3257       if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3258         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3259                                     " not marked offline") % node_name,
3260                                    errors.ECODE_STATE)
3261
3262   def ExpandNames(self):
3263     """Gather locks we need.
3264
3265     """
3266     if self.op.node_names:
3267       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3268                             for name in self.op.node_names]
3269     else:
3270       self.op.node_names = self.cfg.GetNodeList()
3271
3272     self.needed_locks = {
3273       locking.LEVEL_NODE: self.op.node_names,
3274       }
3275
3276   def Exec(self, feedback_fn):
3277     """Execute OOB and return result if we expect any.
3278
3279     """
3280     master_node = self.cfg.GetMasterNode()
3281     ret = []
3282
3283     for node in self.nodes:
3284       node_entry = [(constants.RS_NORMAL, node.name)]
3285       ret.append(node_entry)
3286
3287       oob_program = _SupportsOob(self.cfg, node)
3288
3289       if not oob_program:
3290         node_entry.append((constants.RS_UNAVAIL, None))
3291         continue
3292
3293       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3294                    self.op.command, oob_program, node.name)
3295       result = self.rpc.call_run_oob(master_node, oob_program,
3296                                      self.op.command, node.name,
3297                                      self.op.timeout)
3298
3299       if result.fail_msg:
3300         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3301                         node.name, result.fail_msg)
3302         node_entry.append((constants.RS_NODATA, None))
3303       else:
3304         try:
3305           self._CheckPayload(result)
3306         except errors.OpExecError, err:
3307           self.LogWarning("The payload returned by '%s' is not valid: %s",
3308                           node.name, err)
3309           node_entry.append((constants.RS_NODATA, None))
3310         else:
3311           if self.op.command == constants.OOB_HEALTH:
3312             # For health we should log important events
3313             for item, status in result.payload:
3314               if status in [constants.OOB_STATUS_WARNING,
3315                             constants.OOB_STATUS_CRITICAL]:
3316                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3317                                 node.name, item, status)
3318
3319           if self.op.command == constants.OOB_POWER_ON:
3320             node.powered = True
3321           elif self.op.command == constants.OOB_POWER_OFF:
3322             node.powered = False
3323           elif self.op.command == constants.OOB_POWER_STATUS:
3324             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3325             if powered != node.powered:
3326               logging.warning(("Recorded power state (%s) of node '%s' does not"
3327                                " match actual power state (%s)"), node.powered,
3328                               node.name, powered)
3329
3330           # For configuration changing commands we should update the node
3331           if self.op.command in (constants.OOB_POWER_ON,
3332                                  constants.OOB_POWER_OFF):
3333             self.cfg.Update(node, feedback_fn)
3334
3335           node_entry.append((constants.RS_NORMAL, result.payload))
3336
3337     return ret
3338
3339   def _CheckPayload(self, result):
3340     """Checks if the payload is valid.
3341
3342     @param result: RPC result
3343     @raises errors.OpExecError: If payload is not valid
3344
3345     """
3346     errs = []
3347     if self.op.command == constants.OOB_HEALTH:
3348       if not isinstance(result.payload, list):
3349         errs.append("command 'health' is expected to return a list but got %s" %
3350                     type(result.payload))
3351       else:
3352         for item, status in result.payload:
3353           if status not in constants.OOB_STATUSES:
3354             errs.append("health item '%s' has invalid status '%s'" %
3355                         (item, status))
3356
3357     if self.op.command == constants.OOB_POWER_STATUS:
3358       if not isinstance(result.payload, dict):
3359         errs.append("power-status is expected to return a dict but got %s" %
3360                     type(result.payload))
3361
3362     if self.op.command in [
3363         constants.OOB_POWER_ON,
3364         constants.OOB_POWER_OFF,
3365         constants.OOB_POWER_CYCLE,
3366         ]:
3367       if result.payload is not None:
3368         errs.append("%s is expected to not return payload but got '%s'" %
3369                     (self.op.command, result.payload))
3370
3371     if errs:
3372       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3373                                utils.CommaJoin(errs))
3374
3375
3376
3377 class LUOsDiagnose(NoHooksLU):
3378   """Logical unit for OS diagnose/query.
3379
3380   """
3381   REQ_BGL = False
3382   _HID = "hidden"
3383   _BLK = "blacklisted"
3384   _VLD = "valid"
3385   _FIELDS_STATIC = utils.FieldSet()
3386   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3387                                    "parameters", "api_versions", _HID, _BLK)
3388
3389   def CheckArguments(self):
3390     if self.op.names:
3391       raise errors.OpPrereqError("Selective OS query not supported",
3392                                  errors.ECODE_INVAL)
3393
3394     _CheckOutputFields(static=self._FIELDS_STATIC,
3395                        dynamic=self._FIELDS_DYNAMIC,
3396                        selected=self.op.output_fields)
3397
3398   def ExpandNames(self):
3399     # Lock all nodes, in shared mode
3400     # Temporary removal of locks, should be reverted later
3401     # TODO: reintroduce locks when they are lighter-weight
3402     self.needed_locks = {}
3403     #self.share_locks[locking.LEVEL_NODE] = 1
3404     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3405
3406   @staticmethod
3407   def _DiagnoseByOS(rlist):
3408     """Remaps a per-node return list into an a per-os per-node dictionary
3409
3410     @param rlist: a map with node names as keys and OS objects as values
3411
3412     @rtype: dict
3413     @return: a dictionary with osnames as keys and as value another
3414         map, with nodes as keys and tuples of (path, status, diagnose,
3415         variants, parameters, api_versions) as values, eg::
3416
3417           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3418                                      (/srv/..., False, "invalid api")],
3419                            "node2": [(/srv/..., True, "", [], [])]}
3420           }
3421
3422     """
3423     all_os = {}
3424     # we build here the list of nodes that didn't fail the RPC (at RPC
3425     # level), so that nodes with a non-responding node daemon don't
3426     # make all OSes invalid
3427     good_nodes = [node_name for node_name in rlist
3428                   if not rlist[node_name].fail_msg]
3429     for node_name, nr in rlist.items():
3430       if nr.fail_msg or not nr.payload:
3431         continue
3432       for (name, path, status, diagnose, variants,
3433            params, api_versions) in nr.payload:
3434         if name not in all_os:
3435           # build a list of nodes for this os containing empty lists
3436           # for each node in node_list
3437           all_os[name] = {}
3438           for nname in good_nodes:
3439             all_os[name][nname] = []
3440         # convert params from [name, help] to (name, help)
3441         params = [tuple(v) for v in params]
3442         all_os[name][node_name].append((path, status, diagnose,
3443                                         variants, params, api_versions))
3444     return all_os
3445
3446   def Exec(self, feedback_fn):
3447     """Compute the list of OSes.
3448
3449     """
3450     valid_nodes = [node.name
3451                    for node in self.cfg.GetAllNodesInfo().values()
3452                    if not node.offline and node.vm_capable]
3453     node_data = self.rpc.call_os_diagnose(valid_nodes)
3454     pol = self._DiagnoseByOS(node_data)
3455     output = []
3456     cluster = self.cfg.GetClusterInfo()
3457
3458     for os_name in utils.NiceSort(pol.keys()):
3459       os_data = pol[os_name]
3460       row = []
3461       valid = True
3462       (variants, params, api_versions) = null_state = (set(), set(), set())
3463       for idx, osl in enumerate(os_data.values()):
3464         valid = bool(valid and osl and osl[0][1])
3465         if not valid:
3466           (variants, params, api_versions) = null_state
3467           break
3468         node_variants, node_params, node_api = osl[0][3:6]
3469         if idx == 0: # first entry
3470           variants = set(node_variants)
3471           params = set(node_params)
3472           api_versions = set(node_api)
3473         else: # keep consistency
3474           variants.intersection_update(node_variants)
3475           params.intersection_update(node_params)
3476           api_versions.intersection_update(node_api)
3477
3478       is_hid = os_name in cluster.hidden_os
3479       is_blk = os_name in cluster.blacklisted_os
3480       if ((self._HID not in self.op.output_fields and is_hid) or
3481           (self._BLK not in self.op.output_fields and is_blk) or
3482           (self._VLD not in self.op.output_fields and not valid)):
3483         continue
3484
3485       for field in self.op.output_fields:
3486         if field == "name":
3487           val = os_name
3488         elif field == self._VLD:
3489           val = valid
3490         elif field == "node_status":
3491           # this is just a copy of the dict
3492           val = {}
3493           for node_name, nos_list in os_data.items():
3494             val[node_name] = nos_list
3495         elif field == "variants":
3496           val = utils.NiceSort(list(variants))
3497         elif field == "parameters":
3498           val = list(params)
3499         elif field == "api_versions":
3500           val = list(api_versions)
3501         elif field == self._HID:
3502           val = is_hid
3503         elif field == self._BLK:
3504           val = is_blk
3505         else:
3506           raise errors.ParameterError(field)
3507         row.append(val)
3508       output.append(row)
3509
3510     return output
3511
3512
3513 class LUNodeRemove(LogicalUnit):
3514   """Logical unit for removing a node.
3515
3516   """
3517   HPATH = "node-remove"
3518   HTYPE = constants.HTYPE_NODE
3519
3520   def BuildHooksEnv(self):
3521     """Build hooks env.
3522
3523     This doesn't run on the target node in the pre phase as a failed
3524     node would then be impossible to remove.
3525
3526     """
3527     env = {
3528       "OP_TARGET": self.op.node_name,
3529       "NODE_NAME": self.op.node_name,
3530       }
3531     all_nodes = self.cfg.GetNodeList()
3532     try:
3533       all_nodes.remove(self.op.node_name)
3534     except ValueError:
3535       logging.warning("Node %s which is about to be removed not found"
3536                       " in the all nodes list", self.op.node_name)
3537     return env, all_nodes, all_nodes
3538
3539   def CheckPrereq(self):
3540     """Check prerequisites.
3541
3542     This checks:
3543      - the node exists in the configuration
3544      - it does not have primary or secondary instances
3545      - it's not the master
3546
3547     Any errors are signaled by raising errors.OpPrereqError.
3548
3549     """
3550     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3551     node = self.cfg.GetNodeInfo(self.op.node_name)
3552     assert node is not None
3553
3554     instance_list = self.cfg.GetInstanceList()
3555
3556     masternode = self.cfg.GetMasterNode()
3557     if node.name == masternode:
3558       raise errors.OpPrereqError("Node is the master node,"
3559                                  " you need to failover first.",
3560                                  errors.ECODE_INVAL)
3561
3562     for instance_name in instance_list:
3563       instance = self.cfg.GetInstanceInfo(instance_name)
3564       if node.name in instance.all_nodes:
3565         raise errors.OpPrereqError("Instance %s is still running on the node,"
3566                                    " please remove first." % instance_name,
3567                                    errors.ECODE_INVAL)
3568     self.op.node_name = node.name
3569     self.node = node
3570
3571   def Exec(self, feedback_fn):
3572     """Removes the node from the cluster.
3573
3574     """
3575     node = self.node
3576     logging.info("Stopping the node daemon and removing configs from node %s",
3577                  node.name)
3578
3579     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3580
3581     # Promote nodes to master candidate as needed
3582     _AdjustCandidatePool(self, exceptions=[node.name])
3583     self.context.RemoveNode(node.name)
3584
3585     # Run post hooks on the node before it's removed
3586     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3587     try:
3588       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3589     except:
3590       # pylint: disable-msg=W0702
3591       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3592
3593     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3594     msg = result.fail_msg
3595     if msg:
3596       self.LogWarning("Errors encountered on the remote node while leaving"
3597                       " the cluster: %s", msg)
3598
3599     # Remove node from our /etc/hosts
3600     if self.cfg.GetClusterInfo().modify_etc_hosts:
3601       master_node = self.cfg.GetMasterNode()
3602       result = self.rpc.call_etc_hosts_modify(master_node,
3603                                               constants.ETC_HOSTS_REMOVE,
3604                                               node.name, None)
3605       result.Raise("Can't update hosts file with new host data")
3606       _RedistributeAncillaryFiles(self)
3607
3608
3609 class _NodeQuery(_QueryBase):
3610   FIELDS = query.NODE_FIELDS
3611
3612   def ExpandNames(self, lu):
3613     lu.needed_locks = {}
3614     lu.share_locks[locking.LEVEL_NODE] = 1
3615
3616     if self.names:
3617       self.wanted = _GetWantedNodes(lu, self.names)
3618     else:
3619       self.wanted = locking.ALL_SET
3620
3621     self.do_locking = (self.use_locking and
3622                        query.NQ_LIVE in self.requested_data)
3623
3624     if self.do_locking:
3625       # if we don't request only static fields, we need to lock the nodes
3626       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3627
3628   def DeclareLocks(self, lu, level):
3629     pass
3630
3631   def _GetQueryData(self, lu):
3632     """Computes the list of nodes and their attributes.
3633
3634     """
3635     all_info = lu.cfg.GetAllNodesInfo()
3636
3637     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3638
3639     # Gather data as requested
3640     if query.NQ_LIVE in self.requested_data:
3641       # filter out non-vm_capable nodes
3642       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3643
3644       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3645                                         lu.cfg.GetHypervisorType())
3646       live_data = dict((name, nresult.payload)
3647                        for (name, nresult) in node_data.items()
3648                        if not nresult.fail_msg and nresult.payload)
3649     else:
3650       live_data = None
3651
3652     if query.NQ_INST in self.requested_data:
3653       node_to_primary = dict([(name, set()) for name in nodenames])
3654       node_to_secondary = dict([(name, set()) for name in nodenames])
3655
3656       inst_data = lu.cfg.GetAllInstancesInfo()
3657
3658       for inst in inst_data.values():
3659         if inst.primary_node in node_to_primary:
3660           node_to_primary[inst.primary_node].add(inst.name)
3661         for secnode in inst.secondary_nodes:
3662           if secnode in node_to_secondary:
3663             node_to_secondary[secnode].add(inst.name)
3664     else:
3665       node_to_primary = None
3666       node_to_secondary = None
3667
3668     if query.NQ_OOB in self.requested_data:
3669       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3670                          for name, node in all_info.iteritems())
3671     else:
3672       oob_support = None
3673
3674     if query.NQ_GROUP in self.requested_data:
3675       groups = lu.cfg.GetAllNodeGroupsInfo()
3676     else:
3677       groups = {}
3678
3679     return query.NodeQueryData([all_info[name] for name in nodenames],
3680                                live_data, lu.cfg.GetMasterNode(),
3681                                node_to_primary, node_to_secondary, groups,
3682                                oob_support, lu.cfg.GetClusterInfo())
3683
3684
3685 class LUNodeQuery(NoHooksLU):
3686   """Logical unit for querying nodes.
3687
3688   """
3689   # pylint: disable-msg=W0142
3690   REQ_BGL = False
3691
3692   def CheckArguments(self):
3693     self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3694                          self.op.use_locking)
3695
3696   def ExpandNames(self):
3697     self.nq.ExpandNames(self)
3698
3699   def Exec(self, feedback_fn):
3700     return self.nq.OldStyleQuery(self)
3701
3702
3703 class LUNodeQueryvols(NoHooksLU):
3704   """Logical unit for getting volumes on node(s).
3705
3706   """
3707   REQ_BGL = False
3708   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3709   _FIELDS_STATIC = utils.FieldSet("node")
3710
3711   def CheckArguments(self):
3712     _CheckOutputFields(static=self._FIELDS_STATIC,
3713                        dynamic=self._FIELDS_DYNAMIC,
3714                        selected=self.op.output_fields)
3715
3716   def ExpandNames(self):
3717     self.needed_locks = {}
3718     self.share_locks[locking.LEVEL_NODE] = 1
3719     if not self.op.nodes:
3720       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3721     else:
3722       self.needed_locks[locking.LEVEL_NODE] = \
3723         _GetWantedNodes(self, self.op.nodes)
3724
3725   def Exec(self, feedback_fn):
3726     """Computes the list of nodes and their attributes.
3727
3728     """
3729     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3730     volumes = self.rpc.call_node_volumes(nodenames)
3731
3732     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3733              in self.cfg.GetInstanceList()]
3734
3735     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3736
3737     output = []
3738     for node in nodenames:
3739       nresult = volumes[node]
3740       if nresult.offline:
3741         continue
3742       msg = nresult.fail_msg
3743       if msg:
3744         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3745         continue
3746
3747       node_vols = nresult.payload[:]
3748       node_vols.sort(key=lambda vol: vol['dev'])
3749
3750       for vol in node_vols:
3751         node_output = []
3752         for field in self.op.output_fields:
3753           if field == "node":
3754             val = node
3755           elif field == "phys":
3756             val = vol['dev']
3757           elif field == "vg":
3758             val = vol['vg']
3759           elif field == "name":
3760             val = vol['name']
3761           elif field == "size":
3762             val = int(float(vol['size']))
3763           elif field == "instance":
3764             for inst in ilist:
3765               if node not in lv_by_node[inst]:
3766                 continue
3767               if vol['name'] in lv_by_node[inst][node]:
3768                 val = inst.name
3769                 break
3770             else:
3771               val = '-'
3772           else:
3773             raise errors.ParameterError(field)
3774           node_output.append(str(val))
3775
3776         output.append(node_output)
3777
3778     return output
3779
3780
3781 class LUNodeQueryStorage(NoHooksLU):
3782   """Logical unit for getting information on storage units on node(s).
3783
3784   """
3785   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3786   REQ_BGL = False
3787
3788   def CheckArguments(self):
3789     _CheckOutputFields(static=self._FIELDS_STATIC,
3790                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3791                        selected=self.op.output_fields)
3792
3793   def ExpandNames(self):
3794     self.needed_locks = {}
3795     self.share_locks[locking.LEVEL_NODE] = 1
3796
3797     if self.op.nodes:
3798       self.needed_locks[locking.LEVEL_NODE] = \
3799         _GetWantedNodes(self, self.op.nodes)
3800     else:
3801       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3802
3803   def Exec(self, feedback_fn):
3804     """Computes the list of nodes and their attributes.
3805
3806     """
3807     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3808
3809     # Always get name to sort by
3810     if constants.SF_NAME in self.op.output_fields:
3811       fields = self.op.output_fields[:]
3812     else:
3813       fields = [constants.SF_NAME] + self.op.output_fields
3814
3815     # Never ask for node or type as it's only known to the LU
3816     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3817       while extra in fields:
3818         fields.remove(extra)
3819
3820     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3821     name_idx = field_idx[constants.SF_NAME]
3822
3823     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3824     data = self.rpc.call_storage_list(self.nodes,
3825                                       self.op.storage_type, st_args,
3826                                       self.op.name, fields)
3827
3828     result = []
3829
3830     for node in utils.NiceSort(self.nodes):
3831       nresult = data[node]
3832       if nresult.offline:
3833         continue
3834
3835       msg = nresult.fail_msg
3836       if msg:
3837         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3838         continue
3839
3840       rows = dict([(row[name_idx], row) for row in nresult.payload])
3841
3842       for name in utils.NiceSort(rows.keys()):
3843         row = rows[name]
3844
3845         out = []
3846
3847         for field in self.op.output_fields:
3848           if field == constants.SF_NODE:
3849             val = node
3850           elif field == constants.SF_TYPE:
3851             val = self.op.storage_type
3852           elif field in field_idx:
3853             val = row[field_idx[field]]
3854           else:
3855             raise errors.ParameterError(field)
3856
3857           out.append(val)
3858
3859         result.append(out)
3860
3861     return result
3862
3863
3864 class _InstanceQuery(_QueryBase):
3865   FIELDS = query.INSTANCE_FIELDS
3866
3867   def ExpandNames(self, lu):
3868     lu.needed_locks = {}
3869     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3870     lu.share_locks[locking.LEVEL_NODE] = 1
3871
3872     if self.names:
3873       self.wanted = _GetWantedInstances(lu, self.names)
3874     else:
3875       self.wanted = locking.ALL_SET
3876
3877     self.do_locking = (self.use_locking and
3878                        query.IQ_LIVE in self.requested_data)
3879     if self.do_locking:
3880       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3881       lu.needed_locks[locking.LEVEL_NODE] = []
3882       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3883
3884   def DeclareLocks(self, lu, level):
3885     if level == locking.LEVEL_NODE and self.do_locking:
3886       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3887
3888   def _GetQueryData(self, lu):
3889     """Computes the list of instances and their attributes.
3890
3891     """
3892     cluster = lu.cfg.GetClusterInfo()
3893     all_info = lu.cfg.GetAllInstancesInfo()
3894
3895     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3896
3897     instance_list = [all_info[name] for name in instance_names]
3898     nodes = frozenset(itertools.chain(*(inst.all_nodes
3899                                         for inst in instance_list)))
3900     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3901     bad_nodes = []
3902     offline_nodes = []
3903     wrongnode_inst = set()
3904
3905     # Gather data as requested
3906     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3907       live_data = {}
3908       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3909       for name in nodes:
3910         result = node_data[name]
3911         if result.offline:
3912           # offline nodes will be in both lists
3913           assert result.fail_msg
3914           offline_nodes.append(name)
3915         if result.fail_msg:
3916           bad_nodes.append(name)
3917         elif result.payload:
3918           for inst in result.payload:
3919             if inst in all_info:
3920               if all_info[inst].primary_node == name:
3921                 live_data.update(result.payload)
3922               else:
3923                 wrongnode_inst.add(inst)
3924             else:
3925               # orphan instance; we don't list it here as we don't
3926               # handle this case yet in the output of instance listing
3927               logging.warning("Orphan instance '%s' found on node %s",
3928                               inst, name)
3929         # else no instance is alive
3930     else:
3931       live_data = {}
3932
3933     if query.IQ_DISKUSAGE in self.requested_data:
3934       disk_usage = dict((inst.name,
3935                          _ComputeDiskSize(inst.disk_template,
3936                                           [{"size": disk.size}
3937                                            for disk in inst.disks]))
3938                         for inst in instance_list)
3939     else:
3940       disk_usage = None
3941
3942     if query.IQ_CONSOLE in self.requested_data:
3943       consinfo = {}
3944       for inst in instance_list:
3945         if inst.name in live_data:
3946           # Instance is running
3947           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3948         else:
3949           consinfo[inst.name] = None
3950       assert set(consinfo.keys()) == set(instance_names)
3951     else:
3952       consinfo = None
3953
3954     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3955                                    disk_usage, offline_nodes, bad_nodes,
3956                                    live_data, wrongnode_inst, consinfo)
3957
3958
3959 class LUQuery(NoHooksLU):
3960   """Query for resources/items of a certain kind.
3961
3962   """
3963   # pylint: disable-msg=W0142
3964   REQ_BGL = False
3965
3966   def CheckArguments(self):
3967     qcls = _GetQueryImplementation(self.op.what)
3968     names = qlang.ReadSimpleFilter("name", self.op.filter)
3969
3970     self.impl = qcls(names, self.op.fields, False)
3971
3972   def ExpandNames(self):
3973     self.impl.ExpandNames(self)
3974
3975   def DeclareLocks(self, level):
3976     self.impl.DeclareLocks(self, level)
3977
3978   def Exec(self, feedback_fn):
3979     return self.impl.NewStyleQuery(self)
3980
3981
3982 class LUQueryFields(NoHooksLU):
3983   """Query for resources/items of a certain kind.
3984
3985   """
3986   # pylint: disable-msg=W0142
3987   REQ_BGL = False
3988
3989   def CheckArguments(self):
3990     self.qcls = _GetQueryImplementation(self.op.what)
3991
3992   def ExpandNames(self):
3993     self.needed_locks = {}
3994
3995   def Exec(self, feedback_fn):
3996     return self.qcls.FieldsQuery(self.op.fields)
3997
3998
3999 class LUNodeModifyStorage(NoHooksLU):
4000   """Logical unit for modifying a storage volume on a node.
4001
4002   """
4003   REQ_BGL = False
4004
4005   def CheckArguments(self):
4006     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4007
4008     storage_type = self.op.storage_type
4009
4010     try:
4011       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4012     except KeyError:
4013       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4014                                  " modified" % storage_type,
4015                                  errors.ECODE_INVAL)
4016
4017     diff = set(self.op.changes.keys()) - modifiable
4018     if diff:
4019       raise errors.OpPrereqError("The following fields can not be modified for"
4020                                  " storage units of type '%s': %r" %
4021                                  (storage_type, list(diff)),
4022                                  errors.ECODE_INVAL)
4023
4024   def ExpandNames(self):
4025     self.needed_locks = {
4026       locking.LEVEL_NODE: self.op.node_name,
4027       }
4028
4029   def Exec(self, feedback_fn):
4030     """Computes the list of nodes and their attributes.
4031
4032     """
4033     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4034     result = self.rpc.call_storage_modify(self.op.node_name,
4035                                           self.op.storage_type, st_args,
4036                                           self.op.name, self.op.changes)
4037     result.Raise("Failed to modify storage unit '%s' on %s" %
4038                  (self.op.name, self.op.node_name))
4039
4040
4041 class LUNodeAdd(LogicalUnit):
4042   """Logical unit for adding node to the cluster.
4043
4044   """
4045   HPATH = "node-add"
4046   HTYPE = constants.HTYPE_NODE
4047   _NFLAGS = ["master_capable", "vm_capable"]
4048
4049   def CheckArguments(self):
4050     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4051     # validate/normalize the node name
4052     self.hostname = netutils.GetHostname(name=self.op.node_name,
4053                                          family=self.primary_ip_family)
4054     self.op.node_name = self.hostname.name
4055     if self.op.readd and self.op.group:
4056       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4057                                  " being readded", errors.ECODE_INVAL)
4058
4059   def BuildHooksEnv(self):
4060     """Build hooks env.
4061
4062     This will run on all nodes before, and on all nodes + the new node after.
4063
4064     """
4065     env = {
4066       "OP_TARGET": self.op.node_name,
4067       "NODE_NAME": self.op.node_name,
4068       "NODE_PIP": self.op.primary_ip,
4069       "NODE_SIP": self.op.secondary_ip,
4070       "MASTER_CAPABLE": str(self.op.master_capable),
4071       "VM_CAPABLE": str(self.op.vm_capable),
4072       }
4073     nodes_0 = self.cfg.GetNodeList()
4074     nodes_1 = nodes_0 + [self.op.node_name, ]
4075     return env, nodes_0, nodes_1
4076
4077   def CheckPrereq(self):
4078     """Check prerequisites.
4079
4080     This checks:
4081      - the new node is not already in the config
4082      - it is resolvable
4083      - its parameters (single/dual homed) matches the cluster
4084
4085     Any errors are signaled by raising errors.OpPrereqError.
4086
4087     """
4088     cfg = self.cfg
4089     hostname = self.hostname
4090     node = hostname.name
4091     primary_ip = self.op.primary_ip = hostname.ip
4092     if self.op.secondary_ip is None:
4093       if self.primary_ip_family == netutils.IP6Address.family:
4094         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4095                                    " IPv4 address must be given as secondary",
4096                                    errors.ECODE_INVAL)
4097       self.op.secondary_ip = primary_ip
4098
4099     secondary_ip = self.op.secondary_ip
4100     if not netutils.IP4Address.IsValid(secondary_ip):
4101       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4102                                  " address" % secondary_ip, errors.ECODE_INVAL)
4103
4104     node_list = cfg.GetNodeList()
4105     if not self.op.readd and node in node_list:
4106       raise errors.OpPrereqError("Node %s is already in the configuration" %
4107                                  node, errors.ECODE_EXISTS)
4108     elif self.op.readd and node not in node_list:
4109       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4110                                  errors.ECODE_NOENT)
4111
4112     self.changed_primary_ip = False
4113
4114     for existing_node_name in node_list:
4115       existing_node = cfg.GetNodeInfo(existing_node_name)
4116
4117       if self.op.readd and node == existing_node_name:
4118         if existing_node.secondary_ip != secondary_ip:
4119           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4120                                      " address configuration as before",
4121                                      errors.ECODE_INVAL)
4122         if existing_node.primary_ip != primary_ip:
4123           self.changed_primary_ip = True
4124
4125         continue
4126
4127       if (existing_node.primary_ip == primary_ip or
4128           existing_node.secondary_ip == primary_ip or
4129           existing_node.primary_ip == secondary_ip or
4130           existing_node.secondary_ip == secondary_ip):
4131         raise errors.OpPrereqError("New node ip address(es) conflict with"
4132                                    " existing node %s" % existing_node.name,
4133                                    errors.ECODE_NOTUNIQUE)
4134
4135     # After this 'if' block, None is no longer a valid value for the
4136     # _capable op attributes
4137     if self.op.readd:
4138       old_node = self.cfg.GetNodeInfo(node)
4139       assert old_node is not None, "Can't retrieve locked node %s" % node
4140       for attr in self._NFLAGS:
4141         if getattr(self.op, attr) is None:
4142           setattr(self.op, attr, getattr(old_node, attr))
4143     else:
4144       for attr in self._NFLAGS:
4145         if getattr(self.op, attr) is None:
4146           setattr(self.op, attr, True)
4147
4148     if self.op.readd and not self.op.vm_capable:
4149       pri, sec = cfg.GetNodeInstances(node)
4150       if pri or sec:
4151         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4152                                    " flag set to false, but it already holds"
4153                                    " instances" % node,
4154                                    errors.ECODE_STATE)
4155
4156     # check that the type of the node (single versus dual homed) is the
4157     # same as for the master
4158     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4159     master_singlehomed = myself.secondary_ip == myself.primary_ip
4160     newbie_singlehomed = secondary_ip == primary_ip
4161     if master_singlehomed != newbie_singlehomed:
4162       if master_singlehomed:
4163         raise errors.OpPrereqError("The master has no secondary ip but the"
4164                                    " new node has one",
4165                                    errors.ECODE_INVAL)
4166       else:
4167         raise errors.OpPrereqError("The master has a secondary ip but the"
4168                                    " new node doesn't have one",
4169                                    errors.ECODE_INVAL)
4170
4171     # checks reachability
4172     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4173       raise errors.OpPrereqError("Node not reachable by ping",
4174                                  errors.ECODE_ENVIRON)
4175
4176     if not newbie_singlehomed:
4177       # check reachability from my secondary ip to newbie's secondary ip
4178       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4179                            source=myself.secondary_ip):
4180         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4181                                    " based ping to node daemon port",
4182                                    errors.ECODE_ENVIRON)
4183
4184     if self.op.readd:
4185       exceptions = [node]
4186     else:
4187       exceptions = []
4188
4189     if self.op.master_capable:
4190       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4191     else:
4192       self.master_candidate = False
4193
4194     if self.op.readd:
4195       self.new_node = old_node
4196     else:
4197       node_group = cfg.LookupNodeGroup(self.op.group)
4198       self.new_node = objects.Node(name=node,
4199                                    primary_ip=primary_ip,
4200                                    secondary_ip=secondary_ip,
4201                                    master_candidate=self.master_candidate,
4202                                    offline=False, drained=False,
4203                                    group=node_group)
4204
4205     if self.op.ndparams:
4206       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4207
4208   def Exec(self, feedback_fn):
4209     """Adds the new node to the cluster.
4210
4211     """
4212     new_node = self.new_node
4213     node = new_node.name
4214
4215     # We adding a new node so we assume it's powered
4216     new_node.powered = True
4217
4218     # for re-adds, reset the offline/drained/master-candidate flags;
4219     # we need to reset here, otherwise offline would prevent RPC calls
4220     # later in the procedure; this also means that if the re-add
4221     # fails, we are left with a non-offlined, broken node
4222     if self.op.readd:
4223       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4224       self.LogInfo("Readding a node, the offline/drained flags were reset")
4225       # if we demote the node, we do cleanup later in the procedure
4226       new_node.master_candidate = self.master_candidate
4227       if self.changed_primary_ip:
4228         new_node.primary_ip = self.op.primary_ip
4229
4230     # copy the master/vm_capable flags
4231     for attr in self._NFLAGS:
4232       setattr(new_node, attr, getattr(self.op, attr))
4233
4234     # notify the user about any possible mc promotion
4235     if new_node.master_candidate:
4236       self.LogInfo("Node will be a master candidate")
4237
4238     if self.op.ndparams:
4239       new_node.ndparams = self.op.ndparams
4240     else:
4241       new_node.ndparams = {}
4242
4243     # check connectivity
4244     result = self.rpc.call_version([node])[node]
4245     result.Raise("Can't get version information from node %s" % node)
4246     if constants.PROTOCOL_VERSION == result.payload:
4247       logging.info("Communication to node %s fine, sw version %s match",
4248                    node, result.payload)
4249     else:
4250       raise errors.OpExecError("Version mismatch master version %s,"
4251                                " node version %s" %
4252                                (constants.PROTOCOL_VERSION, result.payload))
4253
4254     # Add node to our /etc/hosts, and add key to known_hosts
4255     if self.cfg.GetClusterInfo().modify_etc_hosts:
4256       master_node = self.cfg.GetMasterNode()
4257       result = self.rpc.call_etc_hosts_modify(master_node,
4258                                               constants.ETC_HOSTS_ADD,
4259                                               self.hostname.name,
4260                                               self.hostname.ip)
4261       result.Raise("Can't update hosts file with new host data")
4262
4263     if new_node.secondary_ip != new_node.primary_ip:
4264       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4265                                False)
4266
4267     node_verify_list = [self.cfg.GetMasterNode()]
4268     node_verify_param = {
4269       constants.NV_NODELIST: [node],
4270       # TODO: do a node-net-test as well?
4271     }
4272
4273     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4274                                        self.cfg.GetClusterName())
4275     for verifier in node_verify_list:
4276       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4277       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4278       if nl_payload:
4279         for failed in nl_payload:
4280           feedback_fn("ssh/hostname verification failed"
4281                       " (checking from %s): %s" %
4282                       (verifier, nl_payload[failed]))
4283         raise errors.OpExecError("ssh/hostname verification failed.")
4284
4285     if self.op.readd:
4286       _RedistributeAncillaryFiles(self)
4287       self.context.ReaddNode(new_node)
4288       # make sure we redistribute the config
4289       self.cfg.Update(new_node, feedback_fn)
4290       # and make sure the new node will not have old files around
4291       if not new_node.master_candidate:
4292         result = self.rpc.call_node_demote_from_mc(new_node.name)
4293         msg = result.fail_msg
4294         if msg:
4295           self.LogWarning("Node failed to demote itself from master"
4296                           " candidate status: %s" % msg)
4297     else:
4298       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4299                                   additional_vm=self.op.vm_capable)
4300       self.context.AddNode(new_node, self.proc.GetECId())
4301
4302
4303 class LUNodeSetParams(LogicalUnit):
4304   """Modifies the parameters of a node.
4305
4306   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4307       to the node role (as _ROLE_*)
4308   @cvar _R2F: a dictionary from node role to tuples of flags
4309   @cvar _FLAGS: a list of attribute names corresponding to the flags
4310
4311   """
4312   HPATH = "node-modify"
4313   HTYPE = constants.HTYPE_NODE
4314   REQ_BGL = False
4315   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4316   _F2R = {
4317     (True, False, False): _ROLE_CANDIDATE,
4318     (False, True, False): _ROLE_DRAINED,
4319     (False, False, True): _ROLE_OFFLINE,
4320     (False, False, False): _ROLE_REGULAR,
4321     }
4322   _R2F = dict((v, k) for k, v in _F2R.items())
4323   _FLAGS = ["master_candidate", "drained", "offline"]
4324
4325   def CheckArguments(self):
4326     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4327     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4328                 self.op.master_capable, self.op.vm_capable,
4329                 self.op.secondary_ip, self.op.ndparams]
4330     if all_mods.count(None) == len(all_mods):
4331       raise errors.OpPrereqError("Please pass at least one modification",
4332                                  errors.ECODE_INVAL)
4333     if all_mods.count(True) > 1:
4334       raise errors.OpPrereqError("Can't set the node into more than one"
4335                                  " state at the same time",
4336                                  errors.ECODE_INVAL)
4337
4338     # Boolean value that tells us whether we might be demoting from MC
4339     self.might_demote = (self.op.master_candidate == False or
4340                          self.op.offline == True or
4341                          self.op.drained == True or
4342                          self.op.master_capable == False)
4343
4344     if self.op.secondary_ip:
4345       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4346         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4347                                    " address" % self.op.secondary_ip,
4348                                    errors.ECODE_INVAL)
4349
4350     self.lock_all = self.op.auto_promote and self.might_demote
4351     self.lock_instances = self.op.secondary_ip is not None
4352
4353   def ExpandNames(self):
4354     if self.lock_all:
4355       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4356     else:
4357       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4358
4359     if self.lock_instances:
4360       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4361
4362   def DeclareLocks(self, level):
4363     # If we have locked all instances, before waiting to lock nodes, release
4364     # all the ones living on nodes unrelated to the current operation.
4365     if level == locking.LEVEL_NODE and self.lock_instances:
4366       instances_release = []
4367       instances_keep = []
4368       self.affected_instances = []
4369       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4370         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4371           instance = self.context.cfg.GetInstanceInfo(instance_name)
4372           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4373           if i_mirrored and self.op.node_name in instance.all_nodes:
4374             instances_keep.append(instance_name)
4375             self.affected_instances.append(instance)
4376           else:
4377             instances_release.append(instance_name)
4378         if instances_release:
4379           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4380           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4381
4382   def BuildHooksEnv(self):
4383     """Build hooks env.
4384
4385     This runs on the master node.
4386
4387     """
4388     env = {
4389       "OP_TARGET": self.op.node_name,
4390       "MASTER_CANDIDATE": str(self.op.master_candidate),
4391       "OFFLINE": str(self.op.offline),
4392       "DRAINED": str(self.op.drained),
4393       "MASTER_CAPABLE": str(self.op.master_capable),
4394       "VM_CAPABLE": str(self.op.vm_capable),
4395       }
4396     nl = [self.cfg.GetMasterNode(),
4397           self.op.node_name]
4398     return env, nl, nl
4399
4400   def CheckPrereq(self):
4401     """Check prerequisites.
4402
4403     This only checks the instance list against the existing names.
4404
4405     """
4406     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4407
4408     if (self.op.master_candidate is not None or
4409         self.op.drained is not None or
4410         self.op.offline is not None):
4411       # we can't change the master's node flags
4412       if self.op.node_name == self.cfg.GetMasterNode():
4413         raise errors.OpPrereqError("The master role can be changed"
4414                                    " only via master-failover",
4415                                    errors.ECODE_INVAL)
4416
4417     if self.op.master_candidate and not node.master_capable:
4418       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4419                                  " it a master candidate" % node.name,
4420                                  errors.ECODE_STATE)
4421
4422     if self.op.vm_capable == False:
4423       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4424       if ipri or isec:
4425         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4426                                    " the vm_capable flag" % node.name,
4427                                    errors.ECODE_STATE)
4428
4429     if node.master_candidate and self.might_demote and not self.lock_all:
4430       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4431       # check if after removing the current node, we're missing master
4432       # candidates
4433       (mc_remaining, mc_should, _) = \
4434           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4435       if mc_remaining < mc_should:
4436         raise errors.OpPrereqError("Not enough master candidates, please"
4437                                    " pass auto promote option to allow"
4438                                    " promotion", errors.ECODE_STATE)
4439
4440     self.old_flags = old_flags = (node.master_candidate,
4441                                   node.drained, node.offline)
4442     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4443     self.old_role = old_role = self._F2R[old_flags]
4444
4445     # Check for ineffective changes
4446     for attr in self._FLAGS:
4447       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4448         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4449         setattr(self.op, attr, None)
4450
4451     # Past this point, any flag change to False means a transition
4452     # away from the respective state, as only real changes are kept
4453
4454     # TODO: We might query the real power state if it supports OOB
4455     if _SupportsOob(self.cfg, node):
4456       if self.op.offline is False and not (node.powered or
4457                                            self.op.powered == True):
4458         raise errors.OpPrereqError(("Please power on node %s first before you"
4459                                     " can reset offline state") %
4460                                    self.op.node_name)
4461     elif self.op.powered is not None:
4462       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4463                                   " which does not support out-of-band"
4464                                   " handling") % self.op.node_name)
4465
4466     # If we're being deofflined/drained, we'll MC ourself if needed
4467     if (self.op.drained == False or self.op.offline == False or
4468         (self.op.master_capable and not node.master_capable)):
4469       if _DecideSelfPromotion(self):
4470         self.op.master_candidate = True
4471         self.LogInfo("Auto-promoting node to master candidate")
4472
4473     # If we're no longer master capable, we'll demote ourselves from MC
4474     if self.op.master_capable == False and node.master_candidate:
4475       self.LogInfo("Demoting from master candidate")
4476       self.op.master_candidate = False
4477
4478     # Compute new role
4479     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4480     if self.op.master_candidate:
4481       new_role = self._ROLE_CANDIDATE
4482     elif self.op.drained:
4483       new_role = self._ROLE_DRAINED
4484     elif self.op.offline:
4485       new_role = self._ROLE_OFFLINE
4486     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4487       # False is still in new flags, which means we're un-setting (the
4488       # only) True flag
4489       new_role = self._ROLE_REGULAR
4490     else: # no new flags, nothing, keep old role
4491       new_role = old_role
4492
4493     self.new_role = new_role
4494
4495     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4496       # Trying to transition out of offline status
4497       result = self.rpc.call_version([node.name])[node.name]
4498       if result.fail_msg:
4499         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4500                                    " to report its version: %s" %
4501                                    (node.name, result.fail_msg),
4502                                    errors.ECODE_STATE)
4503       else:
4504         self.LogWarning("Transitioning node from offline to online state"
4505                         " without using re-add. Please make sure the node"
4506                         " is healthy!")
4507
4508     if self.op.secondary_ip:
4509       # Ok even without locking, because this can't be changed by any LU
4510       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4511       master_singlehomed = master.secondary_ip == master.primary_ip
4512       if master_singlehomed and self.op.secondary_ip:
4513         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4514                                    " homed cluster", errors.ECODE_INVAL)
4515
4516       if node.offline:
4517         if self.affected_instances:
4518           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4519                                      " node has instances (%s) configured"
4520                                      " to use it" % self.affected_instances)
4521       else:
4522         # On online nodes, check that no instances are running, and that
4523         # the node has the new ip and we can reach it.
4524         for instance in self.affected_instances:
4525           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4526
4527         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4528         if master.name != node.name:
4529           # check reachability from master secondary ip to new secondary ip
4530           if not netutils.TcpPing(self.op.secondary_ip,
4531                                   constants.DEFAULT_NODED_PORT,
4532                                   source=master.secondary_ip):
4533             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4534                                        " based ping to node daemon port",
4535                                        errors.ECODE_ENVIRON)
4536
4537     if self.op.ndparams:
4538       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4539       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4540       self.new_ndparams = new_ndparams
4541
4542   def Exec(self, feedback_fn):
4543     """Modifies a node.
4544
4545     """
4546     node = self.node
4547     old_role = self.old_role
4548     new_role = self.new_role
4549
4550     result = []
4551
4552     if self.op.ndparams:
4553       node.ndparams = self.new_ndparams
4554
4555     if self.op.powered is not None:
4556       node.powered = self.op.powered
4557
4558     for attr in ["master_capable", "vm_capable"]:
4559       val = getattr(self.op, attr)
4560       if val is not None:
4561         setattr(node, attr, val)
4562         result.append((attr, str(val)))
4563
4564     if new_role != old_role:
4565       # Tell the node to demote itself, if no longer MC and not offline
4566       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4567         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4568         if msg:
4569           self.LogWarning("Node failed to demote itself: %s", msg)
4570
4571       new_flags = self._R2F[new_role]
4572       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4573         if of != nf:
4574           result.append((desc, str(nf)))
4575       (node.master_candidate, node.drained, node.offline) = new_flags
4576
4577       # we locked all nodes, we adjust the CP before updating this node
4578       if self.lock_all:
4579         _AdjustCandidatePool(self, [node.name])
4580
4581     if self.op.secondary_ip:
4582       node.secondary_ip = self.op.secondary_ip
4583       result.append(("secondary_ip", self.op.secondary_ip))
4584
4585     # this will trigger configuration file update, if needed
4586     self.cfg.Update(node, feedback_fn)
4587
4588     # this will trigger job queue propagation or cleanup if the mc
4589     # flag changed
4590     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4591       self.context.ReaddNode(node)
4592
4593     return result
4594
4595
4596 class LUNodePowercycle(NoHooksLU):
4597   """Powercycles a node.
4598
4599   """
4600   REQ_BGL = False
4601
4602   def CheckArguments(self):
4603     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4604     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4605       raise errors.OpPrereqError("The node is the master and the force"
4606                                  " parameter was not set",
4607                                  errors.ECODE_INVAL)
4608
4609   def ExpandNames(self):
4610     """Locking for PowercycleNode.
4611
4612     This is a last-resort option and shouldn't block on other
4613     jobs. Therefore, we grab no locks.
4614
4615     """
4616     self.needed_locks = {}
4617
4618   def Exec(self, feedback_fn):
4619     """Reboots a node.
4620
4621     """
4622     result = self.rpc.call_node_powercycle(self.op.node_name,
4623                                            self.cfg.GetHypervisorType())
4624     result.Raise("Failed to schedule the reboot")
4625     return result.payload
4626
4627
4628 class LUClusterQuery(NoHooksLU):
4629   """Query cluster configuration.
4630
4631   """
4632   REQ_BGL = False
4633
4634   def ExpandNames(self):
4635     self.needed_locks = {}
4636
4637   def Exec(self, feedback_fn):
4638     """Return cluster config.
4639
4640     """
4641     cluster = self.cfg.GetClusterInfo()
4642     os_hvp = {}
4643
4644     # Filter just for enabled hypervisors
4645     for os_name, hv_dict in cluster.os_hvp.items():
4646       os_hvp[os_name] = {}
4647       for hv_name, hv_params in hv_dict.items():
4648         if hv_name in cluster.enabled_hypervisors:
4649           os_hvp[os_name][hv_name] = hv_params
4650
4651     # Convert ip_family to ip_version
4652     primary_ip_version = constants.IP4_VERSION
4653     if cluster.primary_ip_family == netutils.IP6Address.family:
4654       primary_ip_version = constants.IP6_VERSION
4655
4656     result = {
4657       "software_version": constants.RELEASE_VERSION,
4658       "protocol_version": constants.PROTOCOL_VERSION,
4659       "config_version": constants.CONFIG_VERSION,
4660       "os_api_version": max(constants.OS_API_VERSIONS),
4661       "export_version": constants.EXPORT_VERSION,
4662       "architecture": (platform.architecture()[0], platform.machine()),
4663       "name": cluster.cluster_name,
4664       "master": cluster.master_node,
4665       "default_hypervisor": cluster.enabled_hypervisors[0],
4666       "enabled_hypervisors": cluster.enabled_hypervisors,
4667       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4668                         for hypervisor_name in cluster.enabled_hypervisors]),
4669       "os_hvp": os_hvp,
4670       "beparams": cluster.beparams,
4671       "osparams": cluster.osparams,
4672       "nicparams": cluster.nicparams,
4673       "ndparams": cluster.ndparams,
4674       "candidate_pool_size": cluster.candidate_pool_size,
4675       "master_netdev": cluster.master_netdev,
4676       "volume_group_name": cluster.volume_group_name,
4677       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4678       "file_storage_dir": cluster.file_storage_dir,
4679       "maintain_node_health": cluster.maintain_node_health,
4680       "ctime": cluster.ctime,
4681       "mtime": cluster.mtime,
4682       "uuid": cluster.uuid,
4683       "tags": list(cluster.GetTags()),
4684       "uid_pool": cluster.uid_pool,
4685       "default_iallocator": cluster.default_iallocator,
4686       "reserved_lvs": cluster.reserved_lvs,
4687       "primary_ip_version": primary_ip_version,
4688       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4689       "hidden_os": cluster.hidden_os,
4690       "blacklisted_os": cluster.blacklisted_os,
4691       }
4692
4693     return result
4694
4695
4696 class LUClusterConfigQuery(NoHooksLU):
4697   """Return configuration values.
4698
4699   """
4700   REQ_BGL = False
4701   _FIELDS_DYNAMIC = utils.FieldSet()
4702   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4703                                   "watcher_pause", "volume_group_name")
4704
4705   def CheckArguments(self):
4706     _CheckOutputFields(static=self._FIELDS_STATIC,
4707                        dynamic=self._FIELDS_DYNAMIC,
4708                        selected=self.op.output_fields)
4709
4710   def ExpandNames(self):
4711     self.needed_locks = {}
4712
4713   def Exec(self, feedback_fn):
4714     """Dump a representation of the cluster config to the standard output.
4715
4716     """
4717     values = []
4718     for field in self.op.output_fields:
4719       if field == "cluster_name":
4720         entry = self.cfg.GetClusterName()
4721       elif field == "master_node":
4722         entry = self.cfg.GetMasterNode()
4723       elif field == "drain_flag":
4724         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4725       elif field == "watcher_pause":
4726         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4727       elif field == "volume_group_name":
4728         entry = self.cfg.GetVGName()
4729       else:
4730         raise errors.ParameterError(field)
4731       values.append(entry)
4732     return values
4733
4734
4735 class LUInstanceActivateDisks(NoHooksLU):
4736   """Bring up an instance's disks.
4737
4738   """
4739   REQ_BGL = False
4740
4741   def ExpandNames(self):
4742     self._ExpandAndLockInstance()
4743     self.needed_locks[locking.LEVEL_NODE] = []
4744     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4745
4746   def DeclareLocks(self, level):
4747     if level == locking.LEVEL_NODE:
4748       self._LockInstancesNodes()
4749
4750   def CheckPrereq(self):
4751     """Check prerequisites.
4752
4753     This checks that the instance is in the cluster.
4754
4755     """
4756     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4757     assert self.instance is not None, \
4758       "Cannot retrieve locked instance %s" % self.op.instance_name
4759     _CheckNodeOnline(self, self.instance.primary_node)
4760
4761   def Exec(self, feedback_fn):
4762     """Activate the disks.
4763
4764     """
4765     disks_ok, disks_info = \
4766               _AssembleInstanceDisks(self, self.instance,
4767                                      ignore_size=self.op.ignore_size)
4768     if not disks_ok:
4769       raise errors.OpExecError("Cannot activate block devices")
4770
4771     return disks_info
4772
4773
4774 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4775                            ignore_size=False):
4776   """Prepare the block devices for an instance.
4777
4778   This sets up the block devices on all nodes.
4779
4780   @type lu: L{LogicalUnit}
4781   @param lu: the logical unit on whose behalf we execute
4782   @type instance: L{objects.Instance}
4783   @param instance: the instance for whose disks we assemble
4784   @type disks: list of L{objects.Disk} or None
4785   @param disks: which disks to assemble (or all, if None)
4786   @type ignore_secondaries: boolean
4787   @param ignore_secondaries: if true, errors on secondary nodes
4788       won't result in an error return from the function
4789   @type ignore_size: boolean
4790   @param ignore_size: if true, the current known size of the disk
4791       will not be used during the disk activation, useful for cases
4792       when the size is wrong
4793   @return: False if the operation failed, otherwise a list of
4794       (host, instance_visible_name, node_visible_name)
4795       with the mapping from node devices to instance devices
4796
4797   """
4798   device_info = []
4799   disks_ok = True
4800   iname = instance.name
4801   disks = _ExpandCheckDisks(instance, disks)
4802
4803   # With the two passes mechanism we try to reduce the window of
4804   # opportunity for the race condition of switching DRBD to primary
4805   # before handshaking occured, but we do not eliminate it
4806
4807   # The proper fix would be to wait (with some limits) until the
4808   # connection has been made and drbd transitions from WFConnection
4809   # into any other network-connected state (Connected, SyncTarget,
4810   # SyncSource, etc.)
4811
4812   # 1st pass, assemble on all nodes in secondary mode
4813   for idx, inst_disk in enumerate(disks):
4814     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4815       if ignore_size:
4816         node_disk = node_disk.Copy()
4817         node_disk.UnsetSize()
4818       lu.cfg.SetDiskID(node_disk, node)
4819       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4820       msg = result.fail_msg
4821       if msg:
4822         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4823                            " (is_primary=False, pass=1): %s",
4824                            inst_disk.iv_name, node, msg)
4825         if not ignore_secondaries:
4826           disks_ok = False
4827
4828   # FIXME: race condition on drbd migration to primary
4829
4830   # 2nd pass, do only the primary node
4831   for idx, inst_disk in enumerate(disks):
4832     dev_path = None
4833
4834     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4835       if node != instance.primary_node:
4836         continue
4837       if ignore_size:
4838         node_disk = node_disk.Copy()
4839         node_disk.UnsetSize()
4840       lu.cfg.SetDiskID(node_disk, node)
4841       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4842       msg = result.fail_msg
4843       if msg:
4844         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4845                            " (is_primary=True, pass=2): %s",
4846                            inst_disk.iv_name, node, msg)
4847         disks_ok = False
4848       else:
4849         dev_path = result.payload
4850
4851     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4852
4853   # leave the disks configured for the primary node
4854   # this is a workaround that would be fixed better by
4855   # improving the logical/physical id handling
4856   for disk in disks:
4857     lu.cfg.SetDiskID(disk, instance.primary_node)
4858
4859   return disks_ok, device_info
4860
4861
4862 def _StartInstanceDisks(lu, instance, force):
4863   """Start the disks of an instance.
4864
4865   """
4866   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4867                                            ignore_secondaries=force)
4868   if not disks_ok:
4869     _ShutdownInstanceDisks(lu, instance)
4870     if force is not None and not force:
4871       lu.proc.LogWarning("", hint="If the message above refers to a"
4872                          " secondary node,"
4873                          " you can retry the operation using '--force'.")
4874     raise errors.OpExecError("Disk consistency error")
4875
4876
4877 class LUInstanceDeactivateDisks(NoHooksLU):
4878   """Shutdown an instance's disks.
4879
4880   """
4881   REQ_BGL = False
4882
4883   def ExpandNames(self):
4884     self._ExpandAndLockInstance()
4885     self.needed_locks[locking.LEVEL_NODE] = []
4886     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4887
4888   def DeclareLocks(self, level):
4889     if level == locking.LEVEL_NODE:
4890       self._LockInstancesNodes()
4891
4892   def CheckPrereq(self):
4893     """Check prerequisites.
4894
4895     This checks that the instance is in the cluster.
4896
4897     """
4898     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4899     assert self.instance is not None, \
4900       "Cannot retrieve locked instance %s" % self.op.instance_name
4901
4902   def Exec(self, feedback_fn):
4903     """Deactivate the disks
4904
4905     """
4906     instance = self.instance
4907     if self.op.force:
4908       _ShutdownInstanceDisks(self, instance)
4909     else:
4910       _SafeShutdownInstanceDisks(self, instance)
4911
4912
4913 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4914   """Shutdown block devices of an instance.
4915
4916   This function checks if an instance is running, before calling
4917   _ShutdownInstanceDisks.
4918
4919   """
4920   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4921   _ShutdownInstanceDisks(lu, instance, disks=disks)
4922
4923
4924 def _ExpandCheckDisks(instance, disks):
4925   """Return the instance disks selected by the disks list
4926
4927   @type disks: list of L{objects.Disk} or None
4928   @param disks: selected disks
4929   @rtype: list of L{objects.Disk}
4930   @return: selected instance disks to act on
4931
4932   """
4933   if disks is None:
4934     return instance.disks
4935   else:
4936     if not set(disks).issubset(instance.disks):
4937       raise errors.ProgrammerError("Can only act on disks belonging to the"
4938                                    " target instance")
4939     return disks
4940
4941
4942 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4943   """Shutdown block devices of an instance.
4944
4945   This does the shutdown on all nodes of the instance.
4946
4947   If the ignore_primary is false, errors on the primary node are
4948   ignored.
4949
4950   """
4951   all_result = True
4952   disks = _ExpandCheckDisks(instance, disks)
4953
4954   for disk in disks:
4955     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4956       lu.cfg.SetDiskID(top_disk, node)
4957       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4958       msg = result.fail_msg
4959       if msg:
4960         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4961                       disk.iv_name, node, msg)
4962         if ((node == instance.primary_node and not ignore_primary) or
4963             (node != instance.primary_node and not result.offline)):
4964           all_result = False
4965   return all_result
4966
4967
4968 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4969   """Checks if a node has enough free memory.
4970
4971   This function check if a given node has the needed amount of free
4972   memory. In case the node has less memory or we cannot get the
4973   information from the node, this function raise an OpPrereqError
4974   exception.
4975
4976   @type lu: C{LogicalUnit}
4977   @param lu: a logical unit from which we get configuration data
4978   @type node: C{str}
4979   @param node: the node to check
4980   @type reason: C{str}
4981   @param reason: string to use in the error message
4982   @type requested: C{int}
4983   @param requested: the amount of memory in MiB to check for
4984   @type hypervisor_name: C{str}
4985   @param hypervisor_name: the hypervisor to ask for memory stats
4986   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4987       we cannot check the node
4988
4989   """
4990   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4991   nodeinfo[node].Raise("Can't get data from node %s" % node,
4992                        prereq=True, ecode=errors.ECODE_ENVIRON)
4993   free_mem = nodeinfo[node].payload.get('memory_free', None)
4994   if not isinstance(free_mem, int):
4995     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4996                                " was '%s'" % (node, free_mem),
4997                                errors.ECODE_ENVIRON)
4998   if requested > free_mem:
4999     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5000                                " needed %s MiB, available %s MiB" %
5001                                (node, reason, requested, free_mem),
5002                                errors.ECODE_NORES)
5003
5004
5005 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5006   """Checks if nodes have enough free disk space in the all VGs.
5007
5008   This function check if all given nodes have the needed amount of
5009   free disk. In case any node has less disk or we cannot get the
5010   information from the node, this function raise an OpPrereqError
5011   exception.
5012
5013   @type lu: C{LogicalUnit}
5014   @param lu: a logical unit from which we get configuration data
5015   @type nodenames: C{list}
5016   @param nodenames: the list of node names to check
5017   @type req_sizes: C{dict}
5018   @param req_sizes: the hash of vg and corresponding amount of disk in
5019       MiB to check for
5020   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5021       or we cannot check the node
5022
5023   """
5024   for vg, req_size in req_sizes.items():
5025     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5026
5027
5028 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5029   """Checks if nodes have enough free disk space in the specified VG.
5030
5031   This function check if all given nodes have the needed amount of
5032   free disk. In case any node has less disk or we cannot get the
5033   information from the node, this function raise an OpPrereqError
5034   exception.
5035
5036   @type lu: C{LogicalUnit}
5037   @param lu: a logical unit from which we get configuration data
5038   @type nodenames: C{list}
5039   @param nodenames: the list of node names to check
5040   @type vg: C{str}
5041   @param vg: the volume group to check
5042   @type requested: C{int}
5043   @param requested: the amount of disk in MiB to check for
5044   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5045       or we cannot check the node
5046
5047   """
5048   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5049   for node in nodenames:
5050     info = nodeinfo[node]
5051     info.Raise("Cannot get current information from node %s" % node,
5052                prereq=True, ecode=errors.ECODE_ENVIRON)
5053     vg_free = info.payload.get("vg_free", None)
5054     if not isinstance(vg_free, int):
5055       raise errors.OpPrereqError("Can't compute free disk space on node"
5056                                  " %s for vg %s, result was '%s'" %
5057                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5058     if requested > vg_free:
5059       raise errors.OpPrereqError("Not enough disk space on target node %s"
5060                                  " vg %s: required %d MiB, available %d MiB" %
5061                                  (node, vg, requested, vg_free),
5062                                  errors.ECODE_NORES)
5063
5064
5065 class LUInstanceStartup(LogicalUnit):
5066   """Starts an instance.
5067
5068   """
5069   HPATH = "instance-start"
5070   HTYPE = constants.HTYPE_INSTANCE
5071   REQ_BGL = False
5072
5073   def CheckArguments(self):
5074     # extra beparams
5075     if self.op.beparams:
5076       # fill the beparams dict
5077       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5078
5079   def ExpandNames(self):
5080     self._ExpandAndLockInstance()
5081
5082   def BuildHooksEnv(self):
5083     """Build hooks env.
5084
5085     This runs on master, primary and secondary nodes of the instance.
5086
5087     """
5088     env = {
5089       "FORCE": self.op.force,
5090       }
5091     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5092     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5093     return env, nl, nl
5094
5095   def CheckPrereq(self):
5096     """Check prerequisites.
5097
5098     This checks that the instance is in the cluster.
5099
5100     """
5101     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5102     assert self.instance is not None, \
5103       "Cannot retrieve locked instance %s" % self.op.instance_name
5104
5105     # extra hvparams
5106     if self.op.hvparams:
5107       # check hypervisor parameter syntax (locally)
5108       cluster = self.cfg.GetClusterInfo()
5109       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5110       filled_hvp = cluster.FillHV(instance)
5111       filled_hvp.update(self.op.hvparams)
5112       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5113       hv_type.CheckParameterSyntax(filled_hvp)
5114       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5115
5116     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5117
5118     if self.primary_offline and self.op.ignore_offline_nodes:
5119       self.proc.LogWarning("Ignoring offline primary node")
5120
5121       if self.op.hvparams or self.op.beparams:
5122         self.proc.LogWarning("Overridden parameters are ignored")
5123     else:
5124       _CheckNodeOnline(self, instance.primary_node)
5125
5126       bep = self.cfg.GetClusterInfo().FillBE(instance)
5127
5128       # check bridges existence
5129       _CheckInstanceBridgesExist(self, instance)
5130
5131       remote_info = self.rpc.call_instance_info(instance.primary_node,
5132                                                 instance.name,
5133                                                 instance.hypervisor)
5134       remote_info.Raise("Error checking node %s" % instance.primary_node,
5135                         prereq=True, ecode=errors.ECODE_ENVIRON)
5136       if not remote_info.payload: # not running already
5137         _CheckNodeFreeMemory(self, instance.primary_node,
5138                              "starting instance %s" % instance.name,
5139                              bep[constants.BE_MEMORY], instance.hypervisor)
5140
5141   def Exec(self, feedback_fn):
5142     """Start the instance.
5143
5144     """
5145     instance = self.instance
5146     force = self.op.force
5147
5148     self.cfg.MarkInstanceUp(instance.name)
5149
5150     if self.primary_offline:
5151       assert self.op.ignore_offline_nodes
5152       self.proc.LogInfo("Primary node offline, marked instance as started")
5153     else:
5154       node_current = instance.primary_node
5155
5156       _StartInstanceDisks(self, instance, force)
5157
5158       result = self.rpc.call_instance_start(node_current, instance,
5159                                             self.op.hvparams, self.op.beparams)
5160       msg = result.fail_msg
5161       if msg:
5162         _ShutdownInstanceDisks(self, instance)
5163         raise errors.OpExecError("Could not start instance: %s" % msg)
5164
5165
5166 class LUInstanceReboot(LogicalUnit):
5167   """Reboot an instance.
5168
5169   """
5170   HPATH = "instance-reboot"
5171   HTYPE = constants.HTYPE_INSTANCE
5172   REQ_BGL = False
5173
5174   def ExpandNames(self):
5175     self._ExpandAndLockInstance()
5176
5177   def BuildHooksEnv(self):
5178     """Build hooks env.
5179
5180     This runs on master, primary and secondary nodes of the instance.
5181
5182     """
5183     env = {
5184       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5185       "REBOOT_TYPE": self.op.reboot_type,
5186       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5187       }
5188     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5189     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5190     return env, nl, nl
5191
5192   def CheckPrereq(self):
5193     """Check prerequisites.
5194
5195     This checks that the instance is in the cluster.
5196
5197     """
5198     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5199     assert self.instance is not None, \
5200       "Cannot retrieve locked instance %s" % self.op.instance_name
5201
5202     _CheckNodeOnline(self, instance.primary_node)
5203
5204     # check bridges existence
5205     _CheckInstanceBridgesExist(self, instance)
5206
5207   def Exec(self, feedback_fn):
5208     """Reboot the instance.
5209
5210     """
5211     instance = self.instance
5212     ignore_secondaries = self.op.ignore_secondaries
5213     reboot_type = self.op.reboot_type
5214
5215     node_current = instance.primary_node
5216
5217     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5218                        constants.INSTANCE_REBOOT_HARD]:
5219       for disk in instance.disks:
5220         self.cfg.SetDiskID(disk, node_current)
5221       result = self.rpc.call_instance_reboot(node_current, instance,
5222                                              reboot_type,
5223                                              self.op.shutdown_timeout)
5224       result.Raise("Could not reboot instance")
5225     else:
5226       result = self.rpc.call_instance_shutdown(node_current, instance,
5227                                                self.op.shutdown_timeout)
5228       result.Raise("Could not shutdown instance for full reboot")
5229       _ShutdownInstanceDisks(self, instance)
5230       _StartInstanceDisks(self, instance, ignore_secondaries)
5231       result = self.rpc.call_instance_start(node_current, instance, None, None)
5232       msg = result.fail_msg
5233       if msg:
5234         _ShutdownInstanceDisks(self, instance)
5235         raise errors.OpExecError("Could not start instance for"
5236                                  " full reboot: %s" % msg)
5237
5238     self.cfg.MarkInstanceUp(instance.name)
5239
5240
5241 class LUInstanceShutdown(LogicalUnit):
5242   """Shutdown an instance.
5243
5244   """
5245   HPATH = "instance-stop"
5246   HTYPE = constants.HTYPE_INSTANCE
5247   REQ_BGL = False
5248
5249   def ExpandNames(self):
5250     self._ExpandAndLockInstance()
5251
5252   def BuildHooksEnv(self):
5253     """Build hooks env.
5254
5255     This runs on master, primary and secondary nodes of the instance.
5256
5257     """
5258     env = _BuildInstanceHookEnvByObject(self, self.instance)
5259     env["TIMEOUT"] = self.op.timeout
5260     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5261     return env, nl, nl
5262
5263   def CheckPrereq(self):
5264     """Check prerequisites.
5265
5266     This checks that the instance is in the cluster.
5267
5268     """
5269     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5270     assert self.instance is not None, \
5271       "Cannot retrieve locked instance %s" % self.op.instance_name
5272
5273     self.primary_offline = \
5274       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5275
5276     if self.primary_offline and self.op.ignore_offline_nodes:
5277       self.proc.LogWarning("Ignoring offline primary node")
5278     else:
5279       _CheckNodeOnline(self, self.instance.primary_node)
5280
5281   def Exec(self, feedback_fn):
5282     """Shutdown the instance.
5283
5284     """
5285     instance = self.instance
5286     node_current = instance.primary_node
5287     timeout = self.op.timeout
5288
5289     self.cfg.MarkInstanceDown(instance.name)
5290
5291     if self.primary_offline:
5292       assert self.op.ignore_offline_nodes
5293       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5294     else:
5295       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5296       msg = result.fail_msg
5297       if msg:
5298         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5299
5300       _ShutdownInstanceDisks(self, instance)
5301
5302
5303 class LUInstanceReinstall(LogicalUnit):
5304   """Reinstall an instance.
5305
5306   """
5307   HPATH = "instance-reinstall"
5308   HTYPE = constants.HTYPE_INSTANCE
5309   REQ_BGL = False
5310
5311   def ExpandNames(self):
5312     self._ExpandAndLockInstance()
5313
5314   def BuildHooksEnv(self):
5315     """Build hooks env.
5316
5317     This runs on master, primary and secondary nodes of the instance.
5318
5319     """
5320     env = _BuildInstanceHookEnvByObject(self, self.instance)
5321     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5322     return env, nl, nl
5323
5324   def CheckPrereq(self):
5325     """Check prerequisites.
5326
5327     This checks that the instance is in the cluster and is not running.
5328
5329     """
5330     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5331     assert instance is not None, \
5332       "Cannot retrieve locked instance %s" % self.op.instance_name
5333     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5334                      " offline, cannot reinstall")
5335     for node in instance.secondary_nodes:
5336       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5337                        " cannot reinstall")
5338
5339     if instance.disk_template == constants.DT_DISKLESS:
5340       raise errors.OpPrereqError("Instance '%s' has no disks" %
5341                                  self.op.instance_name,
5342                                  errors.ECODE_INVAL)
5343     _CheckInstanceDown(self, instance, "cannot reinstall")
5344
5345     if self.op.os_type is not None:
5346       # OS verification
5347       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5348       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5349       instance_os = self.op.os_type
5350     else:
5351       instance_os = instance.os
5352
5353     nodelist = list(instance.all_nodes)
5354
5355     if self.op.osparams:
5356       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5357       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5358       self.os_inst = i_osdict # the new dict (without defaults)
5359     else:
5360       self.os_inst = None
5361
5362     self.instance = instance
5363
5364   def Exec(self, feedback_fn):
5365     """Reinstall the instance.
5366
5367     """
5368     inst = self.instance
5369
5370     if self.op.os_type is not None:
5371       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5372       inst.os = self.op.os_type
5373       # Write to configuration
5374       self.cfg.Update(inst, feedback_fn)
5375
5376     _StartInstanceDisks(self, inst, None)
5377     try:
5378       feedback_fn("Running the instance OS create scripts...")
5379       # FIXME: pass debug option from opcode to backend
5380       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5381                                              self.op.debug_level,
5382                                              osparams=self.os_inst)
5383       result.Raise("Could not install OS for instance %s on node %s" %
5384                    (inst.name, inst.primary_node))
5385     finally:
5386       _ShutdownInstanceDisks(self, inst)
5387
5388
5389 class LUInstanceRecreateDisks(LogicalUnit):
5390   """Recreate an instance's missing disks.
5391
5392   """
5393   HPATH = "instance-recreate-disks"
5394   HTYPE = constants.HTYPE_INSTANCE
5395   REQ_BGL = False
5396
5397   def ExpandNames(self):
5398     self._ExpandAndLockInstance()
5399
5400   def BuildHooksEnv(self):
5401     """Build hooks env.
5402
5403     This runs on master, primary and secondary nodes of the instance.
5404
5405     """
5406     env = _BuildInstanceHookEnvByObject(self, self.instance)
5407     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5408     return env, nl, nl
5409
5410   def CheckPrereq(self):
5411     """Check prerequisites.
5412
5413     This checks that the instance is in the cluster and is not running.
5414
5415     """
5416     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5417     assert instance is not None, \
5418       "Cannot retrieve locked instance %s" % self.op.instance_name
5419     _CheckNodeOnline(self, instance.primary_node)
5420
5421     if instance.disk_template == constants.DT_DISKLESS:
5422       raise errors.OpPrereqError("Instance '%s' has no disks" %
5423                                  self.op.instance_name, errors.ECODE_INVAL)
5424     _CheckInstanceDown(self, instance, "cannot recreate disks")
5425
5426     if not self.op.disks:
5427       self.op.disks = range(len(instance.disks))
5428     else:
5429       for idx in self.op.disks:
5430         if idx >= len(instance.disks):
5431           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5432                                      errors.ECODE_INVAL)
5433
5434     self.instance = instance
5435
5436   def Exec(self, feedback_fn):
5437     """Recreate the disks.
5438
5439     """
5440     to_skip = []
5441     for idx, _ in enumerate(self.instance.disks):
5442       if idx not in self.op.disks: # disk idx has not been passed in
5443         to_skip.append(idx)
5444         continue
5445
5446     _CreateDisks(self, self.instance, to_skip=to_skip)
5447
5448
5449 class LUInstanceRename(LogicalUnit):
5450   """Rename an instance.
5451
5452   """
5453   HPATH = "instance-rename"
5454   HTYPE = constants.HTYPE_INSTANCE
5455
5456   def CheckArguments(self):
5457     """Check arguments.
5458
5459     """
5460     if self.op.ip_check and not self.op.name_check:
5461       # TODO: make the ip check more flexible and not depend on the name check
5462       raise errors.OpPrereqError("Cannot do ip check without a name check",
5463                                  errors.ECODE_INVAL)
5464
5465   def BuildHooksEnv(self):
5466     """Build hooks env.
5467
5468     This runs on master, primary and secondary nodes of the instance.
5469
5470     """
5471     env = _BuildInstanceHookEnvByObject(self, self.instance)
5472     env["INSTANCE_NEW_NAME"] = self.op.new_name
5473     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5474     return env, nl, nl
5475
5476   def CheckPrereq(self):
5477     """Check prerequisites.
5478
5479     This checks that the instance is in the cluster and is not running.
5480
5481     """
5482     self.op.instance_name = _ExpandInstanceName(self.cfg,
5483                                                 self.op.instance_name)
5484     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5485     assert instance is not None
5486     _CheckNodeOnline(self, instance.primary_node)
5487     _CheckInstanceDown(self, instance, "cannot rename")
5488     self.instance = instance
5489
5490     new_name = self.op.new_name
5491     if self.op.name_check:
5492       hostname = netutils.GetHostname(name=new_name)
5493       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5494                    hostname.name)
5495       new_name = self.op.new_name = hostname.name
5496       if (self.op.ip_check and
5497           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5498         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5499                                    (hostname.ip, new_name),
5500                                    errors.ECODE_NOTUNIQUE)
5501
5502     instance_list = self.cfg.GetInstanceList()
5503     if new_name in instance_list and new_name != instance.name:
5504       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5505                                  new_name, errors.ECODE_EXISTS)
5506
5507   def Exec(self, feedback_fn):
5508     """Rename the instance.
5509
5510     """
5511     inst = self.instance
5512     old_name = inst.name
5513
5514     rename_file_storage = False
5515     if (inst.disk_template == constants.DT_FILE and
5516         self.op.new_name != inst.name):
5517       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5518       rename_file_storage = True
5519
5520     self.cfg.RenameInstance(inst.name, self.op.new_name)
5521     # Change the instance lock. This is definitely safe while we hold the BGL
5522     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5523     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5524
5525     # re-read the instance from the configuration after rename
5526     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5527
5528     if rename_file_storage:
5529       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5530       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5531                                                      old_file_storage_dir,
5532                                                      new_file_storage_dir)
5533       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5534                    " (but the instance has been renamed in Ganeti)" %
5535                    (inst.primary_node, old_file_storage_dir,
5536                     new_file_storage_dir))
5537
5538     _StartInstanceDisks(self, inst, None)
5539     try:
5540       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5541                                                  old_name, self.op.debug_level)
5542       msg = result.fail_msg
5543       if msg:
5544         msg = ("Could not run OS rename script for instance %s on node %s"
5545                " (but the instance has been renamed in Ganeti): %s" %
5546                (inst.name, inst.primary_node, msg))
5547         self.proc.LogWarning(msg)
5548     finally:
5549       _ShutdownInstanceDisks(self, inst)
5550
5551     return inst.name
5552
5553
5554 class LUInstanceRemove(LogicalUnit):
5555   """Remove an instance.
5556
5557   """
5558   HPATH = "instance-remove"
5559   HTYPE = constants.HTYPE_INSTANCE
5560   REQ_BGL = False
5561
5562   def ExpandNames(self):
5563     self._ExpandAndLockInstance()
5564     self.needed_locks[locking.LEVEL_NODE] = []
5565     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5566
5567   def DeclareLocks(self, level):
5568     if level == locking.LEVEL_NODE:
5569       self._LockInstancesNodes()
5570
5571   def BuildHooksEnv(self):
5572     """Build hooks env.
5573
5574     This runs on master, primary and secondary nodes of the instance.
5575
5576     """
5577     env = _BuildInstanceHookEnvByObject(self, self.instance)
5578     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5579     nl = [self.cfg.GetMasterNode()]
5580     nl_post = list(self.instance.all_nodes) + nl
5581     return env, nl, nl_post
5582
5583   def CheckPrereq(self):
5584     """Check prerequisites.
5585
5586     This checks that the instance is in the cluster.
5587
5588     """
5589     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5590     assert self.instance is not None, \
5591       "Cannot retrieve locked instance %s" % self.op.instance_name
5592
5593   def Exec(self, feedback_fn):
5594     """Remove the instance.
5595
5596     """
5597     instance = self.instance
5598     logging.info("Shutting down instance %s on node %s",
5599                  instance.name, instance.primary_node)
5600
5601     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5602                                              self.op.shutdown_timeout)
5603     msg = result.fail_msg
5604     if msg:
5605       if self.op.ignore_failures:
5606         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5607       else:
5608         raise errors.OpExecError("Could not shutdown instance %s on"
5609                                  " node %s: %s" %
5610                                  (instance.name, instance.primary_node, msg))
5611
5612     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5613
5614
5615 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5616   """Utility function to remove an instance.
5617
5618   """
5619   logging.info("Removing block devices for instance %s", instance.name)
5620
5621   if not _RemoveDisks(lu, instance):
5622     if not ignore_failures:
5623       raise errors.OpExecError("Can't remove instance's disks")
5624     feedback_fn("Warning: can't remove instance's disks")
5625
5626   logging.info("Removing instance %s out of cluster config", instance.name)
5627
5628   lu.cfg.RemoveInstance(instance.name)
5629
5630   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5631     "Instance lock removal conflict"
5632
5633   # Remove lock for the instance
5634   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5635
5636
5637 class LUInstanceQuery(NoHooksLU):
5638   """Logical unit for querying instances.
5639
5640   """
5641   # pylint: disable-msg=W0142
5642   REQ_BGL = False
5643
5644   def CheckArguments(self):
5645     self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5646                              self.op.use_locking)
5647
5648   def ExpandNames(self):
5649     self.iq.ExpandNames(self)
5650
5651   def DeclareLocks(self, level):
5652     self.iq.DeclareLocks(self, level)
5653
5654   def Exec(self, feedback_fn):
5655     return self.iq.OldStyleQuery(self)
5656
5657
5658 class LUInstanceFailover(LogicalUnit):
5659   """Failover an instance.
5660
5661   """
5662   HPATH = "instance-failover"
5663   HTYPE = constants.HTYPE_INSTANCE
5664   REQ_BGL = False
5665
5666   def ExpandNames(self):
5667     self._ExpandAndLockInstance()
5668     self.needed_locks[locking.LEVEL_NODE] = []
5669     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5670
5671   def DeclareLocks(self, level):
5672     if level == locking.LEVEL_NODE:
5673       self._LockInstancesNodes()
5674
5675   def BuildHooksEnv(self):
5676     """Build hooks env.
5677
5678     This runs on master, primary and secondary nodes of the instance.
5679
5680     """
5681     instance = self.instance
5682     source_node = instance.primary_node
5683     target_node = instance.secondary_nodes[0]
5684     env = {
5685       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5686       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5687       "OLD_PRIMARY": source_node,
5688       "OLD_SECONDARY": target_node,
5689       "NEW_PRIMARY": target_node,
5690       "NEW_SECONDARY": source_node,
5691       }
5692     env.update(_BuildInstanceHookEnvByObject(self, instance))
5693     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5694     nl_post = list(nl)
5695     nl_post.append(source_node)
5696     return env, nl, nl_post
5697
5698   def CheckPrereq(self):
5699     """Check prerequisites.
5700
5701     This checks that the instance is in the cluster.
5702
5703     """
5704     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5705     assert self.instance is not None, \
5706       "Cannot retrieve locked instance %s" % self.op.instance_name
5707
5708     bep = self.cfg.GetClusterInfo().FillBE(instance)
5709     if instance.disk_template not in constants.DTS_NET_MIRROR:
5710       raise errors.OpPrereqError("Instance's disk layout is not"
5711                                  " network mirrored, cannot failover.",
5712                                  errors.ECODE_STATE)
5713
5714     secondary_nodes = instance.secondary_nodes
5715     if not secondary_nodes:
5716       raise errors.ProgrammerError("no secondary node but using "
5717                                    "a mirrored disk template")
5718
5719     target_node = secondary_nodes[0]
5720     _CheckNodeOnline(self, target_node)
5721     _CheckNodeNotDrained(self, target_node)
5722     if instance.admin_up:
5723       # check memory requirements on the secondary node
5724       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5725                            instance.name, bep[constants.BE_MEMORY],
5726                            instance.hypervisor)
5727     else:
5728       self.LogInfo("Not checking memory on the secondary node as"
5729                    " instance will not be started")
5730
5731     # check bridge existance
5732     _CheckInstanceBridgesExist(self, instance, node=target_node)
5733
5734   def Exec(self, feedback_fn):
5735     """Failover an instance.
5736
5737     The failover is done by shutting it down on its present node and
5738     starting it on the secondary.
5739
5740     """
5741     instance = self.instance
5742     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5743
5744     source_node = instance.primary_node
5745     target_node = instance.secondary_nodes[0]
5746
5747     if instance.admin_up:
5748       feedback_fn("* checking disk consistency between source and target")
5749       for dev in instance.disks:
5750         # for drbd, these are drbd over lvm
5751         if not _CheckDiskConsistency(self, dev, target_node, False):
5752           if not self.op.ignore_consistency:
5753             raise errors.OpExecError("Disk %s is degraded on target node,"
5754                                      " aborting failover." % dev.iv_name)
5755     else:
5756       feedback_fn("* not checking disk consistency as instance is not running")
5757
5758     feedback_fn("* shutting down instance on source node")
5759     logging.info("Shutting down instance %s on node %s",
5760                  instance.name, source_node)
5761
5762     result = self.rpc.call_instance_shutdown(source_node, instance,
5763                                              self.op.shutdown_timeout)
5764     msg = result.fail_msg
5765     if msg:
5766       if self.op.ignore_consistency or primary_node.offline:
5767         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5768                              " Proceeding anyway. Please make sure node"
5769                              " %s is down. Error details: %s",
5770                              instance.name, source_node, source_node, msg)
5771       else:
5772         raise errors.OpExecError("Could not shutdown instance %s on"
5773                                  " node %s: %s" %
5774                                  (instance.name, source_node, msg))
5775
5776     feedback_fn("* deactivating the instance's disks on source node")
5777     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5778       raise errors.OpExecError("Can't shut down the instance's disks.")
5779
5780     instance.primary_node = target_node
5781     # distribute new instance config to the other nodes
5782     self.cfg.Update(instance, feedback_fn)
5783
5784     # Only start the instance if it's marked as up
5785     if instance.admin_up:
5786       feedback_fn("* activating the instance's disks on target node")
5787       logging.info("Starting instance %s on node %s",
5788                    instance.name, target_node)
5789
5790       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5791                                            ignore_secondaries=True)
5792       if not disks_ok:
5793         _ShutdownInstanceDisks(self, instance)
5794         raise errors.OpExecError("Can't activate the instance's disks")
5795
5796       feedback_fn("* starting the instance on the target node")
5797       result = self.rpc.call_instance_start(target_node, instance, None, None)
5798       msg = result.fail_msg
5799       if msg:
5800         _ShutdownInstanceDisks(self, instance)
5801         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802                                  (instance.name, target_node, msg))
5803
5804
5805 class LUInstanceMigrate(LogicalUnit):
5806   """Migrate an instance.
5807
5808   This is migration without shutting down, compared to the failover,
5809   which is done with shutdown.
5810
5811   """
5812   HPATH = "instance-migrate"
5813   HTYPE = constants.HTYPE_INSTANCE
5814   REQ_BGL = False
5815
5816   def ExpandNames(self):
5817     self._ExpandAndLockInstance()
5818
5819     self.needed_locks[locking.LEVEL_NODE] = []
5820     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5821
5822     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5823                                        self.op.cleanup)
5824     self.tasklets = [self._migrater]
5825
5826   def DeclareLocks(self, level):
5827     if level == locking.LEVEL_NODE:
5828       self._LockInstancesNodes()
5829
5830   def BuildHooksEnv(self):
5831     """Build hooks env.
5832
5833     This runs on master, primary and secondary nodes of the instance.
5834
5835     """
5836     instance = self._migrater.instance
5837     source_node = instance.primary_node
5838     target_node = instance.secondary_nodes[0]
5839     env = _BuildInstanceHookEnvByObject(self, instance)
5840     env["MIGRATE_LIVE"] = self._migrater.live
5841     env["MIGRATE_CLEANUP"] = self.op.cleanup
5842     env.update({
5843         "OLD_PRIMARY": source_node,
5844         "OLD_SECONDARY": target_node,
5845         "NEW_PRIMARY": target_node,
5846         "NEW_SECONDARY": source_node,
5847         })
5848     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5849     nl_post = list(nl)
5850     nl_post.append(source_node)
5851     return env, nl, nl_post
5852
5853
5854 class LUInstanceMove(LogicalUnit):
5855   """Move an instance by data-copying.
5856
5857   """
5858   HPATH = "instance-move"
5859   HTYPE = constants.HTYPE_INSTANCE
5860   REQ_BGL = False
5861
5862   def ExpandNames(self):
5863     self._ExpandAndLockInstance()
5864     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5865     self.op.target_node = target_node
5866     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5867     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5868
5869   def DeclareLocks(self, level):
5870     if level == locking.LEVEL_NODE:
5871       self._LockInstancesNodes(primary_only=True)
5872
5873   def BuildHooksEnv(self):
5874     """Build hooks env.
5875
5876     This runs on master, primary and secondary nodes of the instance.
5877
5878     """
5879     env = {
5880       "TARGET_NODE": self.op.target_node,
5881       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5882       }
5883     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5884     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5885                                        self.op.target_node]
5886     return env, nl, nl
5887
5888   def CheckPrereq(self):
5889     """Check prerequisites.
5890
5891     This checks that the instance is in the cluster.
5892
5893     """
5894     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5895     assert self.instance is not None, \
5896       "Cannot retrieve locked instance %s" % self.op.instance_name
5897
5898     node = self.cfg.GetNodeInfo(self.op.target_node)
5899     assert node is not None, \
5900       "Cannot retrieve locked node %s" % self.op.target_node
5901
5902     self.target_node = target_node = node.name
5903
5904     if target_node == instance.primary_node:
5905       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5906                                  (instance.name, target_node),
5907                                  errors.ECODE_STATE)
5908
5909     bep = self.cfg.GetClusterInfo().FillBE(instance)
5910
5911     for idx, dsk in enumerate(instance.disks):
5912       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5913         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5914                                    " cannot copy" % idx, errors.ECODE_STATE)
5915
5916     _CheckNodeOnline(self, target_node)
5917     _CheckNodeNotDrained(self, target_node)
5918     _CheckNodeVmCapable(self, target_node)
5919
5920     if instance.admin_up:
5921       # check memory requirements on the secondary node
5922       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5923                            instance.name, bep[constants.BE_MEMORY],
5924                            instance.hypervisor)
5925     else:
5926       self.LogInfo("Not checking memory on the secondary node as"
5927                    " instance will not be started")
5928
5929     # check bridge existance
5930     _CheckInstanceBridgesExist(self, instance, node=target_node)
5931
5932   def Exec(self, feedback_fn):
5933     """Move an instance.
5934
5935     The move is done by shutting it down on its present node, copying
5936     the data over (slow) and starting it on the new node.
5937
5938     """
5939     instance = self.instance
5940
5941     source_node = instance.primary_node
5942     target_node = self.target_node
5943
5944     self.LogInfo("Shutting down instance %s on source node %s",
5945                  instance.name, source_node)
5946
5947     result = self.rpc.call_instance_shutdown(source_node, instance,
5948                                              self.op.shutdown_timeout)
5949     msg = result.fail_msg
5950     if msg:
5951       if self.op.ignore_consistency:
5952         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5953                              " Proceeding anyway. Please make sure node"
5954                              " %s is down. Error details: %s",
5955                              instance.name, source_node, source_node, msg)
5956       else:
5957         raise errors.OpExecError("Could not shutdown instance %s on"
5958                                  " node %s: %s" %
5959                                  (instance.name, source_node, msg))
5960
5961     # create the target disks
5962     try:
5963       _CreateDisks(self, instance, target_node=target_node)
5964     except errors.OpExecError:
5965       self.LogWarning("Device creation failed, reverting...")
5966       try:
5967         _RemoveDisks(self, instance, target_node=target_node)
5968       finally:
5969         self.cfg.ReleaseDRBDMinors(instance.name)
5970         raise
5971
5972     cluster_name = self.cfg.GetClusterInfo().cluster_name
5973
5974     errs = []
5975     # activate, get path, copy the data over
5976     for idx, disk in enumerate(instance.disks):
5977       self.LogInfo("Copying data for disk %d", idx)
5978       result = self.rpc.call_blockdev_assemble(target_node, disk,
5979                                                instance.name, True, idx)
5980       if result.fail_msg:
5981         self.LogWarning("Can't assemble newly created disk %d: %s",
5982                         idx, result.fail_msg)
5983         errs.append(result.fail_msg)
5984         break
5985       dev_path = result.payload
5986       result = self.rpc.call_blockdev_export(source_node, disk,
5987                                              target_node, dev_path,
5988                                              cluster_name)
5989       if result.fail_msg:
5990         self.LogWarning("Can't copy data over for disk %d: %s",
5991                         idx, result.fail_msg)
5992         errs.append(result.fail_msg)
5993         break
5994
5995     if errs:
5996       self.LogWarning("Some disks failed to copy, aborting")
5997       try:
5998         _RemoveDisks(self, instance, target_node=target_node)
5999       finally:
6000         self.cfg.ReleaseDRBDMinors(instance.name)
6001         raise errors.OpExecError("Errors during disk copy: %s" %
6002                                  (",".join(errs),))
6003
6004     instance.primary_node = target_node
6005     self.cfg.Update(instance, feedback_fn)
6006
6007     self.LogInfo("Removing the disks on the original node")
6008     _RemoveDisks(self, instance, target_node=source_node)
6009
6010     # Only start the instance if it's marked as up
6011     if instance.admin_up:
6012       self.LogInfo("Starting instance %s on node %s",
6013                    instance.name, target_node)
6014
6015       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6016                                            ignore_secondaries=True)
6017       if not disks_ok:
6018         _ShutdownInstanceDisks(self, instance)
6019         raise errors.OpExecError("Can't activate the instance's disks")
6020
6021       result = self.rpc.call_instance_start(target_node, instance, None, None)
6022       msg = result.fail_msg
6023       if msg:
6024         _ShutdownInstanceDisks(self, instance)
6025         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6026                                  (instance.name, target_node, msg))
6027
6028
6029 class LUNodeMigrate(LogicalUnit):
6030   """Migrate all instances from a node.
6031
6032   """
6033   HPATH = "node-migrate"
6034   HTYPE = constants.HTYPE_NODE
6035   REQ_BGL = False
6036
6037   def ExpandNames(self):
6038     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6039
6040     self.needed_locks = {
6041       locking.LEVEL_NODE: [self.op.node_name],
6042       }
6043
6044     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6045
6046     # Create tasklets for migrating instances for all instances on this node
6047     names = []
6048     tasklets = []
6049
6050     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6051       logging.debug("Migrating instance %s", inst.name)
6052       names.append(inst.name)
6053
6054       tasklets.append(TLMigrateInstance(self, inst.name, False))
6055
6056     self.tasklets = tasklets
6057
6058     # Declare instance locks
6059     self.needed_locks[locking.LEVEL_INSTANCE] = names
6060
6061   def DeclareLocks(self, level):
6062     if level == locking.LEVEL_NODE:
6063       self._LockInstancesNodes()
6064
6065   def BuildHooksEnv(self):
6066     """Build hooks env.
6067
6068     This runs on the master, the primary and all the secondaries.
6069
6070     """
6071     env = {
6072       "NODE_NAME": self.op.node_name,
6073       }
6074
6075     nl = [self.cfg.GetMasterNode()]
6076
6077     return (env, nl, nl)
6078
6079
6080 class TLMigrateInstance(Tasklet):
6081   """Tasklet class for instance migration.
6082
6083   @type live: boolean
6084   @ivar live: whether the migration will be done live or non-live;
6085       this variable is initalized only after CheckPrereq has run
6086
6087   """
6088   def __init__(self, lu, instance_name, cleanup):
6089     """Initializes this class.
6090
6091     """
6092     Tasklet.__init__(self, lu)
6093
6094     # Parameters
6095     self.instance_name = instance_name
6096     self.cleanup = cleanup
6097     self.live = False # will be overridden later
6098
6099   def CheckPrereq(self):
6100     """Check prerequisites.
6101
6102     This checks that the instance is in the cluster.
6103
6104     """
6105     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6106     instance = self.cfg.GetInstanceInfo(instance_name)
6107     assert instance is not None
6108
6109     if instance.disk_template != constants.DT_DRBD8:
6110       raise errors.OpPrereqError("Instance's disk layout is not"
6111                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6112
6113     secondary_nodes = instance.secondary_nodes
6114     if not secondary_nodes:
6115       raise errors.ConfigurationError("No secondary node but using"
6116                                       " drbd8 disk template")
6117
6118     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6119
6120     target_node = secondary_nodes[0]
6121     # check memory requirements on the secondary node
6122     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6123                          instance.name, i_be[constants.BE_MEMORY],
6124                          instance.hypervisor)
6125
6126     # check bridge existance
6127     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6128
6129     if not self.cleanup:
6130       _CheckNodeNotDrained(self.lu, target_node)
6131       result = self.rpc.call_instance_migratable(instance.primary_node,
6132                                                  instance)
6133       result.Raise("Can't migrate, please use failover",
6134                    prereq=True, ecode=errors.ECODE_STATE)
6135
6136     self.instance = instance
6137
6138     if self.lu.op.live is not None and self.lu.op.mode is not None:
6139       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6140                                  " parameters are accepted",
6141                                  errors.ECODE_INVAL)
6142     if self.lu.op.live is not None:
6143       if self.lu.op.live:
6144         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6145       else:
6146         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6147       # reset the 'live' parameter to None so that repeated
6148       # invocations of CheckPrereq do not raise an exception
6149       self.lu.op.live = None
6150     elif self.lu.op.mode is None:
6151       # read the default value from the hypervisor
6152       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6153       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6154
6155     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6156
6157   def _WaitUntilSync(self):
6158     """Poll with custom rpc for disk sync.
6159
6160     This uses our own step-based rpc call.
6161
6162     """
6163     self.feedback_fn("* wait until resync is done")
6164     all_done = False
6165     while not all_done:
6166       all_done = True
6167       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6168                                             self.nodes_ip,
6169                                             self.instance.disks)
6170       min_percent = 100
6171       for node, nres in result.items():
6172         nres.Raise("Cannot resync disks on node %s" % node)
6173         node_done, node_percent = nres.payload
6174         all_done = all_done and node_done
6175         if node_percent is not None:
6176           min_percent = min(min_percent, node_percent)
6177       if not all_done:
6178         if min_percent < 100:
6179           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6180         time.sleep(2)
6181
6182   def _EnsureSecondary(self, node):
6183     """Demote a node to secondary.
6184
6185     """
6186     self.feedback_fn("* switching node %s to secondary mode" % node)
6187
6188     for dev in self.instance.disks:
6189       self.cfg.SetDiskID(dev, node)
6190
6191     result = self.rpc.call_blockdev_close(node, self.instance.name,
6192                                           self.instance.disks)
6193     result.Raise("Cannot change disk to secondary on node %s" % node)
6194
6195   def _GoStandalone(self):
6196     """Disconnect from the network.
6197
6198     """
6199     self.feedback_fn("* changing into standalone mode")
6200     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6201                                                self.instance.disks)
6202     for node, nres in result.items():
6203       nres.Raise("Cannot disconnect disks node %s" % node)
6204
6205   def _GoReconnect(self, multimaster):
6206     """Reconnect to the network.
6207
6208     """
6209     if multimaster:
6210       msg = "dual-master"
6211     else:
6212       msg = "single-master"
6213     self.feedback_fn("* changing disks into %s mode" % msg)
6214     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6215                                            self.instance.disks,
6216                                            self.instance.name, multimaster)
6217     for node, nres in result.items():
6218       nres.Raise("Cannot change disks config on node %s" % node)
6219
6220   def _ExecCleanup(self):
6221     """Try to cleanup after a failed migration.
6222
6223     The cleanup is done by:
6224       - check that the instance is running only on one node
6225         (and update the config if needed)
6226       - change disks on its secondary node to secondary
6227       - wait until disks are fully synchronized
6228       - disconnect from the network
6229       - change disks into single-master mode
6230       - wait again until disks are fully synchronized
6231
6232     """
6233     instance = self.instance
6234     target_node = self.target_node
6235     source_node = self.source_node
6236
6237     # check running on only one node
6238     self.feedback_fn("* checking where the instance actually runs"
6239                      " (if this hangs, the hypervisor might be in"
6240                      " a bad state)")
6241     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6242     for node, result in ins_l.items():
6243       result.Raise("Can't contact node %s" % node)
6244
6245     runningon_source = instance.name in ins_l[source_node].payload
6246     runningon_target = instance.name in ins_l[target_node].payload
6247
6248     if runningon_source and runningon_target:
6249       raise errors.OpExecError("Instance seems to be running on two nodes,"
6250                                " or the hypervisor is confused. You will have"
6251                                " to ensure manually that it runs only on one"
6252                                " and restart this operation.")
6253
6254     if not (runningon_source or runningon_target):
6255       raise errors.OpExecError("Instance does not seem to be running at all."
6256                                " In this case, it's safer to repair by"
6257                                " running 'gnt-instance stop' to ensure disk"
6258                                " shutdown, and then restarting it.")
6259
6260     if runningon_target:
6261       # the migration has actually succeeded, we need to update the config
6262       self.feedback_fn("* instance running on secondary node (%s),"
6263                        " updating config" % target_node)
6264       instance.primary_node = target_node
6265       self.cfg.Update(instance, self.feedback_fn)
6266       demoted_node = source_node
6267     else:
6268       self.feedback_fn("* instance confirmed to be running on its"
6269                        " primary node (%s)" % source_node)
6270       demoted_node = target_node
6271
6272     self._EnsureSecondary(demoted_node)
6273     try:
6274       self._WaitUntilSync()
6275     except errors.OpExecError:
6276       # we ignore here errors, since if the device is standalone, it
6277       # won't be able to sync
6278       pass
6279     self._GoStandalone()
6280     self._GoReconnect(False)
6281     self._WaitUntilSync()
6282
6283     self.feedback_fn("* done")
6284
6285   def _RevertDiskStatus(self):
6286     """Try to revert the disk status after a failed migration.
6287
6288     """
6289     target_node = self.target_node
6290     try:
6291       self._EnsureSecondary(target_node)
6292       self._GoStandalone()
6293       self._GoReconnect(False)
6294       self._WaitUntilSync()
6295     except errors.OpExecError, err:
6296       self.lu.LogWarning("Migration failed and I can't reconnect the"
6297                          " drives: error '%s'\n"
6298                          "Please look and recover the instance status" %
6299                          str(err))
6300
6301   def _AbortMigration(self):
6302     """Call the hypervisor code to abort a started migration.
6303
6304     """
6305     instance = self.instance
6306     target_node = self.target_node
6307     migration_info = self.migration_info
6308
6309     abort_result = self.rpc.call_finalize_migration(target_node,
6310                                                     instance,
6311                                                     migration_info,
6312                                                     False)
6313     abort_msg = abort_result.fail_msg
6314     if abort_msg:
6315       logging.error("Aborting migration failed on target node %s: %s",
6316                     target_node, abort_msg)
6317       # Don't raise an exception here, as we stil have to try to revert the
6318       # disk status, even if this step failed.
6319
6320   def _ExecMigration(self):
6321     """Migrate an instance.
6322
6323     The migrate is done by:
6324       - change the disks into dual-master mode
6325       - wait until disks are fully synchronized again
6326       - migrate the instance
6327       - change disks on the new secondary node (the old primary) to secondary
6328       - wait until disks are fully synchronized
6329       - change disks into single-master mode
6330
6331     """
6332     instance = self.instance
6333     target_node = self.target_node
6334     source_node = self.source_node
6335
6336     self.feedback_fn("* checking disk consistency between source and target")
6337     for dev in instance.disks:
6338       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6339         raise errors.OpExecError("Disk %s is degraded or not fully"
6340                                  " synchronized on target node,"
6341                                  " aborting migrate." % dev.iv_name)
6342
6343     # First get the migration information from the remote node
6344     result = self.rpc.call_migration_info(source_node, instance)
6345     msg = result.fail_msg
6346     if msg:
6347       log_err = ("Failed fetching source migration information from %s: %s" %
6348                  (source_node, msg))
6349       logging.error(log_err)
6350       raise errors.OpExecError(log_err)
6351
6352     self.migration_info = migration_info = result.payload
6353
6354     # Then switch the disks to master/master mode
6355     self._EnsureSecondary(target_node)
6356     self._GoStandalone()
6357     self._GoReconnect(True)
6358     self._WaitUntilSync()
6359
6360     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6361     result = self.rpc.call_accept_instance(target_node,
6362                                            instance,
6363                                            migration_info,
6364                                            self.nodes_ip[target_node])
6365
6366     msg = result.fail_msg
6367     if msg:
6368       logging.error("Instance pre-migration failed, trying to revert"
6369                     " disk status: %s", msg)
6370       self.feedback_fn("Pre-migration failed, aborting")
6371       self._AbortMigration()
6372       self._RevertDiskStatus()
6373       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6374                                (instance.name, msg))
6375
6376     self.feedback_fn("* migrating instance to %s" % target_node)
6377     time.sleep(10)
6378     result = self.rpc.call_instance_migrate(source_node, instance,
6379                                             self.nodes_ip[target_node],
6380                                             self.live)
6381     msg = result.fail_msg
6382     if msg:
6383       logging.error("Instance migration failed, trying to revert"
6384                     " disk status: %s", msg)
6385       self.feedback_fn("Migration failed, aborting")
6386       self._AbortMigration()
6387       self._RevertDiskStatus()
6388       raise errors.OpExecError("Could not migrate instance %s: %s" %
6389                                (instance.name, msg))
6390     time.sleep(10)
6391
6392     instance.primary_node = target_node
6393     # distribute new instance config to the other nodes
6394     self.cfg.Update(instance, self.feedback_fn)
6395
6396     result = self.rpc.call_finalize_migration(target_node,
6397                                               instance,
6398                                               migration_info,
6399                                               True)
6400     msg = result.fail_msg
6401     if msg:
6402       logging.error("Instance migration succeeded, but finalization failed:"
6403                     " %s", msg)
6404       raise errors.OpExecError("Could not finalize instance migration: %s" %
6405                                msg)
6406
6407     self._EnsureSecondary(source_node)
6408     self._WaitUntilSync()
6409     self._GoStandalone()
6410     self._GoReconnect(False)
6411     self._WaitUntilSync()
6412
6413     self.feedback_fn("* done")
6414
6415   def Exec(self, feedback_fn):
6416     """Perform the migration.
6417
6418     """
6419     feedback_fn("Migrating instance %s" % self.instance.name)
6420
6421     self.feedback_fn = feedback_fn
6422
6423     self.source_node = self.instance.primary_node
6424     self.target_node = self.instance.secondary_nodes[0]
6425     self.all_nodes = [self.source_node, self.target_node]
6426     self.nodes_ip = {
6427       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6428       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6429       }
6430
6431     if self.cleanup:
6432       return self._ExecCleanup()
6433     else:
6434       return self._ExecMigration()
6435
6436
6437 def _CreateBlockDev(lu, node, instance, device, force_create,
6438                     info, force_open):
6439   """Create a tree of block devices on a given node.
6440
6441   If this device type has to be created on secondaries, create it and
6442   all its children.
6443
6444   If not, just recurse to children keeping the same 'force' value.
6445
6446   @param lu: the lu on whose behalf we execute
6447   @param node: the node on which to create the device
6448   @type instance: L{objects.Instance}
6449   @param instance: the instance which owns the device
6450   @type device: L{objects.Disk}
6451   @param device: the device to create
6452   @type force_create: boolean
6453   @param force_create: whether to force creation of this device; this
6454       will be change to True whenever we find a device which has
6455       CreateOnSecondary() attribute
6456   @param info: the extra 'metadata' we should attach to the device
6457       (this will be represented as a LVM tag)
6458   @type force_open: boolean
6459   @param force_open: this parameter will be passes to the
6460       L{backend.BlockdevCreate} function where it specifies
6461       whether we run on primary or not, and it affects both
6462       the child assembly and the device own Open() execution
6463
6464   """
6465   if device.CreateOnSecondary():
6466     force_create = True
6467
6468   if device.children:
6469     for child in device.children:
6470       _CreateBlockDev(lu, node, instance, child, force_create,
6471                       info, force_open)
6472
6473   if not force_create:
6474     return
6475
6476   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6477
6478
6479 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6480   """Create a single block device on a given node.
6481
6482   This will not recurse over children of the device, so they must be
6483   created in advance.
6484
6485   @param lu: the lu on whose behalf we execute
6486   @param node: the node on which to create the device
6487   @type instance: L{objects.Instance}
6488   @param instance: the instance which owns the device
6489   @type device: L{objects.Disk}
6490   @param device: the device to create
6491   @param info: the extra 'metadata' we should attach to the device
6492       (this will be represented as a LVM tag)
6493   @type force_open: boolean
6494   @param force_open: this parameter will be passes to the
6495       L{backend.BlockdevCreate} function where it specifies
6496       whether we run on primary or not, and it affects both
6497       the child assembly and the device own Open() execution
6498
6499   """
6500   lu.cfg.SetDiskID(device, node)
6501   result = lu.rpc.call_blockdev_create(node, device, device.size,
6502                                        instance.name, force_open, info)
6503   result.Raise("Can't create block device %s on"
6504                " node %s for instance %s" % (device, node, instance.name))
6505   if device.physical_id is None:
6506     device.physical_id = result.payload
6507
6508
6509 def _GenerateUniqueNames(lu, exts):
6510   """Generate a suitable LV name.
6511
6512   This will generate a logical volume name for the given instance.
6513
6514   """
6515   results = []
6516   for val in exts:
6517     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6518     results.append("%s%s" % (new_id, val))
6519   return results
6520
6521
6522 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6523                          p_minor, s_minor):
6524   """Generate a drbd8 device complete with its children.
6525
6526   """
6527   port = lu.cfg.AllocatePort()
6528   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6529   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6530                           logical_id=(vgname, names[0]))
6531   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6532                           logical_id=(vgname, names[1]))
6533   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6534                           logical_id=(primary, secondary, port,
6535                                       p_minor, s_minor,
6536                                       shared_secret),
6537                           children=[dev_data, dev_meta],
6538                           iv_name=iv_name)
6539   return drbd_dev
6540
6541
6542 def _GenerateDiskTemplate(lu, template_name,
6543                           instance_name, primary_node,
6544                           secondary_nodes, disk_info,
6545                           file_storage_dir, file_driver,
6546                           base_index, feedback_fn):
6547   """Generate the entire disk layout for a given template type.
6548
6549   """
6550   #TODO: compute space requirements
6551
6552   vgname = lu.cfg.GetVGName()
6553   disk_count = len(disk_info)
6554   disks = []
6555   if template_name == constants.DT_DISKLESS:
6556     pass
6557   elif template_name == constants.DT_PLAIN:
6558     if len(secondary_nodes) != 0:
6559       raise errors.ProgrammerError("Wrong template configuration")
6560
6561     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6562                                       for i in range(disk_count)])
6563     for idx, disk in enumerate(disk_info):
6564       disk_index = idx + base_index
6565       vg = disk.get("vg", vgname)
6566       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6567       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6568                               logical_id=(vg, names[idx]),
6569                               iv_name="disk/%d" % disk_index,
6570                               mode=disk["mode"])
6571       disks.append(disk_dev)
6572   elif template_name == constants.DT_DRBD8:
6573     if len(secondary_nodes) != 1:
6574       raise errors.ProgrammerError("Wrong template configuration")
6575     remote_node = secondary_nodes[0]
6576     minors = lu.cfg.AllocateDRBDMinor(
6577       [primary_node, remote_node] * len(disk_info), instance_name)
6578
6579     names = []
6580     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6581                                                for i in range(disk_count)]):
6582       names.append(lv_prefix + "_data")
6583       names.append(lv_prefix + "_meta")
6584     for idx, disk in enumerate(disk_info):
6585       disk_index = idx + base_index
6586       vg = disk.get("vg", vgname)
6587       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6588                                       disk["size"], vg, names[idx*2:idx*2+2],
6589                                       "disk/%d" % disk_index,
6590                                       minors[idx*2], minors[idx*2+1])
6591       disk_dev.mode = disk["mode"]
6592       disks.append(disk_dev)
6593   elif template_name == constants.DT_FILE:
6594     if len(secondary_nodes) != 0:
6595       raise errors.ProgrammerError("Wrong template configuration")
6596
6597     opcodes.RequireFileStorage()
6598
6599     for idx, disk in enumerate(disk_info):
6600       disk_index = idx + base_index
6601       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6602                               iv_name="disk/%d" % disk_index,
6603                               logical_id=(file_driver,
6604                                           "%s/disk%d" % (file_storage_dir,
6605                                                          disk_index)),
6606                               mode=disk["mode"])
6607       disks.append(disk_dev)
6608   else:
6609     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6610   return disks
6611
6612
6613 def _GetInstanceInfoText(instance):
6614   """Compute that text that should be added to the disk's metadata.
6615
6616   """
6617   return "originstname+%s" % instance.name
6618
6619
6620 def _CalcEta(time_taken, written, total_size):
6621   """Calculates the ETA based on size written and total size.
6622
6623   @param time_taken: The time taken so far
6624   @param written: amount written so far
6625   @param total_size: The total size of data to be written
6626   @return: The remaining time in seconds
6627
6628   """
6629   avg_time = time_taken / float(written)
6630   return (total_size - written) * avg_time
6631
6632
6633 def _WipeDisks(lu, instance):
6634   """Wipes instance disks.
6635
6636   @type lu: L{LogicalUnit}
6637   @param lu: the logical unit on whose behalf we execute
6638   @type instance: L{objects.Instance}
6639   @param instance: the instance whose disks we should create
6640   @return: the success of the wipe
6641
6642   """
6643   node = instance.primary_node
6644
6645   for device in instance.disks:
6646     lu.cfg.SetDiskID(device, node)
6647
6648   logging.info("Pause sync of instance %s disks", instance.name)
6649   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6650
6651   for idx, success in enumerate(result.payload):
6652     if not success:
6653       logging.warn("pause-sync of instance %s for disks %d failed",
6654                    instance.name, idx)
6655
6656   try:
6657     for idx, device in enumerate(instance.disks):
6658       lu.LogInfo("* Wiping disk %d", idx)
6659       logging.info("Wiping disk %d for instance %s, node %s",
6660                    idx, instance.name, node)
6661
6662       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6663       # MAX_WIPE_CHUNK at max
6664       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6665                             constants.MIN_WIPE_CHUNK_PERCENT)
6666
6667       offset = 0
6668       size = device.size
6669       last_output = 0
6670       start_time = time.time()
6671
6672       while offset < size:
6673         wipe_size = min(wipe_chunk_size, size - offset)
6674         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6675         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6676                      (idx, offset, wipe_size))
6677         now = time.time()
6678         offset += wipe_size
6679         if now - last_output >= 60:
6680           eta = _CalcEta(now - start_time, offset, size)
6681           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6682                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6683           last_output = now
6684   finally:
6685     logging.info("Resume sync of instance %s disks", instance.name)
6686
6687     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6688
6689     for idx, success in enumerate(result.payload):
6690       if not success:
6691         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6692                       " look at the status and troubleshoot the issue.", idx)
6693         logging.warn("resume-sync of instance %s for disks %d failed",
6694                      instance.name, idx)
6695
6696
6697 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6698   """Create all disks for an instance.
6699
6700   This abstracts away some work from AddInstance.
6701
6702   @type lu: L{LogicalUnit}
6703   @param lu: the logical unit on whose behalf we execute
6704   @type instance: L{objects.Instance}
6705   @param instance: the instance whose disks we should create
6706   @type to_skip: list
6707   @param to_skip: list of indices to skip
6708   @type target_node: string
6709   @param target_node: if passed, overrides the target node for creation
6710   @rtype: boolean
6711   @return: the success of the creation
6712
6713   """
6714   info = _GetInstanceInfoText(instance)
6715   if target_node is None:
6716     pnode = instance.primary_node
6717     all_nodes = instance.all_nodes
6718   else:
6719     pnode = target_node
6720     all_nodes = [pnode]
6721
6722   if instance.disk_template == constants.DT_FILE:
6723     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6724     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6725
6726     result.Raise("Failed to create directory '%s' on"
6727                  " node %s" % (file_storage_dir, pnode))
6728
6729   # Note: this needs to be kept in sync with adding of disks in
6730   # LUInstanceSetParams
6731   for idx, device in enumerate(instance.disks):
6732     if to_skip and idx in to_skip:
6733       continue
6734     logging.info("Creating volume %s for instance %s",
6735                  device.iv_name, instance.name)
6736     #HARDCODE
6737     for node in all_nodes:
6738       f_create = node == pnode
6739       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6740
6741
6742 def _RemoveDisks(lu, instance, target_node=None):
6743   """Remove all disks for an instance.
6744
6745   This abstracts away some work from `AddInstance()` and
6746   `RemoveInstance()`. Note that in case some of the devices couldn't
6747   be removed, the removal will continue with the other ones (compare
6748   with `_CreateDisks()`).
6749
6750   @type lu: L{LogicalUnit}
6751   @param lu: the logical unit on whose behalf we execute
6752   @type instance: L{objects.Instance}
6753   @param instance: the instance whose disks we should remove
6754   @type target_node: string
6755   @param target_node: used to override the node on which to remove the disks
6756   @rtype: boolean
6757   @return: the success of the removal
6758
6759   """
6760   logging.info("Removing block devices for instance %s", instance.name)
6761
6762   all_result = True
6763   for device in instance.disks:
6764     if target_node:
6765       edata = [(target_node, device)]
6766     else:
6767       edata = device.ComputeNodeTree(instance.primary_node)
6768     for node, disk in edata:
6769       lu.cfg.SetDiskID(disk, node)
6770       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6771       if msg:
6772         lu.LogWarning("Could not remove block device %s on node %s,"
6773                       " continuing anyway: %s", device.iv_name, node, msg)
6774         all_result = False
6775
6776   if instance.disk_template == constants.DT_FILE:
6777     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6778     if target_node:
6779       tgt = target_node
6780     else:
6781       tgt = instance.primary_node
6782     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6783     if result.fail_msg:
6784       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6785                     file_storage_dir, instance.primary_node, result.fail_msg)
6786       all_result = False
6787
6788   return all_result
6789
6790
6791 def _ComputeDiskSizePerVG(disk_template, disks):
6792   """Compute disk size requirements in the volume group
6793
6794   """
6795   def _compute(disks, payload):
6796     """Universal algorithm
6797
6798     """
6799     vgs = {}
6800     for disk in disks:
6801       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6802
6803     return vgs
6804
6805   # Required free disk space as a function of disk and swap space
6806   req_size_dict = {
6807     constants.DT_DISKLESS: {},
6808     constants.DT_PLAIN: _compute(disks, 0),
6809     # 128 MB are added for drbd metadata for each disk
6810     constants.DT_DRBD8: _compute(disks, 128),
6811     constants.DT_FILE: {},
6812   }
6813
6814   if disk_template not in req_size_dict:
6815     raise errors.ProgrammerError("Disk template '%s' size requirement"
6816                                  " is unknown" %  disk_template)
6817
6818   return req_size_dict[disk_template]
6819
6820
6821 def _ComputeDiskSize(disk_template, disks):
6822   """Compute disk size requirements in the volume group
6823
6824   """
6825   # Required free disk space as a function of disk and swap space
6826   req_size_dict = {
6827     constants.DT_DISKLESS: None,
6828     constants.DT_PLAIN: sum(d["size"] for d in disks),
6829     # 128 MB are added for drbd metadata for each disk
6830     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6831     constants.DT_FILE: None,
6832   }
6833
6834   if disk_template not in req_size_dict:
6835     raise errors.ProgrammerError("Disk template '%s' size requirement"
6836                                  " is unknown" %  disk_template)
6837
6838   return req_size_dict[disk_template]
6839
6840
6841 def _FilterVmNodes(lu, nodenames):
6842   """Filters out non-vm_capable nodes from a list.
6843
6844   @type lu: L{LogicalUnit}
6845   @param lu: the logical unit for which we check
6846   @type nodenames: list
6847   @param nodenames: the list of nodes on which we should check
6848   @rtype: list
6849   @return: the list of vm-capable nodes
6850
6851   """
6852   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6853   return [name for name in nodenames if name not in vm_nodes]
6854
6855
6856 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6857   """Hypervisor parameter validation.
6858
6859   This function abstract the hypervisor parameter validation to be
6860   used in both instance create and instance modify.
6861
6862   @type lu: L{LogicalUnit}
6863   @param lu: the logical unit for which we check
6864   @type nodenames: list
6865   @param nodenames: the list of nodes on which we should check
6866   @type hvname: string
6867   @param hvname: the name of the hypervisor we should use
6868   @type hvparams: dict
6869   @param hvparams: the parameters which we need to check
6870   @raise errors.OpPrereqError: if the parameters are not valid
6871
6872   """
6873   nodenames = _FilterVmNodes(lu, nodenames)
6874   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6875                                                   hvname,
6876                                                   hvparams)
6877   for node in nodenames:
6878     info = hvinfo[node]
6879     if info.offline:
6880       continue
6881     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6882
6883
6884 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6885   """OS parameters validation.
6886
6887   @type lu: L{LogicalUnit}
6888   @param lu: the logical unit for which we check
6889   @type required: boolean
6890   @param required: whether the validation should fail if the OS is not
6891       found
6892   @type nodenames: list
6893   @param nodenames: the list of nodes on which we should check
6894   @type osname: string
6895   @param osname: the name of the hypervisor we should use
6896   @type osparams: dict
6897   @param osparams: the parameters which we need to check
6898   @raise errors.OpPrereqError: if the parameters are not valid
6899
6900   """
6901   nodenames = _FilterVmNodes(lu, nodenames)
6902   result = lu.rpc.call_os_validate(required, nodenames, osname,
6903                                    [constants.OS_VALIDATE_PARAMETERS],
6904                                    osparams)
6905   for node, nres in result.items():
6906     # we don't check for offline cases since this should be run only
6907     # against the master node and/or an instance's nodes
6908     nres.Raise("OS Parameters validation failed on node %s" % node)
6909     if not nres.payload:
6910       lu.LogInfo("OS %s not found on node %s, validation skipped",
6911                  osname, node)
6912
6913
6914 class LUInstanceCreate(LogicalUnit):
6915   """Create an instance.
6916
6917   """
6918   HPATH = "instance-add"
6919   HTYPE = constants.HTYPE_INSTANCE
6920   REQ_BGL = False
6921
6922   def CheckArguments(self):
6923     """Check arguments.
6924
6925     """
6926     # do not require name_check to ease forward/backward compatibility
6927     # for tools
6928     if self.op.no_install and self.op.start:
6929       self.LogInfo("No-installation mode selected, disabling startup")
6930       self.op.start = False
6931     # validate/normalize the instance name
6932     self.op.instance_name = \
6933       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6934
6935     if self.op.ip_check and not self.op.name_check:
6936       # TODO: make the ip check more flexible and not depend on the name check
6937       raise errors.OpPrereqError("Cannot do ip check without a name check",
6938                                  errors.ECODE_INVAL)
6939
6940     # check nics' parameter names
6941     for nic in self.op.nics:
6942       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6943
6944     # check disks. parameter names and consistent adopt/no-adopt strategy
6945     has_adopt = has_no_adopt = False
6946     for disk in self.op.disks:
6947       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6948       if "adopt" in disk:
6949         has_adopt = True
6950       else:
6951         has_no_adopt = True
6952     if has_adopt and has_no_adopt:
6953       raise errors.OpPrereqError("Either all disks are adopted or none is",
6954                                  errors.ECODE_INVAL)
6955     if has_adopt:
6956       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6957         raise errors.OpPrereqError("Disk adoption is not supported for the"
6958                                    " '%s' disk template" %
6959                                    self.op.disk_template,
6960                                    errors.ECODE_INVAL)
6961       if self.op.iallocator is not None:
6962         raise errors.OpPrereqError("Disk adoption not allowed with an"
6963                                    " iallocator script", errors.ECODE_INVAL)
6964       if self.op.mode == constants.INSTANCE_IMPORT:
6965         raise errors.OpPrereqError("Disk adoption not allowed for"
6966                                    " instance import", errors.ECODE_INVAL)
6967
6968     self.adopt_disks = has_adopt
6969
6970     # instance name verification
6971     if self.op.name_check:
6972       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6973       self.op.instance_name = self.hostname1.name
6974       # used in CheckPrereq for ip ping check
6975       self.check_ip = self.hostname1.ip
6976     else:
6977       self.check_ip = None
6978
6979     # file storage checks
6980     if (self.op.file_driver and
6981         not self.op.file_driver in constants.FILE_DRIVER):
6982       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6983                                  self.op.file_driver, errors.ECODE_INVAL)
6984
6985     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6986       raise errors.OpPrereqError("File storage directory path not absolute",
6987                                  errors.ECODE_INVAL)
6988
6989     ### Node/iallocator related checks
6990     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6991
6992     if self.op.pnode is not None:
6993       if self.op.disk_template in constants.DTS_NET_MIRROR:
6994         if self.op.snode is None:
6995           raise errors.OpPrereqError("The networked disk templates need"
6996                                      " a mirror node", errors.ECODE_INVAL)
6997       elif self.op.snode:
6998         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6999                         " template")
7000         self.op.snode = None
7001
7002     self._cds = _GetClusterDomainSecret()
7003
7004     if self.op.mode == constants.INSTANCE_IMPORT:
7005       # On import force_variant must be True, because if we forced it at
7006       # initial install, our only chance when importing it back is that it
7007       # works again!
7008       self.op.force_variant = True
7009
7010       if self.op.no_install:
7011         self.LogInfo("No-installation mode has no effect during import")
7012
7013     elif self.op.mode == constants.INSTANCE_CREATE:
7014       if self.op.os_type is None:
7015         raise errors.OpPrereqError("No guest OS specified",
7016                                    errors.ECODE_INVAL)
7017       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7018         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7019                                    " installation" % self.op.os_type,
7020                                    errors.ECODE_STATE)
7021       if self.op.disk_template is None:
7022         raise errors.OpPrereqError("No disk template specified",
7023                                    errors.ECODE_INVAL)
7024
7025     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7026       # Check handshake to ensure both clusters have the same domain secret
7027       src_handshake = self.op.source_handshake
7028       if not src_handshake:
7029         raise errors.OpPrereqError("Missing source handshake",
7030                                    errors.ECODE_INVAL)
7031
7032       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7033                                                            src_handshake)
7034       if errmsg:
7035         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7036                                    errors.ECODE_INVAL)
7037
7038       # Load and check source CA
7039       self.source_x509_ca_pem = self.op.source_x509_ca
7040       if not self.source_x509_ca_pem:
7041         raise errors.OpPrereqError("Missing source X509 CA",
7042                                    errors.ECODE_INVAL)
7043
7044       try:
7045         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7046                                                     self._cds)
7047       except OpenSSL.crypto.Error, err:
7048         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7049                                    (err, ), errors.ECODE_INVAL)
7050
7051       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7052       if errcode is not None:
7053         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7054                                    errors.ECODE_INVAL)
7055
7056       self.source_x509_ca = cert
7057
7058       src_instance_name = self.op.source_instance_name
7059       if not src_instance_name:
7060         raise errors.OpPrereqError("Missing source instance name",
7061                                    errors.ECODE_INVAL)
7062
7063       self.source_instance_name = \
7064           netutils.GetHostname(name=src_instance_name).name
7065
7066     else:
7067       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7068                                  self.op.mode, errors.ECODE_INVAL)
7069
7070   def ExpandNames(self):
7071     """ExpandNames for CreateInstance.
7072
7073     Figure out the right locks for instance creation.
7074
7075     """
7076     self.needed_locks = {}
7077
7078     instance_name = self.op.instance_name
7079     # this is just a preventive check, but someone might still add this
7080     # instance in the meantime, and creation will fail at lock-add time
7081     if instance_name in self.cfg.GetInstanceList():
7082       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7083                                  instance_name, errors.ECODE_EXISTS)
7084
7085     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7086
7087     if self.op.iallocator:
7088       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7089     else:
7090       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7091       nodelist = [self.op.pnode]
7092       if self.op.snode is not None:
7093         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7094         nodelist.append(self.op.snode)
7095       self.needed_locks[locking.LEVEL_NODE] = nodelist
7096
7097     # in case of import lock the source node too
7098     if self.op.mode == constants.INSTANCE_IMPORT:
7099       src_node = self.op.src_node
7100       src_path = self.op.src_path
7101
7102       if src_path is None:
7103         self.op.src_path = src_path = self.op.instance_name
7104
7105       if src_node is None:
7106         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7107         self.op.src_node = None
7108         if os.path.isabs(src_path):
7109           raise errors.OpPrereqError("Importing an instance from an absolute"
7110                                      " path requires a source node option.",
7111                                      errors.ECODE_INVAL)
7112       else:
7113         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7114         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7115           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7116         if not os.path.isabs(src_path):
7117           self.op.src_path = src_path = \
7118             utils.PathJoin(constants.EXPORT_DIR, src_path)
7119
7120   def _RunAllocator(self):
7121     """Run the allocator based on input opcode.
7122
7123     """
7124     nics = [n.ToDict() for n in self.nics]
7125     ial = IAllocator(self.cfg, self.rpc,
7126                      mode=constants.IALLOCATOR_MODE_ALLOC,
7127                      name=self.op.instance_name,
7128                      disk_template=self.op.disk_template,
7129                      tags=[],
7130                      os=self.op.os_type,
7131                      vcpus=self.be_full[constants.BE_VCPUS],
7132                      mem_size=self.be_full[constants.BE_MEMORY],
7133                      disks=self.disks,
7134                      nics=nics,
7135                      hypervisor=self.op.hypervisor,
7136                      )
7137
7138     ial.Run(self.op.iallocator)
7139
7140     if not ial.success:
7141       raise errors.OpPrereqError("Can't compute nodes using"
7142                                  " iallocator '%s': %s" %
7143                                  (self.op.iallocator, ial.info),
7144                                  errors.ECODE_NORES)
7145     if len(ial.result) != ial.required_nodes:
7146       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7147                                  " of nodes (%s), required %s" %
7148                                  (self.op.iallocator, len(ial.result),
7149                                   ial.required_nodes), errors.ECODE_FAULT)
7150     self.op.pnode = ial.result[0]
7151     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7152                  self.op.instance_name, self.op.iallocator,
7153                  utils.CommaJoin(ial.result))
7154     if ial.required_nodes == 2:
7155       self.op.snode = ial.result[1]
7156
7157   def BuildHooksEnv(self):
7158     """Build hooks env.
7159
7160     This runs on master, primary and secondary nodes of the instance.
7161
7162     """
7163     env = {
7164       "ADD_MODE": self.op.mode,
7165       }
7166     if self.op.mode == constants.INSTANCE_IMPORT:
7167       env["SRC_NODE"] = self.op.src_node
7168       env["SRC_PATH"] = self.op.src_path
7169       env["SRC_IMAGES"] = self.src_images
7170
7171     env.update(_BuildInstanceHookEnv(
7172       name=self.op.instance_name,
7173       primary_node=self.op.pnode,
7174       secondary_nodes=self.secondaries,
7175       status=self.op.start,
7176       os_type=self.op.os_type,
7177       memory=self.be_full[constants.BE_MEMORY],
7178       vcpus=self.be_full[constants.BE_VCPUS],
7179       nics=_NICListToTuple(self, self.nics),
7180       disk_template=self.op.disk_template,
7181       disks=[(d["size"], d["mode"]) for d in self.disks],
7182       bep=self.be_full,
7183       hvp=self.hv_full,
7184       hypervisor_name=self.op.hypervisor,
7185     ))
7186
7187     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7188           self.secondaries)
7189     return env, nl, nl
7190
7191   def _ReadExportInfo(self):
7192     """Reads the export information from disk.
7193
7194     It will override the opcode source node and path with the actual
7195     information, if these two were not specified before.
7196
7197     @return: the export information
7198
7199     """
7200     assert self.op.mode == constants.INSTANCE_IMPORT
7201
7202     src_node = self.op.src_node
7203     src_path = self.op.src_path
7204
7205     if src_node is None:
7206       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7207       exp_list = self.rpc.call_export_list(locked_nodes)
7208       found = False
7209       for node in exp_list:
7210         if exp_list[node].fail_msg:
7211           continue
7212         if src_path in exp_list[node].payload:
7213           found = True
7214           self.op.src_node = src_node = node
7215           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7216                                                        src_path)
7217           break
7218       if not found:
7219         raise errors.OpPrereqError("No export found for relative path %s" %
7220                                     src_path, errors.ECODE_INVAL)
7221
7222     _CheckNodeOnline(self, src_node)
7223     result = self.rpc.call_export_info(src_node, src_path)
7224     result.Raise("No export or invalid export found in dir %s" % src_path)
7225
7226     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7227     if not export_info.has_section(constants.INISECT_EXP):
7228       raise errors.ProgrammerError("Corrupted export config",
7229                                    errors.ECODE_ENVIRON)
7230
7231     ei_version = export_info.get(constants.INISECT_EXP, "version")
7232     if (int(ei_version) != constants.EXPORT_VERSION):
7233       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7234                                  (ei_version, constants.EXPORT_VERSION),
7235                                  errors.ECODE_ENVIRON)
7236     return export_info
7237
7238   def _ReadExportParams(self, einfo):
7239     """Use export parameters as defaults.
7240
7241     In case the opcode doesn't specify (as in override) some instance
7242     parameters, then try to use them from the export information, if
7243     that declares them.
7244
7245     """
7246     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7247
7248     if self.op.disk_template is None:
7249       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7250         self.op.disk_template = einfo.get(constants.INISECT_INS,
7251                                           "disk_template")
7252       else:
7253         raise errors.OpPrereqError("No disk template specified and the export"
7254                                    " is missing the disk_template information",
7255                                    errors.ECODE_INVAL)
7256
7257     if not self.op.disks:
7258       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7259         disks = []
7260         # TODO: import the disk iv_name too
7261         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7262           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7263           disks.append({"size": disk_sz})
7264         self.op.disks = disks
7265       else:
7266         raise errors.OpPrereqError("No disk info specified and the export"
7267                                    " is missing the disk information",
7268                                    errors.ECODE_INVAL)
7269
7270     if (not self.op.nics and
7271         einfo.has_option(constants.INISECT_INS, "nic_count")):
7272       nics = []
7273       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7274         ndict = {}
7275         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7276           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7277           ndict[name] = v
7278         nics.append(ndict)
7279       self.op.nics = nics
7280
7281     if (self.op.hypervisor is None and
7282         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7283       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7284     if einfo.has_section(constants.INISECT_HYP):
7285       # use the export parameters but do not override the ones
7286       # specified by the user
7287       for name, value in einfo.items(constants.INISECT_HYP):
7288         if name not in self.op.hvparams:
7289           self.op.hvparams[name] = value
7290
7291     if einfo.has_section(constants.INISECT_BEP):
7292       # use the parameters, without overriding
7293       for name, value in einfo.items(constants.INISECT_BEP):
7294         if name not in self.op.beparams:
7295           self.op.beparams[name] = value
7296     else:
7297       # try to read the parameters old style, from the main section
7298       for name in constants.BES_PARAMETERS:
7299         if (name not in self.op.beparams and
7300             einfo.has_option(constants.INISECT_INS, name)):
7301           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7302
7303     if einfo.has_section(constants.INISECT_OSP):
7304       # use the parameters, without overriding
7305       for name, value in einfo.items(constants.INISECT_OSP):
7306         if name not in self.op.osparams:
7307           self.op.osparams[name] = value
7308
7309   def _RevertToDefaults(self, cluster):
7310     """Revert the instance parameters to the default values.
7311
7312     """
7313     # hvparams
7314     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7315     for name in self.op.hvparams.keys():
7316       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7317         del self.op.hvparams[name]
7318     # beparams
7319     be_defs = cluster.SimpleFillBE({})
7320     for name in self.op.beparams.keys():
7321       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7322         del self.op.beparams[name]
7323     # nic params
7324     nic_defs = cluster.SimpleFillNIC({})
7325     for nic in self.op.nics:
7326       for name in constants.NICS_PARAMETERS:
7327         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7328           del nic[name]
7329     # osparams
7330     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7331     for name in self.op.osparams.keys():
7332       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7333         del self.op.osparams[name]
7334
7335   def CheckPrereq(self):
7336     """Check prerequisites.
7337
7338     """
7339     if self.op.mode == constants.INSTANCE_IMPORT:
7340       export_info = self._ReadExportInfo()
7341       self._ReadExportParams(export_info)
7342
7343     if (not self.cfg.GetVGName() and
7344         self.op.disk_template not in constants.DTS_NOT_LVM):
7345       raise errors.OpPrereqError("Cluster does not support lvm-based"
7346                                  " instances", errors.ECODE_STATE)
7347
7348     if self.op.hypervisor is None:
7349       self.op.hypervisor = self.cfg.GetHypervisorType()
7350
7351     cluster = self.cfg.GetClusterInfo()
7352     enabled_hvs = cluster.enabled_hypervisors
7353     if self.op.hypervisor not in enabled_hvs:
7354       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7355                                  " cluster (%s)" % (self.op.hypervisor,
7356                                   ",".join(enabled_hvs)),
7357                                  errors.ECODE_STATE)
7358
7359     # check hypervisor parameter syntax (locally)
7360     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7361     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7362                                       self.op.hvparams)
7363     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7364     hv_type.CheckParameterSyntax(filled_hvp)
7365     self.hv_full = filled_hvp
7366     # check that we don't specify global parameters on an instance
7367     _CheckGlobalHvParams(self.op.hvparams)
7368
7369     # fill and remember the beparams dict
7370     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7371     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7372
7373     # build os parameters
7374     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7375
7376     # now that hvp/bep are in final format, let's reset to defaults,
7377     # if told to do so
7378     if self.op.identify_defaults:
7379       self._RevertToDefaults(cluster)
7380
7381     # NIC buildup
7382     self.nics = []
7383     for idx, nic in enumerate(self.op.nics):
7384       nic_mode_req = nic.get("mode", None)
7385       nic_mode = nic_mode_req
7386       if nic_mode is None:
7387         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7388
7389       # in routed mode, for the first nic, the default ip is 'auto'
7390       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7391         default_ip_mode = constants.VALUE_AUTO
7392       else:
7393         default_ip_mode = constants.VALUE_NONE
7394
7395       # ip validity checks
7396       ip = nic.get("ip", default_ip_mode)
7397       if ip is None or ip.lower() == constants.VALUE_NONE:
7398         nic_ip = None
7399       elif ip.lower() == constants.VALUE_AUTO:
7400         if not self.op.name_check:
7401           raise errors.OpPrereqError("IP address set to auto but name checks"
7402                                      " have been skipped",
7403                                      errors.ECODE_INVAL)
7404         nic_ip = self.hostname1.ip
7405       else:
7406         if not netutils.IPAddress.IsValid(ip):
7407           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7408                                      errors.ECODE_INVAL)
7409         nic_ip = ip
7410
7411       # TODO: check the ip address for uniqueness
7412       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7413         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7414                                    errors.ECODE_INVAL)
7415
7416       # MAC address verification
7417       mac = nic.get("mac", constants.VALUE_AUTO)
7418       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7419         mac = utils.NormalizeAndValidateMac(mac)
7420
7421         try:
7422           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7423         except errors.ReservationError:
7424           raise errors.OpPrereqError("MAC address %s already in use"
7425                                      " in cluster" % mac,
7426                                      errors.ECODE_NOTUNIQUE)
7427
7428       # bridge verification
7429       bridge = nic.get("bridge", None)
7430       link = nic.get("link", None)
7431       if bridge and link:
7432         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7433                                    " at the same time", errors.ECODE_INVAL)
7434       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7435         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7436                                    errors.ECODE_INVAL)
7437       elif bridge:
7438         link = bridge
7439
7440       nicparams = {}
7441       if nic_mode_req:
7442         nicparams[constants.NIC_MODE] = nic_mode_req
7443       if link:
7444         nicparams[constants.NIC_LINK] = link
7445
7446       check_params = cluster.SimpleFillNIC(nicparams)
7447       objects.NIC.CheckParameterSyntax(check_params)
7448       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7449
7450     # disk checks/pre-build
7451     self.disks = []
7452     for disk in self.op.disks:
7453       mode = disk.get("mode", constants.DISK_RDWR)
7454       if mode not in constants.DISK_ACCESS_SET:
7455         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7456                                    mode, errors.ECODE_INVAL)
7457       size = disk.get("size", None)
7458       if size is None:
7459         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7460       try:
7461         size = int(size)
7462       except (TypeError, ValueError):
7463         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7464                                    errors.ECODE_INVAL)
7465       vg = disk.get("vg", self.cfg.GetVGName())
7466       new_disk = {"size": size, "mode": mode, "vg": vg}
7467       if "adopt" in disk:
7468         new_disk["adopt"] = disk["adopt"]
7469       self.disks.append(new_disk)
7470
7471     if self.op.mode == constants.INSTANCE_IMPORT:
7472
7473       # Check that the new instance doesn't have less disks than the export
7474       instance_disks = len(self.disks)
7475       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7476       if instance_disks < export_disks:
7477         raise errors.OpPrereqError("Not enough disks to import."
7478                                    " (instance: %d, export: %d)" %
7479                                    (instance_disks, export_disks),
7480                                    errors.ECODE_INVAL)
7481
7482       disk_images = []
7483       for idx in range(export_disks):
7484         option = 'disk%d_dump' % idx
7485         if export_info.has_option(constants.INISECT_INS, option):
7486           # FIXME: are the old os-es, disk sizes, etc. useful?
7487           export_name = export_info.get(constants.INISECT_INS, option)
7488           image = utils.PathJoin(self.op.src_path, export_name)
7489           disk_images.append(image)
7490         else:
7491           disk_images.append(False)
7492
7493       self.src_images = disk_images
7494
7495       old_name = export_info.get(constants.INISECT_INS, 'name')
7496       try:
7497         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7498       except (TypeError, ValueError), err:
7499         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7500                                    " an integer: %s" % str(err),
7501                                    errors.ECODE_STATE)
7502       if self.op.instance_name == old_name:
7503         for idx, nic in enumerate(self.nics):
7504           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7505             nic_mac_ini = 'nic%d_mac' % idx
7506             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7507
7508     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7509
7510     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7511     if self.op.ip_check:
7512       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7513         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7514                                    (self.check_ip, self.op.instance_name),
7515                                    errors.ECODE_NOTUNIQUE)
7516
7517     #### mac address generation
7518     # By generating here the mac address both the allocator and the hooks get
7519     # the real final mac address rather than the 'auto' or 'generate' value.
7520     # There is a race condition between the generation and the instance object
7521     # creation, which means that we know the mac is valid now, but we're not
7522     # sure it will be when we actually add the instance. If things go bad
7523     # adding the instance will abort because of a duplicate mac, and the
7524     # creation job will fail.
7525     for nic in self.nics:
7526       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7527         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7528
7529     #### allocator run
7530
7531     if self.op.iallocator is not None:
7532       self._RunAllocator()
7533
7534     #### node related checks
7535
7536     # check primary node
7537     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7538     assert self.pnode is not None, \
7539       "Cannot retrieve locked node %s" % self.op.pnode
7540     if pnode.offline:
7541       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7542                                  pnode.name, errors.ECODE_STATE)
7543     if pnode.drained:
7544       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7545                                  pnode.name, errors.ECODE_STATE)
7546     if not pnode.vm_capable:
7547       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7548                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7549
7550     self.secondaries = []
7551
7552     # mirror node verification
7553     if self.op.disk_template in constants.DTS_NET_MIRROR:
7554       if self.op.snode == pnode.name:
7555         raise errors.OpPrereqError("The secondary node cannot be the"
7556                                    " primary node.", errors.ECODE_INVAL)
7557       _CheckNodeOnline(self, self.op.snode)
7558       _CheckNodeNotDrained(self, self.op.snode)
7559       _CheckNodeVmCapable(self, self.op.snode)
7560       self.secondaries.append(self.op.snode)
7561
7562     nodenames = [pnode.name] + self.secondaries
7563
7564     if not self.adopt_disks:
7565       # Check lv size requirements, if not adopting
7566       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7567       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7568
7569     else: # instead, we must check the adoption data
7570       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7571       if len(all_lvs) != len(self.disks):
7572         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7573                                    errors.ECODE_INVAL)
7574       for lv_name in all_lvs:
7575         try:
7576           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7577           # to ReserveLV uses the same syntax
7578           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7579         except errors.ReservationError:
7580           raise errors.OpPrereqError("LV named %s used by another instance" %
7581                                      lv_name, errors.ECODE_NOTUNIQUE)
7582
7583       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7584       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7585
7586       node_lvs = self.rpc.call_lv_list([pnode.name],
7587                                        vg_names.payload.keys())[pnode.name]
7588       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7589       node_lvs = node_lvs.payload
7590
7591       delta = all_lvs.difference(node_lvs.keys())
7592       if delta:
7593         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7594                                    utils.CommaJoin(delta),
7595                                    errors.ECODE_INVAL)
7596       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7597       if online_lvs:
7598         raise errors.OpPrereqError("Online logical volumes found, cannot"
7599                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7600                                    errors.ECODE_STATE)
7601       # update the size of disk based on what is found
7602       for dsk in self.disks:
7603         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7604
7605     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7606
7607     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7608     # check OS parameters (remotely)
7609     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7610
7611     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7612
7613     # memory check on primary node
7614     if self.op.start:
7615       _CheckNodeFreeMemory(self, self.pnode.name,
7616                            "creating instance %s" % self.op.instance_name,
7617                            self.be_full[constants.BE_MEMORY],
7618                            self.op.hypervisor)
7619
7620     self.dry_run_result = list(nodenames)
7621
7622   def Exec(self, feedback_fn):
7623     """Create and add the instance to the cluster.
7624
7625     """
7626     instance = self.op.instance_name
7627     pnode_name = self.pnode.name
7628
7629     ht_kind = self.op.hypervisor
7630     if ht_kind in constants.HTS_REQ_PORT:
7631       network_port = self.cfg.AllocatePort()
7632     else:
7633       network_port = None
7634
7635     if constants.ENABLE_FILE_STORAGE:
7636       # this is needed because os.path.join does not accept None arguments
7637       if self.op.file_storage_dir is None:
7638         string_file_storage_dir = ""
7639       else:
7640         string_file_storage_dir = self.op.file_storage_dir
7641
7642       # build the full file storage dir path
7643       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7644                                         string_file_storage_dir, instance)
7645     else:
7646       file_storage_dir = ""
7647
7648     disks = _GenerateDiskTemplate(self,
7649                                   self.op.disk_template,
7650                                   instance, pnode_name,
7651                                   self.secondaries,
7652                                   self.disks,
7653                                   file_storage_dir,
7654                                   self.op.file_driver,
7655                                   0,
7656                                   feedback_fn)
7657
7658     iobj = objects.Instance(name=instance, os=self.op.os_type,
7659                             primary_node=pnode_name,
7660                             nics=self.nics, disks=disks,
7661                             disk_template=self.op.disk_template,
7662                             admin_up=False,
7663                             network_port=network_port,
7664                             beparams=self.op.beparams,
7665                             hvparams=self.op.hvparams,
7666                             hypervisor=self.op.hypervisor,
7667                             osparams=self.op.osparams,
7668                             )
7669
7670     if self.adopt_disks:
7671       # rename LVs to the newly-generated names; we need to construct
7672       # 'fake' LV disks with the old data, plus the new unique_id
7673       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7674       rename_to = []
7675       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7676         rename_to.append(t_dsk.logical_id)
7677         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7678         self.cfg.SetDiskID(t_dsk, pnode_name)
7679       result = self.rpc.call_blockdev_rename(pnode_name,
7680                                              zip(tmp_disks, rename_to))
7681       result.Raise("Failed to rename adoped LVs")
7682     else:
7683       feedback_fn("* creating instance disks...")
7684       try:
7685         _CreateDisks(self, iobj)
7686       except errors.OpExecError:
7687         self.LogWarning("Device creation failed, reverting...")
7688         try:
7689           _RemoveDisks(self, iobj)
7690         finally:
7691           self.cfg.ReleaseDRBDMinors(instance)
7692           raise
7693
7694       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7695         feedback_fn("* wiping instance disks...")
7696         try:
7697           _WipeDisks(self, iobj)
7698         except errors.OpExecError:
7699           self.LogWarning("Device wiping failed, reverting...")
7700           try:
7701             _RemoveDisks(self, iobj)
7702           finally:
7703             self.cfg.ReleaseDRBDMinors(instance)
7704             raise
7705
7706     feedback_fn("adding instance %s to cluster config" % instance)
7707
7708     self.cfg.AddInstance(iobj, self.proc.GetECId())
7709
7710     # Declare that we don't want to remove the instance lock anymore, as we've
7711     # added the instance to the config
7712     del self.remove_locks[locking.LEVEL_INSTANCE]
7713     # Unlock all the nodes
7714     if self.op.mode == constants.INSTANCE_IMPORT:
7715       nodes_keep = [self.op.src_node]
7716       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7717                        if node != self.op.src_node]
7718       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7719       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7720     else:
7721       self.context.glm.release(locking.LEVEL_NODE)
7722       del self.acquired_locks[locking.LEVEL_NODE]
7723
7724     if self.op.wait_for_sync:
7725       disk_abort = not _WaitForSync(self, iobj)
7726     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7727       # make sure the disks are not degraded (still sync-ing is ok)
7728       time.sleep(15)
7729       feedback_fn("* checking mirrors status")
7730       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7731     else:
7732       disk_abort = False
7733
7734     if disk_abort:
7735       _RemoveDisks(self, iobj)
7736       self.cfg.RemoveInstance(iobj.name)
7737       # Make sure the instance lock gets removed
7738       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7739       raise errors.OpExecError("There are some degraded disks for"
7740                                " this instance")
7741
7742     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7743       if self.op.mode == constants.INSTANCE_CREATE:
7744         if not self.op.no_install:
7745           feedback_fn("* running the instance OS create scripts...")
7746           # FIXME: pass debug option from opcode to backend
7747           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7748                                                  self.op.debug_level)
7749           result.Raise("Could not add os for instance %s"
7750                        " on node %s" % (instance, pnode_name))
7751
7752       elif self.op.mode == constants.INSTANCE_IMPORT:
7753         feedback_fn("* running the instance OS import scripts...")
7754
7755         transfers = []
7756
7757         for idx, image in enumerate(self.src_images):
7758           if not image:
7759             continue
7760
7761           # FIXME: pass debug option from opcode to backend
7762           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7763                                              constants.IEIO_FILE, (image, ),
7764                                              constants.IEIO_SCRIPT,
7765                                              (iobj.disks[idx], idx),
7766                                              None)
7767           transfers.append(dt)
7768
7769         import_result = \
7770           masterd.instance.TransferInstanceData(self, feedback_fn,
7771                                                 self.op.src_node, pnode_name,
7772                                                 self.pnode.secondary_ip,
7773                                                 iobj, transfers)
7774         if not compat.all(import_result):
7775           self.LogWarning("Some disks for instance %s on node %s were not"
7776                           " imported successfully" % (instance, pnode_name))
7777
7778       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7779         feedback_fn("* preparing remote import...")
7780         # The source cluster will stop the instance before attempting to make a
7781         # connection. In some cases stopping an instance can take a long time,
7782         # hence the shutdown timeout is added to the connection timeout.
7783         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7784                            self.op.source_shutdown_timeout)
7785         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7786
7787         assert iobj.primary_node == self.pnode.name
7788         disk_results = \
7789           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7790                                         self.source_x509_ca,
7791                                         self._cds, timeouts)
7792         if not compat.all(disk_results):
7793           # TODO: Should the instance still be started, even if some disks
7794           # failed to import (valid for local imports, too)?
7795           self.LogWarning("Some disks for instance %s on node %s were not"
7796                           " imported successfully" % (instance, pnode_name))
7797
7798         # Run rename script on newly imported instance
7799         assert iobj.name == instance
7800         feedback_fn("Running rename script for %s" % instance)
7801         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7802                                                    self.source_instance_name,
7803                                                    self.op.debug_level)
7804         if result.fail_msg:
7805           self.LogWarning("Failed to run rename script for %s on node"
7806                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7807
7808       else:
7809         # also checked in the prereq part
7810         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7811                                      % self.op.mode)
7812
7813     if self.op.start:
7814       iobj.admin_up = True
7815       self.cfg.Update(iobj, feedback_fn)
7816       logging.info("Starting instance %s on node %s", instance, pnode_name)
7817       feedback_fn("* starting instance...")
7818       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7819       result.Raise("Could not start instance")
7820
7821     return list(iobj.all_nodes)
7822
7823
7824 class LUInstanceConsole(NoHooksLU):
7825   """Connect to an instance's console.
7826
7827   This is somewhat special in that it returns the command line that
7828   you need to run on the master node in order to connect to the
7829   console.
7830
7831   """
7832   REQ_BGL = False
7833
7834   def ExpandNames(self):
7835     self._ExpandAndLockInstance()
7836
7837   def CheckPrereq(self):
7838     """Check prerequisites.
7839
7840     This checks that the instance is in the cluster.
7841
7842     """
7843     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7844     assert self.instance is not None, \
7845       "Cannot retrieve locked instance %s" % self.op.instance_name
7846     _CheckNodeOnline(self, self.instance.primary_node)
7847
7848   def Exec(self, feedback_fn):
7849     """Connect to the console of an instance
7850
7851     """
7852     instance = self.instance
7853     node = instance.primary_node
7854
7855     node_insts = self.rpc.call_instance_list([node],
7856                                              [instance.hypervisor])[node]
7857     node_insts.Raise("Can't get node information from %s" % node)
7858
7859     if instance.name not in node_insts.payload:
7860       if instance.admin_up:
7861         state = "ERROR_down"
7862       else:
7863         state = "ADMIN_down"
7864       raise errors.OpExecError("Instance %s is not running (state %s)" %
7865                                (instance.name, state))
7866
7867     logging.debug("Connecting to console of %s on %s", instance.name, node)
7868
7869     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7870
7871
7872 def _GetInstanceConsole(cluster, instance):
7873   """Returns console information for an instance.
7874
7875   @type cluster: L{objects.Cluster}
7876   @type instance: L{objects.Instance}
7877   @rtype: dict
7878
7879   """
7880   hyper = hypervisor.GetHypervisor(instance.hypervisor)
7881   # beparams and hvparams are passed separately, to avoid editing the
7882   # instance and then saving the defaults in the instance itself.
7883   hvparams = cluster.FillHV(instance)
7884   beparams = cluster.FillBE(instance)
7885   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7886
7887   assert console.instance == instance.name
7888   assert console.Validate()
7889
7890   return console.ToDict()
7891
7892
7893 class LUInstanceReplaceDisks(LogicalUnit):
7894   """Replace the disks of an instance.
7895
7896   """
7897   HPATH = "mirrors-replace"
7898   HTYPE = constants.HTYPE_INSTANCE
7899   REQ_BGL = False
7900
7901   def CheckArguments(self):
7902     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7903                                   self.op.iallocator)
7904
7905   def ExpandNames(self):
7906     self._ExpandAndLockInstance()
7907
7908     if self.op.iallocator is not None:
7909       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7910
7911     elif self.op.remote_node is not None:
7912       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7913       self.op.remote_node = remote_node
7914
7915       # Warning: do not remove the locking of the new secondary here
7916       # unless DRBD8.AddChildren is changed to work in parallel;
7917       # currently it doesn't since parallel invocations of
7918       # FindUnusedMinor will conflict
7919       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7920       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7921
7922     else:
7923       self.needed_locks[locking.LEVEL_NODE] = []
7924       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7925
7926     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7927                                    self.op.iallocator, self.op.remote_node,
7928                                    self.op.disks, False, self.op.early_release)
7929
7930     self.tasklets = [self.replacer]
7931
7932   def DeclareLocks(self, level):
7933     # If we're not already locking all nodes in the set we have to declare the
7934     # instance's primary/secondary nodes.
7935     if (level == locking.LEVEL_NODE and
7936         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7937       self._LockInstancesNodes()
7938
7939   def BuildHooksEnv(self):
7940     """Build hooks env.
7941
7942     This runs on the master, the primary and all the secondaries.
7943
7944     """
7945     instance = self.replacer.instance
7946     env = {
7947       "MODE": self.op.mode,
7948       "NEW_SECONDARY": self.op.remote_node,
7949       "OLD_SECONDARY": instance.secondary_nodes[0],
7950       }
7951     env.update(_BuildInstanceHookEnvByObject(self, instance))
7952     nl = [
7953       self.cfg.GetMasterNode(),
7954       instance.primary_node,
7955       ]
7956     if self.op.remote_node is not None:
7957       nl.append(self.op.remote_node)
7958     return env, nl, nl
7959
7960
7961 class TLReplaceDisks(Tasklet):
7962   """Replaces disks for an instance.
7963
7964   Note: Locking is not within the scope of this class.
7965
7966   """
7967   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7968                disks, delay_iallocator, early_release):
7969     """Initializes this class.
7970
7971     """
7972     Tasklet.__init__(self, lu)
7973
7974     # Parameters
7975     self.instance_name = instance_name
7976     self.mode = mode
7977     self.iallocator_name = iallocator_name
7978     self.remote_node = remote_node
7979     self.disks = disks
7980     self.delay_iallocator = delay_iallocator
7981     self.early_release = early_release
7982
7983     # Runtime data
7984     self.instance = None
7985     self.new_node = None
7986     self.target_node = None
7987     self.other_node = None
7988     self.remote_node_info = None
7989     self.node_secondary_ip = None
7990
7991   @staticmethod
7992   def CheckArguments(mode, remote_node, iallocator):
7993     """Helper function for users of this class.
7994
7995     """
7996     # check for valid parameter combination
7997     if mode == constants.REPLACE_DISK_CHG:
7998       if remote_node is None and iallocator is None:
7999         raise errors.OpPrereqError("When changing the secondary either an"
8000                                    " iallocator script must be used or the"
8001                                    " new node given", errors.ECODE_INVAL)
8002
8003       if remote_node is not None and iallocator is not None:
8004         raise errors.OpPrereqError("Give either the iallocator or the new"
8005                                    " secondary, not both", errors.ECODE_INVAL)
8006
8007     elif remote_node is not None or iallocator is not None:
8008       # Not replacing the secondary
8009       raise errors.OpPrereqError("The iallocator and new node options can"
8010                                  " only be used when changing the"
8011                                  " secondary node", errors.ECODE_INVAL)
8012
8013   @staticmethod
8014   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8015     """Compute a new secondary node using an IAllocator.
8016
8017     """
8018     ial = IAllocator(lu.cfg, lu.rpc,
8019                      mode=constants.IALLOCATOR_MODE_RELOC,
8020                      name=instance_name,
8021                      relocate_from=relocate_from)
8022
8023     ial.Run(iallocator_name)
8024
8025     if not ial.success:
8026       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8027                                  " %s" % (iallocator_name, ial.info),
8028                                  errors.ECODE_NORES)
8029
8030     if len(ial.result) != ial.required_nodes:
8031       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8032                                  " of nodes (%s), required %s" %
8033                                  (iallocator_name,
8034                                   len(ial.result), ial.required_nodes),
8035                                  errors.ECODE_FAULT)
8036
8037     remote_node_name = ial.result[0]
8038
8039     lu.LogInfo("Selected new secondary for instance '%s': %s",
8040                instance_name, remote_node_name)
8041
8042     return remote_node_name
8043
8044   def _FindFaultyDisks(self, node_name):
8045     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8046                                     node_name, True)
8047
8048   def CheckPrereq(self):
8049     """Check prerequisites.
8050
8051     This checks that the instance is in the cluster.
8052
8053     """
8054     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8055     assert instance is not None, \
8056       "Cannot retrieve locked instance %s" % self.instance_name
8057
8058     if instance.disk_template != constants.DT_DRBD8:
8059       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8060                                  " instances", errors.ECODE_INVAL)
8061
8062     if len(instance.secondary_nodes) != 1:
8063       raise errors.OpPrereqError("The instance has a strange layout,"
8064                                  " expected one secondary but found %d" %
8065                                  len(instance.secondary_nodes),
8066                                  errors.ECODE_FAULT)
8067
8068     if not self.delay_iallocator:
8069       self._CheckPrereq2()
8070
8071   def _CheckPrereq2(self):
8072     """Check prerequisites, second part.
8073
8074     This function should always be part of CheckPrereq. It was separated and is
8075     now called from Exec because during node evacuation iallocator was only
8076     called with an unmodified cluster model, not taking planned changes into
8077     account.
8078
8079     """
8080     instance = self.instance
8081     secondary_node = instance.secondary_nodes[0]
8082
8083     if self.iallocator_name is None:
8084       remote_node = self.remote_node
8085     else:
8086       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8087                                        instance.name, instance.secondary_nodes)
8088
8089     if remote_node is not None:
8090       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8091       assert self.remote_node_info is not None, \
8092         "Cannot retrieve locked node %s" % remote_node
8093     else:
8094       self.remote_node_info = None
8095
8096     if remote_node == self.instance.primary_node:
8097       raise errors.OpPrereqError("The specified node is the primary node of"
8098                                  " the instance.", errors.ECODE_INVAL)
8099
8100     if remote_node == secondary_node:
8101       raise errors.OpPrereqError("The specified node is already the"
8102                                  " secondary node of the instance.",
8103                                  errors.ECODE_INVAL)
8104
8105     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8106                                     constants.REPLACE_DISK_CHG):
8107       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8108                                  errors.ECODE_INVAL)
8109
8110     if self.mode == constants.REPLACE_DISK_AUTO:
8111       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8112       faulty_secondary = self._FindFaultyDisks(secondary_node)
8113
8114       if faulty_primary and faulty_secondary:
8115         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8116                                    " one node and can not be repaired"
8117                                    " automatically" % self.instance_name,
8118                                    errors.ECODE_STATE)
8119
8120       if faulty_primary:
8121         self.disks = faulty_primary
8122         self.target_node = instance.primary_node
8123         self.other_node = secondary_node
8124         check_nodes = [self.target_node, self.other_node]
8125       elif faulty_secondary:
8126         self.disks = faulty_secondary
8127         self.target_node = secondary_node
8128         self.other_node = instance.primary_node
8129         check_nodes = [self.target_node, self.other_node]
8130       else:
8131         self.disks = []
8132         check_nodes = []
8133
8134     else:
8135       # Non-automatic modes
8136       if self.mode == constants.REPLACE_DISK_PRI:
8137         self.target_node = instance.primary_node
8138         self.other_node = secondary_node
8139         check_nodes = [self.target_node, self.other_node]
8140
8141       elif self.mode == constants.REPLACE_DISK_SEC:
8142         self.target_node = secondary_node
8143         self.other_node = instance.primary_node
8144         check_nodes = [self.target_node, self.other_node]
8145
8146       elif self.mode == constants.REPLACE_DISK_CHG:
8147         self.new_node = remote_node
8148         self.other_node = instance.primary_node
8149         self.target_node = secondary_node
8150         check_nodes = [self.new_node, self.other_node]
8151
8152         _CheckNodeNotDrained(self.lu, remote_node)
8153         _CheckNodeVmCapable(self.lu, remote_node)
8154
8155         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8156         assert old_node_info is not None
8157         if old_node_info.offline and not self.early_release:
8158           # doesn't make sense to delay the release
8159           self.early_release = True
8160           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8161                           " early-release mode", secondary_node)
8162
8163       else:
8164         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8165                                      self.mode)
8166
8167       # If not specified all disks should be replaced
8168       if not self.disks:
8169         self.disks = range(len(self.instance.disks))
8170
8171     for node in check_nodes:
8172       _CheckNodeOnline(self.lu, node)
8173
8174     # Check whether disks are valid
8175     for disk_idx in self.disks:
8176       instance.FindDisk(disk_idx)
8177
8178     # Get secondary node IP addresses
8179     node_2nd_ip = {}
8180
8181     for node_name in [self.target_node, self.other_node, self.new_node]:
8182       if node_name is not None:
8183         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8184
8185     self.node_secondary_ip = node_2nd_ip
8186
8187   def Exec(self, feedback_fn):
8188     """Execute disk replacement.
8189
8190     This dispatches the disk replacement to the appropriate handler.
8191
8192     """
8193     if self.delay_iallocator:
8194       self._CheckPrereq2()
8195
8196     if not self.disks:
8197       feedback_fn("No disks need replacement")
8198       return
8199
8200     feedback_fn("Replacing disk(s) %s for %s" %
8201                 (utils.CommaJoin(self.disks), self.instance.name))
8202
8203     activate_disks = (not self.instance.admin_up)
8204
8205     # Activate the instance disks if we're replacing them on a down instance
8206     if activate_disks:
8207       _StartInstanceDisks(self.lu, self.instance, True)
8208
8209     try:
8210       # Should we replace the secondary node?
8211       if self.new_node is not None:
8212         fn = self._ExecDrbd8Secondary
8213       else:
8214         fn = self._ExecDrbd8DiskOnly
8215
8216       return fn(feedback_fn)
8217
8218     finally:
8219       # Deactivate the instance disks if we're replacing them on a
8220       # down instance
8221       if activate_disks:
8222         _SafeShutdownInstanceDisks(self.lu, self.instance)
8223
8224   def _CheckVolumeGroup(self, nodes):
8225     self.lu.LogInfo("Checking volume groups")
8226
8227     vgname = self.cfg.GetVGName()
8228
8229     # Make sure volume group exists on all involved nodes
8230     results = self.rpc.call_vg_list(nodes)
8231     if not results:
8232       raise errors.OpExecError("Can't list volume groups on the nodes")
8233
8234     for node in nodes:
8235       res = results[node]
8236       res.Raise("Error checking node %s" % node)
8237       if vgname not in res.payload:
8238         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8239                                  (vgname, node))
8240
8241   def _CheckDisksExistence(self, nodes):
8242     # Check disk existence
8243     for idx, dev in enumerate(self.instance.disks):
8244       if idx not in self.disks:
8245         continue
8246
8247       for node in nodes:
8248         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8249         self.cfg.SetDiskID(dev, node)
8250
8251         result = self.rpc.call_blockdev_find(node, dev)
8252
8253         msg = result.fail_msg
8254         if msg or not result.payload:
8255           if not msg:
8256             msg = "disk not found"
8257           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8258                                    (idx, node, msg))
8259
8260   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8261     for idx, dev in enumerate(self.instance.disks):
8262       if idx not in self.disks:
8263         continue
8264
8265       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8266                       (idx, node_name))
8267
8268       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8269                                    ldisk=ldisk):
8270         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8271                                  " replace disks for instance %s" %
8272                                  (node_name, self.instance.name))
8273
8274   def _CreateNewStorage(self, node_name):
8275     vgname = self.cfg.GetVGName()
8276     iv_names = {}
8277
8278     for idx, dev in enumerate(self.instance.disks):
8279       if idx not in self.disks:
8280         continue
8281
8282       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8283
8284       self.cfg.SetDiskID(dev, node_name)
8285
8286       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8287       names = _GenerateUniqueNames(self.lu, lv_names)
8288
8289       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8290                              logical_id=(vgname, names[0]))
8291       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8292                              logical_id=(vgname, names[1]))
8293
8294       new_lvs = [lv_data, lv_meta]
8295       old_lvs = dev.children
8296       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8297
8298       # we pass force_create=True to force the LVM creation
8299       for new_lv in new_lvs:
8300         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8301                         _GetInstanceInfoText(self.instance), False)
8302
8303     return iv_names
8304
8305   def _CheckDevices(self, node_name, iv_names):
8306     for name, (dev, _, _) in iv_names.iteritems():
8307       self.cfg.SetDiskID(dev, node_name)
8308
8309       result = self.rpc.call_blockdev_find(node_name, dev)
8310
8311       msg = result.fail_msg
8312       if msg or not result.payload:
8313         if not msg:
8314           msg = "disk not found"
8315         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8316                                  (name, msg))
8317
8318       if result.payload.is_degraded:
8319         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8320
8321   def _RemoveOldStorage(self, node_name, iv_names):
8322     for name, (_, old_lvs, _) in iv_names.iteritems():
8323       self.lu.LogInfo("Remove logical volumes for %s" % name)
8324
8325       for lv in old_lvs:
8326         self.cfg.SetDiskID(lv, node_name)
8327
8328         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8329         if msg:
8330           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8331                              hint="remove unused LVs manually")
8332
8333   def _ReleaseNodeLock(self, node_name):
8334     """Releases the lock for a given node."""
8335     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8336
8337   def _ExecDrbd8DiskOnly(self, feedback_fn):
8338     """Replace a disk on the primary or secondary for DRBD 8.
8339
8340     The algorithm for replace is quite complicated:
8341
8342       1. for each disk to be replaced:
8343
8344         1. create new LVs on the target node with unique names
8345         1. detach old LVs from the drbd device
8346         1. rename old LVs to name_replaced.<time_t>
8347         1. rename new LVs to old LVs
8348         1. attach the new LVs (with the old names now) to the drbd device
8349
8350       1. wait for sync across all devices
8351
8352       1. for each modified disk:
8353
8354         1. remove old LVs (which have the name name_replaces.<time_t>)
8355
8356     Failures are not very well handled.
8357
8358     """
8359     steps_total = 6
8360
8361     # Step: check device activation
8362     self.lu.LogStep(1, steps_total, "Check device existence")
8363     self._CheckDisksExistence([self.other_node, self.target_node])
8364     self._CheckVolumeGroup([self.target_node, self.other_node])
8365
8366     # Step: check other node consistency
8367     self.lu.LogStep(2, steps_total, "Check peer consistency")
8368     self._CheckDisksConsistency(self.other_node,
8369                                 self.other_node == self.instance.primary_node,
8370                                 False)
8371
8372     # Step: create new storage
8373     self.lu.LogStep(3, steps_total, "Allocate new storage")
8374     iv_names = self._CreateNewStorage(self.target_node)
8375
8376     # Step: for each lv, detach+rename*2+attach
8377     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8378     for dev, old_lvs, new_lvs in iv_names.itervalues():
8379       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8380
8381       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8382                                                      old_lvs)
8383       result.Raise("Can't detach drbd from local storage on node"
8384                    " %s for device %s" % (self.target_node, dev.iv_name))
8385       #dev.children = []
8386       #cfg.Update(instance)
8387
8388       # ok, we created the new LVs, so now we know we have the needed
8389       # storage; as such, we proceed on the target node to rename
8390       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8391       # using the assumption that logical_id == physical_id (which in
8392       # turn is the unique_id on that node)
8393
8394       # FIXME(iustin): use a better name for the replaced LVs
8395       temp_suffix = int(time.time())
8396       ren_fn = lambda d, suff: (d.physical_id[0],
8397                                 d.physical_id[1] + "_replaced-%s" % suff)
8398
8399       # Build the rename list based on what LVs exist on the node
8400       rename_old_to_new = []
8401       for to_ren in old_lvs:
8402         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8403         if not result.fail_msg and result.payload:
8404           # device exists
8405           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8406
8407       self.lu.LogInfo("Renaming the old LVs on the target node")
8408       result = self.rpc.call_blockdev_rename(self.target_node,
8409                                              rename_old_to_new)
8410       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8411
8412       # Now we rename the new LVs to the old LVs
8413       self.lu.LogInfo("Renaming the new LVs on the target node")
8414       rename_new_to_old = [(new, old.physical_id)
8415                            for old, new in zip(old_lvs, new_lvs)]
8416       result = self.rpc.call_blockdev_rename(self.target_node,
8417                                              rename_new_to_old)
8418       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8419
8420       for old, new in zip(old_lvs, new_lvs):
8421         new.logical_id = old.logical_id
8422         self.cfg.SetDiskID(new, self.target_node)
8423
8424       for disk in old_lvs:
8425         disk.logical_id = ren_fn(disk, temp_suffix)
8426         self.cfg.SetDiskID(disk, self.target_node)
8427
8428       # Now that the new lvs have the old name, we can add them to the device
8429       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8430       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8431                                                   new_lvs)
8432       msg = result.fail_msg
8433       if msg:
8434         for new_lv in new_lvs:
8435           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8436                                                new_lv).fail_msg
8437           if msg2:
8438             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8439                                hint=("cleanup manually the unused logical"
8440                                      "volumes"))
8441         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8442
8443       dev.children = new_lvs
8444
8445       self.cfg.Update(self.instance, feedback_fn)
8446
8447     cstep = 5
8448     if self.early_release:
8449       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8450       cstep += 1
8451       self._RemoveOldStorage(self.target_node, iv_names)
8452       # WARNING: we release both node locks here, do not do other RPCs
8453       # than WaitForSync to the primary node
8454       self._ReleaseNodeLock([self.target_node, self.other_node])
8455
8456     # Wait for sync
8457     # This can fail as the old devices are degraded and _WaitForSync
8458     # does a combined result over all disks, so we don't check its return value
8459     self.lu.LogStep(cstep, steps_total, "Sync devices")
8460     cstep += 1
8461     _WaitForSync(self.lu, self.instance)
8462
8463     # Check all devices manually
8464     self._CheckDevices(self.instance.primary_node, iv_names)
8465
8466     # Step: remove old storage
8467     if not self.early_release:
8468       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8469       cstep += 1
8470       self._RemoveOldStorage(self.target_node, iv_names)
8471
8472   def _ExecDrbd8Secondary(self, feedback_fn):
8473     """Replace the secondary node for DRBD 8.
8474
8475     The algorithm for replace is quite complicated:
8476       - for all disks of the instance:
8477         - create new LVs on the new node with same names
8478         - shutdown the drbd device on the old secondary
8479         - disconnect the drbd network on the primary
8480         - create the drbd device on the new secondary
8481         - network attach the drbd on the primary, using an artifice:
8482           the drbd code for Attach() will connect to the network if it
8483           finds a device which is connected to the good local disks but
8484           not network enabled
8485       - wait for sync across all devices
8486       - remove all disks from the old secondary
8487
8488     Failures are not very well handled.
8489
8490     """
8491     steps_total = 6
8492
8493     # Step: check device activation
8494     self.lu.LogStep(1, steps_total, "Check device existence")
8495     self._CheckDisksExistence([self.instance.primary_node])
8496     self._CheckVolumeGroup([self.instance.primary_node])
8497
8498     # Step: check other node consistency
8499     self.lu.LogStep(2, steps_total, "Check peer consistency")
8500     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8501
8502     # Step: create new storage
8503     self.lu.LogStep(3, steps_total, "Allocate new storage")
8504     for idx, dev in enumerate(self.instance.disks):
8505       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8506                       (self.new_node, idx))
8507       # we pass force_create=True to force LVM creation
8508       for new_lv in dev.children:
8509         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8510                         _GetInstanceInfoText(self.instance), False)
8511
8512     # Step 4: dbrd minors and drbd setups changes
8513     # after this, we must manually remove the drbd minors on both the
8514     # error and the success paths
8515     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8516     minors = self.cfg.AllocateDRBDMinor([self.new_node
8517                                          for dev in self.instance.disks],
8518                                         self.instance.name)
8519     logging.debug("Allocated minors %r", minors)
8520
8521     iv_names = {}
8522     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8523       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8524                       (self.new_node, idx))
8525       # create new devices on new_node; note that we create two IDs:
8526       # one without port, so the drbd will be activated without
8527       # networking information on the new node at this stage, and one
8528       # with network, for the latter activation in step 4
8529       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8530       if self.instance.primary_node == o_node1:
8531         p_minor = o_minor1
8532       else:
8533         assert self.instance.primary_node == o_node2, "Three-node instance?"
8534         p_minor = o_minor2
8535
8536       new_alone_id = (self.instance.primary_node, self.new_node, None,
8537                       p_minor, new_minor, o_secret)
8538       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8539                     p_minor, new_minor, o_secret)
8540
8541       iv_names[idx] = (dev, dev.children, new_net_id)
8542       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8543                     new_net_id)
8544       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8545                               logical_id=new_alone_id,
8546                               children=dev.children,
8547                               size=dev.size)
8548       try:
8549         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8550                               _GetInstanceInfoText(self.instance), False)
8551       except errors.GenericError:
8552         self.cfg.ReleaseDRBDMinors(self.instance.name)
8553         raise
8554
8555     # We have new devices, shutdown the drbd on the old secondary
8556     for idx, dev in enumerate(self.instance.disks):
8557       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8558       self.cfg.SetDiskID(dev, self.target_node)
8559       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8560       if msg:
8561         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8562                            "node: %s" % (idx, msg),
8563                            hint=("Please cleanup this device manually as"
8564                                  " soon as possible"))
8565
8566     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8567     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8568                                                self.node_secondary_ip,
8569                                                self.instance.disks)\
8570                                               [self.instance.primary_node]
8571
8572     msg = result.fail_msg
8573     if msg:
8574       # detaches didn't succeed (unlikely)
8575       self.cfg.ReleaseDRBDMinors(self.instance.name)
8576       raise errors.OpExecError("Can't detach the disks from the network on"
8577                                " old node: %s" % (msg,))
8578
8579     # if we managed to detach at least one, we update all the disks of
8580     # the instance to point to the new secondary
8581     self.lu.LogInfo("Updating instance configuration")
8582     for dev, _, new_logical_id in iv_names.itervalues():
8583       dev.logical_id = new_logical_id
8584       self.cfg.SetDiskID(dev, self.instance.primary_node)
8585
8586     self.cfg.Update(self.instance, feedback_fn)
8587
8588     # and now perform the drbd attach
8589     self.lu.LogInfo("Attaching primary drbds to new secondary"
8590                     " (standalone => connected)")
8591     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8592                                             self.new_node],
8593                                            self.node_secondary_ip,
8594                                            self.instance.disks,
8595                                            self.instance.name,
8596                                            False)
8597     for to_node, to_result in result.items():
8598       msg = to_result.fail_msg
8599       if msg:
8600         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8601                            to_node, msg,
8602                            hint=("please do a gnt-instance info to see the"
8603                                  " status of disks"))
8604     cstep = 5
8605     if self.early_release:
8606       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8607       cstep += 1
8608       self._RemoveOldStorage(self.target_node, iv_names)
8609       # WARNING: we release all node locks here, do not do other RPCs
8610       # than WaitForSync to the primary node
8611       self._ReleaseNodeLock([self.instance.primary_node,
8612                              self.target_node,
8613                              self.new_node])
8614
8615     # Wait for sync
8616     # This can fail as the old devices are degraded and _WaitForSync
8617     # does a combined result over all disks, so we don't check its return value
8618     self.lu.LogStep(cstep, steps_total, "Sync devices")
8619     cstep += 1
8620     _WaitForSync(self.lu, self.instance)
8621
8622     # Check all devices manually
8623     self._CheckDevices(self.instance.primary_node, iv_names)
8624
8625     # Step: remove old storage
8626     if not self.early_release:
8627       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8628       self._RemoveOldStorage(self.target_node, iv_names)
8629
8630
8631 class LURepairNodeStorage(NoHooksLU):
8632   """Repairs the volume group on a node.
8633
8634   """
8635   REQ_BGL = False
8636
8637   def CheckArguments(self):
8638     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8639
8640     storage_type = self.op.storage_type
8641
8642     if (constants.SO_FIX_CONSISTENCY not in
8643         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8644       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8645                                  " repaired" % storage_type,
8646                                  errors.ECODE_INVAL)
8647
8648   def ExpandNames(self):
8649     self.needed_locks = {
8650       locking.LEVEL_NODE: [self.op.node_name],
8651       }
8652
8653   def _CheckFaultyDisks(self, instance, node_name):
8654     """Ensure faulty disks abort the opcode or at least warn."""
8655     try:
8656       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8657                                   node_name, True):
8658         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8659                                    " node '%s'" % (instance.name, node_name),
8660                                    errors.ECODE_STATE)
8661     except errors.OpPrereqError, err:
8662       if self.op.ignore_consistency:
8663         self.proc.LogWarning(str(err.args[0]))
8664       else:
8665         raise
8666
8667   def CheckPrereq(self):
8668     """Check prerequisites.
8669
8670     """
8671     # Check whether any instance on this node has faulty disks
8672     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8673       if not inst.admin_up:
8674         continue
8675       check_nodes = set(inst.all_nodes)
8676       check_nodes.discard(self.op.node_name)
8677       for inst_node_name in check_nodes:
8678         self._CheckFaultyDisks(inst, inst_node_name)
8679
8680   def Exec(self, feedback_fn):
8681     feedback_fn("Repairing storage unit '%s' on %s ..." %
8682                 (self.op.name, self.op.node_name))
8683
8684     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8685     result = self.rpc.call_storage_execute(self.op.node_name,
8686                                            self.op.storage_type, st_args,
8687                                            self.op.name,
8688                                            constants.SO_FIX_CONSISTENCY)
8689     result.Raise("Failed to repair storage unit '%s' on %s" %
8690                  (self.op.name, self.op.node_name))
8691
8692
8693 class LUNodeEvacStrategy(NoHooksLU):
8694   """Computes the node evacuation strategy.
8695
8696   """
8697   REQ_BGL = False
8698
8699   def CheckArguments(self):
8700     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8701
8702   def ExpandNames(self):
8703     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8704     self.needed_locks = locks = {}
8705     if self.op.remote_node is None:
8706       locks[locking.LEVEL_NODE] = locking.ALL_SET
8707     else:
8708       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8709       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8710
8711   def Exec(self, feedback_fn):
8712     if self.op.remote_node is not None:
8713       instances = []
8714       for node in self.op.nodes:
8715         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8716       result = []
8717       for i in instances:
8718         if i.primary_node == self.op.remote_node:
8719           raise errors.OpPrereqError("Node %s is the primary node of"
8720                                      " instance %s, cannot use it as"
8721                                      " secondary" %
8722                                      (self.op.remote_node, i.name),
8723                                      errors.ECODE_INVAL)
8724         result.append([i.name, self.op.remote_node])
8725     else:
8726       ial = IAllocator(self.cfg, self.rpc,
8727                        mode=constants.IALLOCATOR_MODE_MEVAC,
8728                        evac_nodes=self.op.nodes)
8729       ial.Run(self.op.iallocator, validate=True)
8730       if not ial.success:
8731         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8732                                  errors.ECODE_NORES)
8733       result = ial.result
8734     return result
8735
8736
8737 class LUInstanceGrowDisk(LogicalUnit):
8738   """Grow a disk of an instance.
8739
8740   """
8741   HPATH = "disk-grow"
8742   HTYPE = constants.HTYPE_INSTANCE
8743   REQ_BGL = False
8744
8745   def ExpandNames(self):
8746     self._ExpandAndLockInstance()
8747     self.needed_locks[locking.LEVEL_NODE] = []
8748     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8749
8750   def DeclareLocks(self, level):
8751     if level == locking.LEVEL_NODE:
8752       self._LockInstancesNodes()
8753
8754   def BuildHooksEnv(self):
8755     """Build hooks env.
8756
8757     This runs on the master, the primary and all the secondaries.
8758
8759     """
8760     env = {
8761       "DISK": self.op.disk,
8762       "AMOUNT": self.op.amount,
8763       }
8764     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8765     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8766     return env, nl, nl
8767
8768   def CheckPrereq(self):
8769     """Check prerequisites.
8770
8771     This checks that the instance is in the cluster.
8772
8773     """
8774     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8775     assert instance is not None, \
8776       "Cannot retrieve locked instance %s" % self.op.instance_name
8777     nodenames = list(instance.all_nodes)
8778     for node in nodenames:
8779       _CheckNodeOnline(self, node)
8780
8781     self.instance = instance
8782
8783     if instance.disk_template not in constants.DTS_GROWABLE:
8784       raise errors.OpPrereqError("Instance's disk layout does not support"
8785                                  " growing.", errors.ECODE_INVAL)
8786
8787     self.disk = instance.FindDisk(self.op.disk)
8788
8789     if instance.disk_template != constants.DT_FILE:
8790       # TODO: check the free disk space for file, when that feature
8791       # will be supported
8792       _CheckNodesFreeDiskPerVG(self, nodenames,
8793                                self.disk.ComputeGrowth(self.op.amount))
8794
8795   def Exec(self, feedback_fn):
8796     """Execute disk grow.
8797
8798     """
8799     instance = self.instance
8800     disk = self.disk
8801
8802     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8803     if not disks_ok:
8804       raise errors.OpExecError("Cannot activate block device to grow")
8805
8806     for node in instance.all_nodes:
8807       self.cfg.SetDiskID(disk, node)
8808       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8809       result.Raise("Grow request failed to node %s" % node)
8810
8811       # TODO: Rewrite code to work properly
8812       # DRBD goes into sync mode for a short amount of time after executing the
8813       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8814       # calling "resize" in sync mode fails. Sleeping for a short amount of
8815       # time is a work-around.
8816       time.sleep(5)
8817
8818     disk.RecordGrow(self.op.amount)
8819     self.cfg.Update(instance, feedback_fn)
8820     if self.op.wait_for_sync:
8821       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8822       if disk_abort:
8823         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8824                              " status.\nPlease check the instance.")
8825       if not instance.admin_up:
8826         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8827     elif not instance.admin_up:
8828       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8829                            " not supposed to be running because no wait for"
8830                            " sync mode was requested.")
8831
8832
8833 class LUInstanceQueryData(NoHooksLU):
8834   """Query runtime instance data.
8835
8836   """
8837   REQ_BGL = False
8838
8839   def ExpandNames(self):
8840     self.needed_locks = {}
8841     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8842
8843     if self.op.instances:
8844       self.wanted_names = []
8845       for name in self.op.instances:
8846         full_name = _ExpandInstanceName(self.cfg, name)
8847         self.wanted_names.append(full_name)
8848       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8849     else:
8850       self.wanted_names = None
8851       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8852
8853     self.needed_locks[locking.LEVEL_NODE] = []
8854     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8855
8856   def DeclareLocks(self, level):
8857     if level == locking.LEVEL_NODE:
8858       self._LockInstancesNodes()
8859
8860   def CheckPrereq(self):
8861     """Check prerequisites.
8862
8863     This only checks the optional instance list against the existing names.
8864
8865     """
8866     if self.wanted_names is None:
8867       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8868
8869     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8870                              in self.wanted_names]
8871
8872   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8873     """Returns the status of a block device
8874
8875     """
8876     if self.op.static or not node:
8877       return None
8878
8879     self.cfg.SetDiskID(dev, node)
8880
8881     result = self.rpc.call_blockdev_find(node, dev)
8882     if result.offline:
8883       return None
8884
8885     result.Raise("Can't compute disk status for %s" % instance_name)
8886
8887     status = result.payload
8888     if status is None:
8889       return None
8890
8891     return (status.dev_path, status.major, status.minor,
8892             status.sync_percent, status.estimated_time,
8893             status.is_degraded, status.ldisk_status)
8894
8895   def _ComputeDiskStatus(self, instance, snode, dev):
8896     """Compute block device status.
8897
8898     """
8899     if dev.dev_type in constants.LDS_DRBD:
8900       # we change the snode then (otherwise we use the one passed in)
8901       if dev.logical_id[0] == instance.primary_node:
8902         snode = dev.logical_id[1]
8903       else:
8904         snode = dev.logical_id[0]
8905
8906     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8907                                               instance.name, dev)
8908     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8909
8910     if dev.children:
8911       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8912                       for child in dev.children]
8913     else:
8914       dev_children = []
8915
8916     data = {
8917       "iv_name": dev.iv_name,
8918       "dev_type": dev.dev_type,
8919       "logical_id": dev.logical_id,
8920       "physical_id": dev.physical_id,
8921       "pstatus": dev_pstatus,
8922       "sstatus": dev_sstatus,
8923       "children": dev_children,
8924       "mode": dev.mode,
8925       "size": dev.size,
8926       }
8927
8928     return data
8929
8930   def Exec(self, feedback_fn):
8931     """Gather and return data"""
8932     result = {}
8933
8934     cluster = self.cfg.GetClusterInfo()
8935
8936     for instance in self.wanted_instances:
8937       if not self.op.static:
8938         remote_info = self.rpc.call_instance_info(instance.primary_node,
8939                                                   instance.name,
8940                                                   instance.hypervisor)
8941         remote_info.Raise("Error checking node %s" % instance.primary_node)
8942         remote_info = remote_info.payload
8943         if remote_info and "state" in remote_info:
8944           remote_state = "up"
8945         else:
8946           remote_state = "down"
8947       else:
8948         remote_state = None
8949       if instance.admin_up:
8950         config_state = "up"
8951       else:
8952         config_state = "down"
8953
8954       disks = [self._ComputeDiskStatus(instance, None, device)
8955                for device in instance.disks]
8956
8957       idict = {
8958         "name": instance.name,
8959         "config_state": config_state,
8960         "run_state": remote_state,
8961         "pnode": instance.primary_node,
8962         "snodes": instance.secondary_nodes,
8963         "os": instance.os,
8964         # this happens to be the same format used for hooks
8965         "nics": _NICListToTuple(self, instance.nics),
8966         "disk_template": instance.disk_template,
8967         "disks": disks,
8968         "hypervisor": instance.hypervisor,
8969         "network_port": instance.network_port,
8970         "hv_instance": instance.hvparams,
8971         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8972         "be_instance": instance.beparams,
8973         "be_actual": cluster.FillBE(instance),
8974         "os_instance": instance.osparams,
8975         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8976         "serial_no": instance.serial_no,
8977         "mtime": instance.mtime,
8978         "ctime": instance.ctime,
8979         "uuid": instance.uuid,
8980         }
8981
8982       result[instance.name] = idict
8983
8984     return result
8985
8986
8987 class LUInstanceSetParams(LogicalUnit):
8988   """Modifies an instances's parameters.
8989
8990   """
8991   HPATH = "instance-modify"
8992   HTYPE = constants.HTYPE_INSTANCE
8993   REQ_BGL = False
8994
8995   def CheckArguments(self):
8996     if not (self.op.nics or self.op.disks or self.op.disk_template or
8997             self.op.hvparams or self.op.beparams or self.op.os_name):
8998       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8999
9000     if self.op.hvparams:
9001       _CheckGlobalHvParams(self.op.hvparams)
9002
9003     # Disk validation
9004     disk_addremove = 0
9005     for disk_op, disk_dict in self.op.disks:
9006       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9007       if disk_op == constants.DDM_REMOVE:
9008         disk_addremove += 1
9009         continue
9010       elif disk_op == constants.DDM_ADD:
9011         disk_addremove += 1
9012       else:
9013         if not isinstance(disk_op, int):
9014           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9015         if not isinstance(disk_dict, dict):
9016           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9017           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9018
9019       if disk_op == constants.DDM_ADD:
9020         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9021         if mode not in constants.DISK_ACCESS_SET:
9022           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9023                                      errors.ECODE_INVAL)
9024         size = disk_dict.get('size', None)
9025         if size is None:
9026           raise errors.OpPrereqError("Required disk parameter size missing",
9027                                      errors.ECODE_INVAL)
9028         try:
9029           size = int(size)
9030         except (TypeError, ValueError), err:
9031           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9032                                      str(err), errors.ECODE_INVAL)
9033         disk_dict['size'] = size
9034       else:
9035         # modification of disk
9036         if 'size' in disk_dict:
9037           raise errors.OpPrereqError("Disk size change not possible, use"
9038                                      " grow-disk", errors.ECODE_INVAL)
9039
9040     if disk_addremove > 1:
9041       raise errors.OpPrereqError("Only one disk add or remove operation"
9042                                  " supported at a time", errors.ECODE_INVAL)
9043
9044     if self.op.disks and self.op.disk_template is not None:
9045       raise errors.OpPrereqError("Disk template conversion and other disk"
9046                                  " changes not supported at the same time",
9047                                  errors.ECODE_INVAL)
9048
9049     if (self.op.disk_template and
9050         self.op.disk_template in constants.DTS_NET_MIRROR and
9051         self.op.remote_node is None):
9052       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9053                                  " one requires specifying a secondary node",
9054                                  errors.ECODE_INVAL)
9055
9056     # NIC validation
9057     nic_addremove = 0
9058     for nic_op, nic_dict in self.op.nics:
9059       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9060       if nic_op == constants.DDM_REMOVE:
9061         nic_addremove += 1
9062         continue
9063       elif nic_op == constants.DDM_ADD:
9064         nic_addremove += 1
9065       else:
9066         if not isinstance(nic_op, int):
9067           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9068         if not isinstance(nic_dict, dict):
9069           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9070           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9071
9072       # nic_dict should be a dict
9073       nic_ip = nic_dict.get('ip', None)
9074       if nic_ip is not None:
9075         if nic_ip.lower() == constants.VALUE_NONE:
9076           nic_dict['ip'] = None
9077         else:
9078           if not netutils.IPAddress.IsValid(nic_ip):
9079             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9080                                        errors.ECODE_INVAL)
9081
9082       nic_bridge = nic_dict.get('bridge', None)
9083       nic_link = nic_dict.get('link', None)
9084       if nic_bridge and nic_link:
9085         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9086                                    " at the same time", errors.ECODE_INVAL)
9087       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9088         nic_dict['bridge'] = None
9089       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9090         nic_dict['link'] = None
9091
9092       if nic_op == constants.DDM_ADD:
9093         nic_mac = nic_dict.get('mac', None)
9094         if nic_mac is None:
9095           nic_dict['mac'] = constants.VALUE_AUTO
9096
9097       if 'mac' in nic_dict:
9098         nic_mac = nic_dict['mac']
9099         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9100           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9101
9102         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9103           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9104                                      " modifying an existing nic",
9105                                      errors.ECODE_INVAL)
9106
9107     if nic_addremove > 1:
9108       raise errors.OpPrereqError("Only one NIC add or remove operation"
9109                                  " supported at a time", errors.ECODE_INVAL)
9110
9111   def ExpandNames(self):
9112     self._ExpandAndLockInstance()
9113     self.needed_locks[locking.LEVEL_NODE] = []
9114     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9115
9116   def DeclareLocks(self, level):
9117     if level == locking.LEVEL_NODE:
9118       self._LockInstancesNodes()
9119       if self.op.disk_template and self.op.remote_node:
9120         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9121         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9122
9123   def BuildHooksEnv(self):
9124     """Build hooks env.
9125
9126     This runs on the master, primary and secondaries.
9127
9128     """
9129     args = dict()
9130     if constants.BE_MEMORY in self.be_new:
9131       args['memory'] = self.be_new[constants.BE_MEMORY]
9132     if constants.BE_VCPUS in self.be_new:
9133       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9134     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9135     # information at all.
9136     if self.op.nics:
9137       args['nics'] = []
9138       nic_override = dict(self.op.nics)
9139       for idx, nic in enumerate(self.instance.nics):
9140         if idx in nic_override:
9141           this_nic_override = nic_override[idx]
9142         else:
9143           this_nic_override = {}
9144         if 'ip' in this_nic_override:
9145           ip = this_nic_override['ip']
9146         else:
9147           ip = nic.ip
9148         if 'mac' in this_nic_override:
9149           mac = this_nic_override['mac']
9150         else:
9151           mac = nic.mac
9152         if idx in self.nic_pnew:
9153           nicparams = self.nic_pnew[idx]
9154         else:
9155           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9156         mode = nicparams[constants.NIC_MODE]
9157         link = nicparams[constants.NIC_LINK]
9158         args['nics'].append((ip, mac, mode, link))
9159       if constants.DDM_ADD in nic_override:
9160         ip = nic_override[constants.DDM_ADD].get('ip', None)
9161         mac = nic_override[constants.DDM_ADD]['mac']
9162         nicparams = self.nic_pnew[constants.DDM_ADD]
9163         mode = nicparams[constants.NIC_MODE]
9164         link = nicparams[constants.NIC_LINK]
9165         args['nics'].append((ip, mac, mode, link))
9166       elif constants.DDM_REMOVE in nic_override:
9167         del args['nics'][-1]
9168
9169     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9170     if self.op.disk_template:
9171       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9172     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9173     return env, nl, nl
9174
9175   def CheckPrereq(self):
9176     """Check prerequisites.
9177
9178     This only checks the instance list against the existing names.
9179
9180     """
9181     # checking the new params on the primary/secondary nodes
9182
9183     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9184     cluster = self.cluster = self.cfg.GetClusterInfo()
9185     assert self.instance is not None, \
9186       "Cannot retrieve locked instance %s" % self.op.instance_name
9187     pnode = instance.primary_node
9188     nodelist = list(instance.all_nodes)
9189
9190     # OS change
9191     if self.op.os_name and not self.op.force:
9192       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9193                       self.op.force_variant)
9194       instance_os = self.op.os_name
9195     else:
9196       instance_os = instance.os
9197
9198     if self.op.disk_template:
9199       if instance.disk_template == self.op.disk_template:
9200         raise errors.OpPrereqError("Instance already has disk template %s" %
9201                                    instance.disk_template, errors.ECODE_INVAL)
9202
9203       if (instance.disk_template,
9204           self.op.disk_template) not in self._DISK_CONVERSIONS:
9205         raise errors.OpPrereqError("Unsupported disk template conversion from"
9206                                    " %s to %s" % (instance.disk_template,
9207                                                   self.op.disk_template),
9208                                    errors.ECODE_INVAL)
9209       _CheckInstanceDown(self, instance, "cannot change disk template")
9210       if self.op.disk_template in constants.DTS_NET_MIRROR:
9211         if self.op.remote_node == pnode:
9212           raise errors.OpPrereqError("Given new secondary node %s is the same"
9213                                      " as the primary node of the instance" %
9214                                      self.op.remote_node, errors.ECODE_STATE)
9215         _CheckNodeOnline(self, self.op.remote_node)
9216         _CheckNodeNotDrained(self, self.op.remote_node)
9217         # FIXME: here we assume that the old instance type is DT_PLAIN
9218         assert instance.disk_template == constants.DT_PLAIN
9219         disks = [{"size": d.size, "vg": d.logical_id[0]}
9220                  for d in instance.disks]
9221         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9222         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9223
9224     # hvparams processing
9225     if self.op.hvparams:
9226       hv_type = instance.hypervisor
9227       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9228       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9229       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9230
9231       # local check
9232       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9233       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9234       self.hv_new = hv_new # the new actual values
9235       self.hv_inst = i_hvdict # the new dict (without defaults)
9236     else:
9237       self.hv_new = self.hv_inst = {}
9238
9239     # beparams processing
9240     if self.op.beparams:
9241       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9242                                    use_none=True)
9243       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9244       be_new = cluster.SimpleFillBE(i_bedict)
9245       self.be_new = be_new # the new actual values
9246       self.be_inst = i_bedict # the new dict (without defaults)
9247     else:
9248       self.be_new = self.be_inst = {}
9249
9250     # osparams processing
9251     if self.op.osparams:
9252       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9253       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9254       self.os_inst = i_osdict # the new dict (without defaults)
9255     else:
9256       self.os_inst = {}
9257
9258     self.warn = []
9259
9260     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9261       mem_check_list = [pnode]
9262       if be_new[constants.BE_AUTO_BALANCE]:
9263         # either we changed auto_balance to yes or it was from before
9264         mem_check_list.extend(instance.secondary_nodes)
9265       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9266                                                   instance.hypervisor)
9267       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9268                                          instance.hypervisor)
9269       pninfo = nodeinfo[pnode]
9270       msg = pninfo.fail_msg
9271       if msg:
9272         # Assume the primary node is unreachable and go ahead
9273         self.warn.append("Can't get info from primary node %s: %s" %
9274                          (pnode,  msg))
9275       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9276         self.warn.append("Node data from primary node %s doesn't contain"
9277                          " free memory information" % pnode)
9278       elif instance_info.fail_msg:
9279         self.warn.append("Can't get instance runtime information: %s" %
9280                         instance_info.fail_msg)
9281       else:
9282         if instance_info.payload:
9283           current_mem = int(instance_info.payload['memory'])
9284         else:
9285           # Assume instance not running
9286           # (there is a slight race condition here, but it's not very probable,
9287           # and we have no other way to check)
9288           current_mem = 0
9289         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9290                     pninfo.payload['memory_free'])
9291         if miss_mem > 0:
9292           raise errors.OpPrereqError("This change will prevent the instance"
9293                                      " from starting, due to %d MB of memory"
9294                                      " missing on its primary node" % miss_mem,
9295                                      errors.ECODE_NORES)
9296
9297       if be_new[constants.BE_AUTO_BALANCE]:
9298         for node, nres in nodeinfo.items():
9299           if node not in instance.secondary_nodes:
9300             continue
9301           msg = nres.fail_msg
9302           if msg:
9303             self.warn.append("Can't get info from secondary node %s: %s" %
9304                              (node, msg))
9305           elif not isinstance(nres.payload.get('memory_free', None), int):
9306             self.warn.append("Secondary node %s didn't return free"
9307                              " memory information" % node)
9308           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9309             self.warn.append("Not enough memory to failover instance to"
9310                              " secondary node %s" % node)
9311
9312     # NIC processing
9313     self.nic_pnew = {}
9314     self.nic_pinst = {}
9315     for nic_op, nic_dict in self.op.nics:
9316       if nic_op == constants.DDM_REMOVE:
9317         if not instance.nics:
9318           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9319                                      errors.ECODE_INVAL)
9320         continue
9321       if nic_op != constants.DDM_ADD:
9322         # an existing nic
9323         if not instance.nics:
9324           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9325                                      " no NICs" % nic_op,
9326                                      errors.ECODE_INVAL)
9327         if nic_op < 0 or nic_op >= len(instance.nics):
9328           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9329                                      " are 0 to %d" %
9330                                      (nic_op, len(instance.nics) - 1),
9331                                      errors.ECODE_INVAL)
9332         old_nic_params = instance.nics[nic_op].nicparams
9333         old_nic_ip = instance.nics[nic_op].ip
9334       else:
9335         old_nic_params = {}
9336         old_nic_ip = None
9337
9338       update_params_dict = dict([(key, nic_dict[key])
9339                                  for key in constants.NICS_PARAMETERS
9340                                  if key in nic_dict])
9341
9342       if 'bridge' in nic_dict:
9343         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9344
9345       new_nic_params = _GetUpdatedParams(old_nic_params,
9346                                          update_params_dict)
9347       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9348       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9349       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9350       self.nic_pinst[nic_op] = new_nic_params
9351       self.nic_pnew[nic_op] = new_filled_nic_params
9352       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9353
9354       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9355         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9356         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9357         if msg:
9358           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9359           if self.op.force:
9360             self.warn.append(msg)
9361           else:
9362             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9363       if new_nic_mode == constants.NIC_MODE_ROUTED:
9364         if 'ip' in nic_dict:
9365           nic_ip = nic_dict['ip']
9366         else:
9367           nic_ip = old_nic_ip
9368         if nic_ip is None:
9369           raise errors.OpPrereqError('Cannot set the nic ip to None'
9370                                      ' on a routed nic', errors.ECODE_INVAL)
9371       if 'mac' in nic_dict:
9372         nic_mac = nic_dict['mac']
9373         if nic_mac is None:
9374           raise errors.OpPrereqError('Cannot set the nic mac to None',
9375                                      errors.ECODE_INVAL)
9376         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9377           # otherwise generate the mac
9378           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9379         else:
9380           # or validate/reserve the current one
9381           try:
9382             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9383           except errors.ReservationError:
9384             raise errors.OpPrereqError("MAC address %s already in use"
9385                                        " in cluster" % nic_mac,
9386                                        errors.ECODE_NOTUNIQUE)
9387
9388     # DISK processing
9389     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9390       raise errors.OpPrereqError("Disk operations not supported for"
9391                                  " diskless instances",
9392                                  errors.ECODE_INVAL)
9393     for disk_op, _ in self.op.disks:
9394       if disk_op == constants.DDM_REMOVE:
9395         if len(instance.disks) == 1:
9396           raise errors.OpPrereqError("Cannot remove the last disk of"
9397                                      " an instance", errors.ECODE_INVAL)
9398         _CheckInstanceDown(self, instance, "cannot remove disks")
9399
9400       if (disk_op == constants.DDM_ADD and
9401           len(instance.disks) >= constants.MAX_DISKS):
9402         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9403                                    " add more" % constants.MAX_DISKS,
9404                                    errors.ECODE_STATE)
9405       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9406         # an existing disk
9407         if disk_op < 0 or disk_op >= len(instance.disks):
9408           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9409                                      " are 0 to %d" %
9410                                      (disk_op, len(instance.disks)),
9411                                      errors.ECODE_INVAL)
9412
9413     return
9414
9415   def _ConvertPlainToDrbd(self, feedback_fn):
9416     """Converts an instance from plain to drbd.
9417
9418     """
9419     feedback_fn("Converting template to drbd")
9420     instance = self.instance
9421     pnode = instance.primary_node
9422     snode = self.op.remote_node
9423
9424     # create a fake disk info for _GenerateDiskTemplate
9425     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9426     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9427                                       instance.name, pnode, [snode],
9428                                       disk_info, None, None, 0, feedback_fn)
9429     info = _GetInstanceInfoText(instance)
9430     feedback_fn("Creating aditional volumes...")
9431     # first, create the missing data and meta devices
9432     for disk in new_disks:
9433       # unfortunately this is... not too nice
9434       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9435                             info, True)
9436       for child in disk.children:
9437         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9438     # at this stage, all new LVs have been created, we can rename the
9439     # old ones
9440     feedback_fn("Renaming original volumes...")
9441     rename_list = [(o, n.children[0].logical_id)
9442                    for (o, n) in zip(instance.disks, new_disks)]
9443     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9444     result.Raise("Failed to rename original LVs")
9445
9446     feedback_fn("Initializing DRBD devices...")
9447     # all child devices are in place, we can now create the DRBD devices
9448     for disk in new_disks:
9449       for node in [pnode, snode]:
9450         f_create = node == pnode
9451         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9452
9453     # at this point, the instance has been modified
9454     instance.disk_template = constants.DT_DRBD8
9455     instance.disks = new_disks
9456     self.cfg.Update(instance, feedback_fn)
9457
9458     # disks are created, waiting for sync
9459     disk_abort = not _WaitForSync(self, instance)
9460     if disk_abort:
9461       raise errors.OpExecError("There are some degraded disks for"
9462                                " this instance, please cleanup manually")
9463
9464   def _ConvertDrbdToPlain(self, feedback_fn):
9465     """Converts an instance from drbd to plain.
9466
9467     """
9468     instance = self.instance
9469     assert len(instance.secondary_nodes) == 1
9470     pnode = instance.primary_node
9471     snode = instance.secondary_nodes[0]
9472     feedback_fn("Converting template to plain")
9473
9474     old_disks = instance.disks
9475     new_disks = [d.children[0] for d in old_disks]
9476
9477     # copy over size and mode
9478     for parent, child in zip(old_disks, new_disks):
9479       child.size = parent.size
9480       child.mode = parent.mode
9481
9482     # update instance structure
9483     instance.disks = new_disks
9484     instance.disk_template = constants.DT_PLAIN
9485     self.cfg.Update(instance, feedback_fn)
9486
9487     feedback_fn("Removing volumes on the secondary node...")
9488     for disk in old_disks:
9489       self.cfg.SetDiskID(disk, snode)
9490       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9491       if msg:
9492         self.LogWarning("Could not remove block device %s on node %s,"
9493                         " continuing anyway: %s", disk.iv_name, snode, msg)
9494
9495     feedback_fn("Removing unneeded volumes on the primary node...")
9496     for idx, disk in enumerate(old_disks):
9497       meta = disk.children[1]
9498       self.cfg.SetDiskID(meta, pnode)
9499       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9500       if msg:
9501         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9502                         " continuing anyway: %s", idx, pnode, msg)
9503
9504   def Exec(self, feedback_fn):
9505     """Modifies an instance.
9506
9507     All parameters take effect only at the next restart of the instance.
9508
9509     """
9510     # Process here the warnings from CheckPrereq, as we don't have a
9511     # feedback_fn there.
9512     for warn in self.warn:
9513       feedback_fn("WARNING: %s" % warn)
9514
9515     result = []
9516     instance = self.instance
9517     # disk changes
9518     for disk_op, disk_dict in self.op.disks:
9519       if disk_op == constants.DDM_REMOVE:
9520         # remove the last disk
9521         device = instance.disks.pop()
9522         device_idx = len(instance.disks)
9523         for node, disk in device.ComputeNodeTree(instance.primary_node):
9524           self.cfg.SetDiskID(disk, node)
9525           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9526           if msg:
9527             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9528                             " continuing anyway", device_idx, node, msg)
9529         result.append(("disk/%d" % device_idx, "remove"))
9530       elif disk_op == constants.DDM_ADD:
9531         # add a new disk
9532         if instance.disk_template == constants.DT_FILE:
9533           file_driver, file_path = instance.disks[0].logical_id
9534           file_path = os.path.dirname(file_path)
9535         else:
9536           file_driver = file_path = None
9537         disk_idx_base = len(instance.disks)
9538         new_disk = _GenerateDiskTemplate(self,
9539                                          instance.disk_template,
9540                                          instance.name, instance.primary_node,
9541                                          instance.secondary_nodes,
9542                                          [disk_dict],
9543                                          file_path,
9544                                          file_driver,
9545                                          disk_idx_base, feedback_fn)[0]
9546         instance.disks.append(new_disk)
9547         info = _GetInstanceInfoText(instance)
9548
9549         logging.info("Creating volume %s for instance %s",
9550                      new_disk.iv_name, instance.name)
9551         # Note: this needs to be kept in sync with _CreateDisks
9552         #HARDCODE
9553         for node in instance.all_nodes:
9554           f_create = node == instance.primary_node
9555           try:
9556             _CreateBlockDev(self, node, instance, new_disk,
9557                             f_create, info, f_create)
9558           except errors.OpExecError, err:
9559             self.LogWarning("Failed to create volume %s (%s) on"
9560                             " node %s: %s",
9561                             new_disk.iv_name, new_disk, node, err)
9562         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9563                        (new_disk.size, new_disk.mode)))
9564       else:
9565         # change a given disk
9566         instance.disks[disk_op].mode = disk_dict['mode']
9567         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9568
9569     if self.op.disk_template:
9570       r_shut = _ShutdownInstanceDisks(self, instance)
9571       if not r_shut:
9572         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9573                                  " proceed with disk template conversion")
9574       mode = (instance.disk_template, self.op.disk_template)
9575       try:
9576         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9577       except:
9578         self.cfg.ReleaseDRBDMinors(instance.name)
9579         raise
9580       result.append(("disk_template", self.op.disk_template))
9581
9582     # NIC changes
9583     for nic_op, nic_dict in self.op.nics:
9584       if nic_op == constants.DDM_REMOVE:
9585         # remove the last nic
9586         del instance.nics[-1]
9587         result.append(("nic.%d" % len(instance.nics), "remove"))
9588       elif nic_op == constants.DDM_ADD:
9589         # mac and bridge should be set, by now
9590         mac = nic_dict['mac']
9591         ip = nic_dict.get('ip', None)
9592         nicparams = self.nic_pinst[constants.DDM_ADD]
9593         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9594         instance.nics.append(new_nic)
9595         result.append(("nic.%d" % (len(instance.nics) - 1),
9596                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9597                        (new_nic.mac, new_nic.ip,
9598                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9599                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9600                        )))
9601       else:
9602         for key in 'mac', 'ip':
9603           if key in nic_dict:
9604             setattr(instance.nics[nic_op], key, nic_dict[key])
9605         if nic_op in self.nic_pinst:
9606           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9607         for key, val in nic_dict.iteritems():
9608           result.append(("nic.%s/%d" % (key, nic_op), val))
9609
9610     # hvparams changes
9611     if self.op.hvparams:
9612       instance.hvparams = self.hv_inst
9613       for key, val in self.op.hvparams.iteritems():
9614         result.append(("hv/%s" % key, val))
9615
9616     # beparams changes
9617     if self.op.beparams:
9618       instance.beparams = self.be_inst
9619       for key, val in self.op.beparams.iteritems():
9620         result.append(("be/%s" % key, val))
9621
9622     # OS change
9623     if self.op.os_name:
9624       instance.os = self.op.os_name
9625
9626     # osparams changes
9627     if self.op.osparams:
9628       instance.osparams = self.os_inst
9629       for key, val in self.op.osparams.iteritems():
9630         result.append(("os/%s" % key, val))
9631
9632     self.cfg.Update(instance, feedback_fn)
9633
9634     return result
9635
9636   _DISK_CONVERSIONS = {
9637     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9638     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9639     }
9640
9641
9642 class LUBackupQuery(NoHooksLU):
9643   """Query the exports list
9644
9645   """
9646   REQ_BGL = False
9647
9648   def ExpandNames(self):
9649     self.needed_locks = {}
9650     self.share_locks[locking.LEVEL_NODE] = 1
9651     if not self.op.nodes:
9652       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9653     else:
9654       self.needed_locks[locking.LEVEL_NODE] = \
9655         _GetWantedNodes(self, self.op.nodes)
9656
9657   def Exec(self, feedback_fn):
9658     """Compute the list of all the exported system images.
9659
9660     @rtype: dict
9661     @return: a dictionary with the structure node->(export-list)
9662         where export-list is a list of the instances exported on
9663         that node.
9664
9665     """
9666     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9667     rpcresult = self.rpc.call_export_list(self.nodes)
9668     result = {}
9669     for node in rpcresult:
9670       if rpcresult[node].fail_msg:
9671         result[node] = False
9672       else:
9673         result[node] = rpcresult[node].payload
9674
9675     return result
9676
9677
9678 class LUBackupPrepare(NoHooksLU):
9679   """Prepares an instance for an export and returns useful information.
9680
9681   """
9682   REQ_BGL = False
9683
9684   def ExpandNames(self):
9685     self._ExpandAndLockInstance()
9686
9687   def CheckPrereq(self):
9688     """Check prerequisites.
9689
9690     """
9691     instance_name = self.op.instance_name
9692
9693     self.instance = self.cfg.GetInstanceInfo(instance_name)
9694     assert self.instance is not None, \
9695           "Cannot retrieve locked instance %s" % self.op.instance_name
9696     _CheckNodeOnline(self, self.instance.primary_node)
9697
9698     self._cds = _GetClusterDomainSecret()
9699
9700   def Exec(self, feedback_fn):
9701     """Prepares an instance for an export.
9702
9703     """
9704     instance = self.instance
9705
9706     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9707       salt = utils.GenerateSecret(8)
9708
9709       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9710       result = self.rpc.call_x509_cert_create(instance.primary_node,
9711                                               constants.RIE_CERT_VALIDITY)
9712       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9713
9714       (name, cert_pem) = result.payload
9715
9716       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9717                                              cert_pem)
9718
9719       return {
9720         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9721         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9722                           salt),
9723         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9724         }
9725
9726     return None
9727
9728
9729 class LUBackupExport(LogicalUnit):
9730   """Export an instance to an image in the cluster.
9731
9732   """
9733   HPATH = "instance-export"
9734   HTYPE = constants.HTYPE_INSTANCE
9735   REQ_BGL = False
9736
9737   def CheckArguments(self):
9738     """Check the arguments.
9739
9740     """
9741     self.x509_key_name = self.op.x509_key_name
9742     self.dest_x509_ca_pem = self.op.destination_x509_ca
9743
9744     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9745       if not self.x509_key_name:
9746         raise errors.OpPrereqError("Missing X509 key name for encryption",
9747                                    errors.ECODE_INVAL)
9748
9749       if not self.dest_x509_ca_pem:
9750         raise errors.OpPrereqError("Missing destination X509 CA",
9751                                    errors.ECODE_INVAL)
9752
9753   def ExpandNames(self):
9754     self._ExpandAndLockInstance()
9755
9756     # Lock all nodes for local exports
9757     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9758       # FIXME: lock only instance primary and destination node
9759       #
9760       # Sad but true, for now we have do lock all nodes, as we don't know where
9761       # the previous export might be, and in this LU we search for it and
9762       # remove it from its current node. In the future we could fix this by:
9763       #  - making a tasklet to search (share-lock all), then create the
9764       #    new one, then one to remove, after
9765       #  - removing the removal operation altogether
9766       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9767
9768   def DeclareLocks(self, level):
9769     """Last minute lock declaration."""
9770     # All nodes are locked anyway, so nothing to do here.
9771
9772   def BuildHooksEnv(self):
9773     """Build hooks env.
9774
9775     This will run on the master, primary node and target node.
9776
9777     """
9778     env = {
9779       "EXPORT_MODE": self.op.mode,
9780       "EXPORT_NODE": self.op.target_node,
9781       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9782       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9783       # TODO: Generic function for boolean env variables
9784       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9785       }
9786
9787     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9788
9789     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9790
9791     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9792       nl.append(self.op.target_node)
9793
9794     return env, nl, nl
9795
9796   def CheckPrereq(self):
9797     """Check prerequisites.
9798
9799     This checks that the instance and node names are valid.
9800
9801     """
9802     instance_name = self.op.instance_name
9803
9804     self.instance = self.cfg.GetInstanceInfo(instance_name)
9805     assert self.instance is not None, \
9806           "Cannot retrieve locked instance %s" % self.op.instance_name
9807     _CheckNodeOnline(self, self.instance.primary_node)
9808
9809     if (self.op.remove_instance and self.instance.admin_up and
9810         not self.op.shutdown):
9811       raise errors.OpPrereqError("Can not remove instance without shutting it"
9812                                  " down before")
9813
9814     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9815       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9816       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9817       assert self.dst_node is not None
9818
9819       _CheckNodeOnline(self, self.dst_node.name)
9820       _CheckNodeNotDrained(self, self.dst_node.name)
9821
9822       self._cds = None
9823       self.dest_disk_info = None
9824       self.dest_x509_ca = None
9825
9826     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9827       self.dst_node = None
9828
9829       if len(self.op.target_node) != len(self.instance.disks):
9830         raise errors.OpPrereqError(("Received destination information for %s"
9831                                     " disks, but instance %s has %s disks") %
9832                                    (len(self.op.target_node), instance_name,
9833                                     len(self.instance.disks)),
9834                                    errors.ECODE_INVAL)
9835
9836       cds = _GetClusterDomainSecret()
9837
9838       # Check X509 key name
9839       try:
9840         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9841       except (TypeError, ValueError), err:
9842         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9843
9844       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9845         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9846                                    errors.ECODE_INVAL)
9847
9848       # Load and verify CA
9849       try:
9850         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9851       except OpenSSL.crypto.Error, err:
9852         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9853                                    (err, ), errors.ECODE_INVAL)
9854
9855       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9856       if errcode is not None:
9857         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9858                                    (msg, ), errors.ECODE_INVAL)
9859
9860       self.dest_x509_ca = cert
9861
9862       # Verify target information
9863       disk_info = []
9864       for idx, disk_data in enumerate(self.op.target_node):
9865         try:
9866           (host, port, magic) = \
9867             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9868         except errors.GenericError, err:
9869           raise errors.OpPrereqError("Target info for disk %s: %s" %
9870                                      (idx, err), errors.ECODE_INVAL)
9871
9872         disk_info.append((host, port, magic))
9873
9874       assert len(disk_info) == len(self.op.target_node)
9875       self.dest_disk_info = disk_info
9876
9877     else:
9878       raise errors.ProgrammerError("Unhandled export mode %r" %
9879                                    self.op.mode)
9880
9881     # instance disk type verification
9882     # TODO: Implement export support for file-based disks
9883     for disk in self.instance.disks:
9884       if disk.dev_type == constants.LD_FILE:
9885         raise errors.OpPrereqError("Export not supported for instances with"
9886                                    " file-based disks", errors.ECODE_INVAL)
9887
9888   def _CleanupExports(self, feedback_fn):
9889     """Removes exports of current instance from all other nodes.
9890
9891     If an instance in a cluster with nodes A..D was exported to node C, its
9892     exports will be removed from the nodes A, B and D.
9893
9894     """
9895     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9896
9897     nodelist = self.cfg.GetNodeList()
9898     nodelist.remove(self.dst_node.name)
9899
9900     # on one-node clusters nodelist will be empty after the removal
9901     # if we proceed the backup would be removed because OpBackupQuery
9902     # substitutes an empty list with the full cluster node list.
9903     iname = self.instance.name
9904     if nodelist:
9905       feedback_fn("Removing old exports for instance %s" % iname)
9906       exportlist = self.rpc.call_export_list(nodelist)
9907       for node in exportlist:
9908         if exportlist[node].fail_msg:
9909           continue
9910         if iname in exportlist[node].payload:
9911           msg = self.rpc.call_export_remove(node, iname).fail_msg
9912           if msg:
9913             self.LogWarning("Could not remove older export for instance %s"
9914                             " on node %s: %s", iname, node, msg)
9915
9916   def Exec(self, feedback_fn):
9917     """Export an instance to an image in the cluster.
9918
9919     """
9920     assert self.op.mode in constants.EXPORT_MODES
9921
9922     instance = self.instance
9923     src_node = instance.primary_node
9924
9925     if self.op.shutdown:
9926       # shutdown the instance, but not the disks
9927       feedback_fn("Shutting down instance %s" % instance.name)
9928       result = self.rpc.call_instance_shutdown(src_node, instance,
9929                                                self.op.shutdown_timeout)
9930       # TODO: Maybe ignore failures if ignore_remove_failures is set
9931       result.Raise("Could not shutdown instance %s on"
9932                    " node %s" % (instance.name, src_node))
9933
9934     # set the disks ID correctly since call_instance_start needs the
9935     # correct drbd minor to create the symlinks
9936     for disk in instance.disks:
9937       self.cfg.SetDiskID(disk, src_node)
9938
9939     activate_disks = (not instance.admin_up)
9940
9941     if activate_disks:
9942       # Activate the instance disks if we'exporting a stopped instance
9943       feedback_fn("Activating disks for %s" % instance.name)
9944       _StartInstanceDisks(self, instance, None)
9945
9946     try:
9947       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9948                                                      instance)
9949
9950       helper.CreateSnapshots()
9951       try:
9952         if (self.op.shutdown and instance.admin_up and
9953             not self.op.remove_instance):
9954           assert not activate_disks
9955           feedback_fn("Starting instance %s" % instance.name)
9956           result = self.rpc.call_instance_start(src_node, instance, None, None)
9957           msg = result.fail_msg
9958           if msg:
9959             feedback_fn("Failed to start instance: %s" % msg)
9960             _ShutdownInstanceDisks(self, instance)
9961             raise errors.OpExecError("Could not start instance: %s" % msg)
9962
9963         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9964           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9965         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9966           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9967           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9968
9969           (key_name, _, _) = self.x509_key_name
9970
9971           dest_ca_pem = \
9972             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9973                                             self.dest_x509_ca)
9974
9975           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9976                                                      key_name, dest_ca_pem,
9977                                                      timeouts)
9978       finally:
9979         helper.Cleanup()
9980
9981       # Check for backwards compatibility
9982       assert len(dresults) == len(instance.disks)
9983       assert compat.all(isinstance(i, bool) for i in dresults), \
9984              "Not all results are boolean: %r" % dresults
9985
9986     finally:
9987       if activate_disks:
9988         feedback_fn("Deactivating disks for %s" % instance.name)
9989         _ShutdownInstanceDisks(self, instance)
9990
9991     if not (compat.all(dresults) and fin_resu):
9992       failures = []
9993       if not fin_resu:
9994         failures.append("export finalization")
9995       if not compat.all(dresults):
9996         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9997                                if not dsk)
9998         failures.append("disk export: disk(s) %s" % fdsk)
9999
10000       raise errors.OpExecError("Export failed, errors in %s" %
10001                                utils.CommaJoin(failures))
10002
10003     # At this point, the export was successful, we can cleanup/finish
10004
10005     # Remove instance if requested
10006     if self.op.remove_instance:
10007       feedback_fn("Removing instance %s" % instance.name)
10008       _RemoveInstance(self, feedback_fn, instance,
10009                       self.op.ignore_remove_failures)
10010
10011     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10012       self._CleanupExports(feedback_fn)
10013
10014     return fin_resu, dresults
10015
10016
10017 class LUBackupRemove(NoHooksLU):
10018   """Remove exports related to the named instance.
10019
10020   """
10021   REQ_BGL = False
10022
10023   def ExpandNames(self):
10024     self.needed_locks = {}
10025     # We need all nodes to be locked in order for RemoveExport to work, but we
10026     # don't need to lock the instance itself, as nothing will happen to it (and
10027     # we can remove exports also for a removed instance)
10028     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10029
10030   def Exec(self, feedback_fn):
10031     """Remove any export.
10032
10033     """
10034     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10035     # If the instance was not found we'll try with the name that was passed in.
10036     # This will only work if it was an FQDN, though.
10037     fqdn_warn = False
10038     if not instance_name:
10039       fqdn_warn = True
10040       instance_name = self.op.instance_name
10041
10042     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10043     exportlist = self.rpc.call_export_list(locked_nodes)
10044     found = False
10045     for node in exportlist:
10046       msg = exportlist[node].fail_msg
10047       if msg:
10048         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10049         continue
10050       if instance_name in exportlist[node].payload:
10051         found = True
10052         result = self.rpc.call_export_remove(node, instance_name)
10053         msg = result.fail_msg
10054         if msg:
10055           logging.error("Could not remove export for instance %s"
10056                         " on node %s: %s", instance_name, node, msg)
10057
10058     if fqdn_warn and not found:
10059       feedback_fn("Export not found. If trying to remove an export belonging"
10060                   " to a deleted instance please use its Fully Qualified"
10061                   " Domain Name.")
10062
10063
10064 class LUGroupAdd(LogicalUnit):
10065   """Logical unit for creating node groups.
10066
10067   """
10068   HPATH = "group-add"
10069   HTYPE = constants.HTYPE_GROUP
10070   REQ_BGL = False
10071
10072   def ExpandNames(self):
10073     # We need the new group's UUID here so that we can create and acquire the
10074     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10075     # that it should not check whether the UUID exists in the configuration.
10076     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10077     self.needed_locks = {}
10078     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10079
10080   def CheckPrereq(self):
10081     """Check prerequisites.
10082
10083     This checks that the given group name is not an existing node group
10084     already.
10085
10086     """
10087     try:
10088       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10089     except errors.OpPrereqError:
10090       pass
10091     else:
10092       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10093                                  " node group (UUID: %s)" %
10094                                  (self.op.group_name, existing_uuid),
10095                                  errors.ECODE_EXISTS)
10096
10097     if self.op.ndparams:
10098       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10099
10100   def BuildHooksEnv(self):
10101     """Build hooks env.
10102
10103     """
10104     env = {
10105       "GROUP_NAME": self.op.group_name,
10106       }
10107     mn = self.cfg.GetMasterNode()
10108     return env, [mn], [mn]
10109
10110   def Exec(self, feedback_fn):
10111     """Add the node group to the cluster.
10112
10113     """
10114     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10115                                   uuid=self.group_uuid,
10116                                   alloc_policy=self.op.alloc_policy,
10117                                   ndparams=self.op.ndparams)
10118
10119     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10120     del self.remove_locks[locking.LEVEL_NODEGROUP]
10121
10122
10123 class LUGroupAssignNodes(NoHooksLU):
10124   """Logical unit for assigning nodes to groups.
10125
10126   """
10127   REQ_BGL = False
10128
10129   def ExpandNames(self):
10130     # These raise errors.OpPrereqError on their own:
10131     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10132     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10133
10134     # We want to lock all the affected nodes and groups. We have readily
10135     # available the list of nodes, and the *destination* group. To gather the
10136     # list of "source" groups, we need to fetch node information.
10137     self.node_data = self.cfg.GetAllNodesInfo()
10138     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10139     affected_groups.add(self.group_uuid)
10140
10141     self.needed_locks = {
10142       locking.LEVEL_NODEGROUP: list(affected_groups),
10143       locking.LEVEL_NODE: self.op.nodes,
10144       }
10145
10146   def CheckPrereq(self):
10147     """Check prerequisites.
10148
10149     """
10150     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10151     instance_data = self.cfg.GetAllInstancesInfo()
10152
10153     if self.group is None:
10154       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10155                                (self.op.group_name, self.group_uuid))
10156
10157     (new_splits, previous_splits) = \
10158       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10159                                              for node in self.op.nodes],
10160                                             self.node_data, instance_data)
10161
10162     if new_splits:
10163       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10164
10165       if not self.op.force:
10166         raise errors.OpExecError("The following instances get split by this"
10167                                  " change and --force was not given: %s" %
10168                                  fmt_new_splits)
10169       else:
10170         self.LogWarning("This operation will split the following instances: %s",
10171                         fmt_new_splits)
10172
10173         if previous_splits:
10174           self.LogWarning("In addition, these already-split instances continue"
10175                           " to be spit across groups: %s",
10176                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10177
10178   def Exec(self, feedback_fn):
10179     """Assign nodes to a new group.
10180
10181     """
10182     for node in self.op.nodes:
10183       self.node_data[node].group = self.group_uuid
10184
10185     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10186
10187   @staticmethod
10188   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10189     """Check for split instances after a node assignment.
10190
10191     This method considers a series of node assignments as an atomic operation,
10192     and returns information about split instances after applying the set of
10193     changes.
10194
10195     In particular, it returns information about newly split instances, and
10196     instances that were already split, and remain so after the change.
10197
10198     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10199     considered.
10200
10201     @type changes: list of (node_name, new_group_uuid) pairs.
10202     @param changes: list of node assignments to consider.
10203     @param node_data: a dict with data for all nodes
10204     @param instance_data: a dict with all instances to consider
10205     @rtype: a two-tuple
10206     @return: a list of instances that were previously okay and result split as a
10207       consequence of this change, and a list of instances that were previously
10208       split and this change does not fix.
10209
10210     """
10211     changed_nodes = dict((node, group) for node, group in changes
10212                          if node_data[node].group != group)
10213
10214     all_split_instances = set()
10215     previously_split_instances = set()
10216
10217     def InstanceNodes(instance):
10218       return [instance.primary_node] + list(instance.secondary_nodes)
10219
10220     for inst in instance_data.values():
10221       if inst.disk_template not in constants.DTS_NET_MIRROR:
10222         continue
10223
10224       instance_nodes = InstanceNodes(inst)
10225
10226       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10227         previously_split_instances.add(inst.name)
10228
10229       if len(set(changed_nodes.get(node, node_data[node].group)
10230                  for node in instance_nodes)) > 1:
10231         all_split_instances.add(inst.name)
10232
10233     return (list(all_split_instances - previously_split_instances),
10234             list(previously_split_instances & all_split_instances))
10235
10236
10237 class _GroupQuery(_QueryBase):
10238
10239   FIELDS = query.GROUP_FIELDS
10240
10241   def ExpandNames(self, lu):
10242     lu.needed_locks = {}
10243
10244     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10245     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10246
10247     if not self.names:
10248       self.wanted = [name_to_uuid[name]
10249                      for name in utils.NiceSort(name_to_uuid.keys())]
10250     else:
10251       # Accept names to be either names or UUIDs.
10252       missing = []
10253       self.wanted = []
10254       all_uuid = frozenset(self._all_groups.keys())
10255
10256       for name in self.names:
10257         if name in all_uuid:
10258           self.wanted.append(name)
10259         elif name in name_to_uuid:
10260           self.wanted.append(name_to_uuid[name])
10261         else:
10262           missing.append(name)
10263
10264       if missing:
10265         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10266                                    errors.ECODE_NOENT)
10267
10268   def DeclareLocks(self, lu, level):
10269     pass
10270
10271   def _GetQueryData(self, lu):
10272     """Computes the list of node groups and their attributes.
10273
10274     """
10275     do_nodes = query.GQ_NODE in self.requested_data
10276     do_instances = query.GQ_INST in self.requested_data
10277
10278     group_to_nodes = None
10279     group_to_instances = None
10280
10281     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10282     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10283     # latter GetAllInstancesInfo() is not enough, for we have to go through
10284     # instance->node. Hence, we will need to process nodes even if we only need
10285     # instance information.
10286     if do_nodes or do_instances:
10287       all_nodes = lu.cfg.GetAllNodesInfo()
10288       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10289       node_to_group = {}
10290
10291       for node in all_nodes.values():
10292         if node.group in group_to_nodes:
10293           group_to_nodes[node.group].append(node.name)
10294           node_to_group[node.name] = node.group
10295
10296       if do_instances:
10297         all_instances = lu.cfg.GetAllInstancesInfo()
10298         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10299
10300         for instance in all_instances.values():
10301           node = instance.primary_node
10302           if node in node_to_group:
10303             group_to_instances[node_to_group[node]].append(instance.name)
10304
10305         if not do_nodes:
10306           # Do not pass on node information if it was not requested.
10307           group_to_nodes = None
10308
10309     return query.GroupQueryData([self._all_groups[uuid]
10310                                  for uuid in self.wanted],
10311                                 group_to_nodes, group_to_instances)
10312
10313
10314 class LUGroupQuery(NoHooksLU):
10315   """Logical unit for querying node groups.
10316
10317   """
10318   REQ_BGL = False
10319
10320   def CheckArguments(self):
10321     self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10322
10323   def ExpandNames(self):
10324     self.gq.ExpandNames(self)
10325
10326   def Exec(self, feedback_fn):
10327     return self.gq.OldStyleQuery(self)
10328
10329
10330 class LUGroupSetParams(LogicalUnit):
10331   """Modifies the parameters of a node group.
10332
10333   """
10334   HPATH = "group-modify"
10335   HTYPE = constants.HTYPE_GROUP
10336   REQ_BGL = False
10337
10338   def CheckArguments(self):
10339     all_changes = [
10340       self.op.ndparams,
10341       self.op.alloc_policy,
10342       ]
10343
10344     if all_changes.count(None) == len(all_changes):
10345       raise errors.OpPrereqError("Please pass at least one modification",
10346                                  errors.ECODE_INVAL)
10347
10348   def ExpandNames(self):
10349     # This raises errors.OpPrereqError on its own:
10350     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10351
10352     self.needed_locks = {
10353       locking.LEVEL_NODEGROUP: [self.group_uuid],
10354       }
10355
10356   def CheckPrereq(self):
10357     """Check prerequisites.
10358
10359     """
10360     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10361
10362     if self.group is None:
10363       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10364                                (self.op.group_name, self.group_uuid))
10365
10366     if self.op.ndparams:
10367       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10368       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10369       self.new_ndparams = new_ndparams
10370
10371   def BuildHooksEnv(self):
10372     """Build hooks env.
10373
10374     """
10375     env = {
10376       "GROUP_NAME": self.op.group_name,
10377       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10378       }
10379     mn = self.cfg.GetMasterNode()
10380     return env, [mn], [mn]
10381
10382   def Exec(self, feedback_fn):
10383     """Modifies the node group.
10384
10385     """
10386     result = []
10387
10388     if self.op.ndparams:
10389       self.group.ndparams = self.new_ndparams
10390       result.append(("ndparams", str(self.group.ndparams)))
10391
10392     if self.op.alloc_policy:
10393       self.group.alloc_policy = self.op.alloc_policy
10394
10395     self.cfg.Update(self.group, feedback_fn)
10396     return result
10397
10398
10399
10400 class LUGroupRemove(LogicalUnit):
10401   HPATH = "group-remove"
10402   HTYPE = constants.HTYPE_GROUP
10403   REQ_BGL = False
10404
10405   def ExpandNames(self):
10406     # This will raises errors.OpPrereqError on its own:
10407     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10408     self.needed_locks = {
10409       locking.LEVEL_NODEGROUP: [self.group_uuid],
10410       }
10411
10412   def CheckPrereq(self):
10413     """Check prerequisites.
10414
10415     This checks that the given group name exists as a node group, that is
10416     empty (i.e., contains no nodes), and that is not the last group of the
10417     cluster.
10418
10419     """
10420     # Verify that the group is empty.
10421     group_nodes = [node.name
10422                    for node in self.cfg.GetAllNodesInfo().values()
10423                    if node.group == self.group_uuid]
10424
10425     if group_nodes:
10426       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10427                                  " nodes: %s" %
10428                                  (self.op.group_name,
10429                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10430                                  errors.ECODE_STATE)
10431
10432     # Verify the cluster would not be left group-less.
10433     if len(self.cfg.GetNodeGroupList()) == 1:
10434       raise errors.OpPrereqError("Group '%s' is the only group,"
10435                                  " cannot be removed" %
10436                                  self.op.group_name,
10437                                  errors.ECODE_STATE)
10438
10439   def BuildHooksEnv(self):
10440     """Build hooks env.
10441
10442     """
10443     env = {
10444       "GROUP_NAME": self.op.group_name,
10445       }
10446     mn = self.cfg.GetMasterNode()
10447     return env, [mn], [mn]
10448
10449   def Exec(self, feedback_fn):
10450     """Remove the node group.
10451
10452     """
10453     try:
10454       self.cfg.RemoveNodeGroup(self.group_uuid)
10455     except errors.ConfigurationError:
10456       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10457                                (self.op.group_name, self.group_uuid))
10458
10459     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10460
10461
10462 class LUGroupRename(LogicalUnit):
10463   HPATH = "group-rename"
10464   HTYPE = constants.HTYPE_GROUP
10465   REQ_BGL = False
10466
10467   def ExpandNames(self):
10468     # This raises errors.OpPrereqError on its own:
10469     self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10470
10471     self.needed_locks = {
10472       locking.LEVEL_NODEGROUP: [self.group_uuid],
10473       }
10474
10475   def CheckPrereq(self):
10476     """Check prerequisites.
10477
10478     This checks that the given old_name exists as a node group, and that
10479     new_name doesn't.
10480
10481     """
10482     try:
10483       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10484     except errors.OpPrereqError:
10485       pass
10486     else:
10487       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10488                                  " node group (UUID: %s)" %
10489                                  (self.op.new_name, new_name_uuid),
10490                                  errors.ECODE_EXISTS)
10491
10492   def BuildHooksEnv(self):
10493     """Build hooks env.
10494
10495     """
10496     env = {
10497       "OLD_NAME": self.op.old_name,
10498       "NEW_NAME": self.op.new_name,
10499       }
10500
10501     mn = self.cfg.GetMasterNode()
10502     all_nodes = self.cfg.GetAllNodesInfo()
10503     run_nodes = [mn]
10504     all_nodes.pop(mn, None)
10505
10506     for node in all_nodes.values():
10507       if node.group == self.group_uuid:
10508         run_nodes.append(node.name)
10509
10510     return env, run_nodes, run_nodes
10511
10512   def Exec(self, feedback_fn):
10513     """Rename the node group.
10514
10515     """
10516     group = self.cfg.GetNodeGroup(self.group_uuid)
10517
10518     if group is None:
10519       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10520                                (self.op.old_name, self.group_uuid))
10521
10522     group.name = self.op.new_name
10523     self.cfg.Update(group, feedback_fn)
10524
10525     return self.op.new_name
10526
10527
10528 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10529   """Generic tags LU.
10530
10531   This is an abstract class which is the parent of all the other tags LUs.
10532
10533   """
10534
10535   def ExpandNames(self):
10536     self.needed_locks = {}
10537     if self.op.kind == constants.TAG_NODE:
10538       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10539       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10540     elif self.op.kind == constants.TAG_INSTANCE:
10541       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10542       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10543
10544     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10545     # not possible to acquire the BGL based on opcode parameters)
10546
10547   def CheckPrereq(self):
10548     """Check prerequisites.
10549
10550     """
10551     if self.op.kind == constants.TAG_CLUSTER:
10552       self.target = self.cfg.GetClusterInfo()
10553     elif self.op.kind == constants.TAG_NODE:
10554       self.target = self.cfg.GetNodeInfo(self.op.name)
10555     elif self.op.kind == constants.TAG_INSTANCE:
10556       self.target = self.cfg.GetInstanceInfo(self.op.name)
10557     else:
10558       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10559                                  str(self.op.kind), errors.ECODE_INVAL)
10560
10561
10562 class LUTagsGet(TagsLU):
10563   """Returns the tags of a given object.
10564
10565   """
10566   REQ_BGL = False
10567
10568   def ExpandNames(self):
10569     TagsLU.ExpandNames(self)
10570
10571     # Share locks as this is only a read operation
10572     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10573
10574   def Exec(self, feedback_fn):
10575     """Returns the tag list.
10576
10577     """
10578     return list(self.target.GetTags())
10579
10580
10581 class LUTagsSearch(NoHooksLU):
10582   """Searches the tags for a given pattern.
10583
10584   """
10585   REQ_BGL = False
10586
10587   def ExpandNames(self):
10588     self.needed_locks = {}
10589
10590   def CheckPrereq(self):
10591     """Check prerequisites.
10592
10593     This checks the pattern passed for validity by compiling it.
10594
10595     """
10596     try:
10597       self.re = re.compile(self.op.pattern)
10598     except re.error, err:
10599       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10600                                  (self.op.pattern, err), errors.ECODE_INVAL)
10601
10602   def Exec(self, feedback_fn):
10603     """Returns the tag list.
10604
10605     """
10606     cfg = self.cfg
10607     tgts = [("/cluster", cfg.GetClusterInfo())]
10608     ilist = cfg.GetAllInstancesInfo().values()
10609     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10610     nlist = cfg.GetAllNodesInfo().values()
10611     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10612     results = []
10613     for path, target in tgts:
10614       for tag in target.GetTags():
10615         if self.re.search(tag):
10616           results.append((path, tag))
10617     return results
10618
10619
10620 class LUTagsSet(TagsLU):
10621   """Sets a tag on a given object.
10622
10623   """
10624   REQ_BGL = False
10625
10626   def CheckPrereq(self):
10627     """Check prerequisites.
10628
10629     This checks the type and length of the tag name and value.
10630
10631     """
10632     TagsLU.CheckPrereq(self)
10633     for tag in self.op.tags:
10634       objects.TaggableObject.ValidateTag(tag)
10635
10636   def Exec(self, feedback_fn):
10637     """Sets the tag.
10638
10639     """
10640     try:
10641       for tag in self.op.tags:
10642         self.target.AddTag(tag)
10643     except errors.TagError, err:
10644       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10645     self.cfg.Update(self.target, feedback_fn)
10646
10647
10648 class LUTagsDel(TagsLU):
10649   """Delete a list of tags from a given object.
10650
10651   """
10652   REQ_BGL = False
10653
10654   def CheckPrereq(self):
10655     """Check prerequisites.
10656
10657     This checks that we have the given tag.
10658
10659     """
10660     TagsLU.CheckPrereq(self)
10661     for tag in self.op.tags:
10662       objects.TaggableObject.ValidateTag(tag)
10663     del_tags = frozenset(self.op.tags)
10664     cur_tags = self.target.GetTags()
10665
10666     diff_tags = del_tags - cur_tags
10667     if diff_tags:
10668       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10669       raise errors.OpPrereqError("Tag(s) %s not found" %
10670                                  (utils.CommaJoin(diff_names), ),
10671                                  errors.ECODE_NOENT)
10672
10673   def Exec(self, feedback_fn):
10674     """Remove the tag from the object.
10675
10676     """
10677     for tag in self.op.tags:
10678       self.target.RemoveTag(tag)
10679     self.cfg.Update(self.target, feedback_fn)
10680
10681
10682 class LUTestDelay(NoHooksLU):
10683   """Sleep for a specified amount of time.
10684
10685   This LU sleeps on the master and/or nodes for a specified amount of
10686   time.
10687
10688   """
10689   REQ_BGL = False
10690
10691   def ExpandNames(self):
10692     """Expand names and set required locks.
10693
10694     This expands the node list, if any.
10695
10696     """
10697     self.needed_locks = {}
10698     if self.op.on_nodes:
10699       # _GetWantedNodes can be used here, but is not always appropriate to use
10700       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10701       # more information.
10702       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10703       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10704
10705   def _TestDelay(self):
10706     """Do the actual sleep.
10707
10708     """
10709     if self.op.on_master:
10710       if not utils.TestDelay(self.op.duration):
10711         raise errors.OpExecError("Error during master delay test")
10712     if self.op.on_nodes:
10713       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10714       for node, node_result in result.items():
10715         node_result.Raise("Failure during rpc call to node %s" % node)
10716
10717   def Exec(self, feedback_fn):
10718     """Execute the test delay opcode, with the wanted repetitions.
10719
10720     """
10721     if self.op.repeat == 0:
10722       self._TestDelay()
10723     else:
10724       top_value = self.op.repeat - 1
10725       for i in range(self.op.repeat):
10726         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10727         self._TestDelay()
10728
10729
10730 class LUTestJqueue(NoHooksLU):
10731   """Utility LU to test some aspects of the job queue.
10732
10733   """
10734   REQ_BGL = False
10735
10736   # Must be lower than default timeout for WaitForJobChange to see whether it
10737   # notices changed jobs
10738   _CLIENT_CONNECT_TIMEOUT = 20.0
10739   _CLIENT_CONFIRM_TIMEOUT = 60.0
10740
10741   @classmethod
10742   def _NotifyUsingSocket(cls, cb, errcls):
10743     """Opens a Unix socket and waits for another program to connect.
10744
10745     @type cb: callable
10746     @param cb: Callback to send socket name to client
10747     @type errcls: class
10748     @param errcls: Exception class to use for errors
10749
10750     """
10751     # Using a temporary directory as there's no easy way to create temporary
10752     # sockets without writing a custom loop around tempfile.mktemp and
10753     # socket.bind
10754     tmpdir = tempfile.mkdtemp()
10755     try:
10756       tmpsock = utils.PathJoin(tmpdir, "sock")
10757
10758       logging.debug("Creating temporary socket at %s", tmpsock)
10759       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10760       try:
10761         sock.bind(tmpsock)
10762         sock.listen(1)
10763
10764         # Send details to client
10765         cb(tmpsock)
10766
10767         # Wait for client to connect before continuing
10768         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10769         try:
10770           (conn, _) = sock.accept()
10771         except socket.error, err:
10772           raise errcls("Client didn't connect in time (%s)" % err)
10773       finally:
10774         sock.close()
10775     finally:
10776       # Remove as soon as client is connected
10777       shutil.rmtree(tmpdir)
10778
10779     # Wait for client to close
10780     try:
10781       try:
10782         # pylint: disable-msg=E1101
10783         # Instance of '_socketobject' has no ... member
10784         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10785         conn.recv(1)
10786       except socket.error, err:
10787         raise errcls("Client failed to confirm notification (%s)" % err)
10788     finally:
10789       conn.close()
10790
10791   def _SendNotification(self, test, arg, sockname):
10792     """Sends a notification to the client.
10793
10794     @type test: string
10795     @param test: Test name
10796     @param arg: Test argument (depends on test)
10797     @type sockname: string
10798     @param sockname: Socket path
10799
10800     """
10801     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10802
10803   def _Notify(self, prereq, test, arg):
10804     """Notifies the client of a test.
10805
10806     @type prereq: bool
10807     @param prereq: Whether this is a prereq-phase test
10808     @type test: string
10809     @param test: Test name
10810     @param arg: Test argument (depends on test)
10811
10812     """
10813     if prereq:
10814       errcls = errors.OpPrereqError
10815     else:
10816       errcls = errors.OpExecError
10817
10818     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10819                                                   test, arg),
10820                                    errcls)
10821
10822   def CheckArguments(self):
10823     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10824     self.expandnames_calls = 0
10825
10826   def ExpandNames(self):
10827     checkargs_calls = getattr(self, "checkargs_calls", 0)
10828     if checkargs_calls < 1:
10829       raise errors.ProgrammerError("CheckArguments was not called")
10830
10831     self.expandnames_calls += 1
10832
10833     if self.op.notify_waitlock:
10834       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10835
10836     self.LogInfo("Expanding names")
10837
10838     # Get lock on master node (just to get a lock, not for a particular reason)
10839     self.needed_locks = {
10840       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10841       }
10842
10843   def Exec(self, feedback_fn):
10844     if self.expandnames_calls < 1:
10845       raise errors.ProgrammerError("ExpandNames was not called")
10846
10847     if self.op.notify_exec:
10848       self._Notify(False, constants.JQT_EXEC, None)
10849
10850     self.LogInfo("Executing")
10851
10852     if self.op.log_messages:
10853       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10854       for idx, msg in enumerate(self.op.log_messages):
10855         self.LogInfo("Sending log message %s", idx + 1)
10856         feedback_fn(constants.JQT_MSGPREFIX + msg)
10857         # Report how many test messages have been sent
10858         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10859
10860     if self.op.fail:
10861       raise errors.OpExecError("Opcode failure was requested")
10862
10863     return True
10864
10865
10866 class IAllocator(object):
10867   """IAllocator framework.
10868
10869   An IAllocator instance has three sets of attributes:
10870     - cfg that is needed to query the cluster
10871     - input data (all members of the _KEYS class attribute are required)
10872     - four buffer attributes (in|out_data|text), that represent the
10873       input (to the external script) in text and data structure format,
10874       and the output from it, again in two formats
10875     - the result variables from the script (success, info, nodes) for
10876       easy usage
10877
10878   """
10879   # pylint: disable-msg=R0902
10880   # lots of instance attributes
10881   _ALLO_KEYS = [
10882     "name", "mem_size", "disks", "disk_template",
10883     "os", "tags", "nics", "vcpus", "hypervisor",
10884     ]
10885   _RELO_KEYS = [
10886     "name", "relocate_from",
10887     ]
10888   _EVAC_KEYS = [
10889     "evac_nodes",
10890     ]
10891
10892   def __init__(self, cfg, rpc, mode, **kwargs):
10893     self.cfg = cfg
10894     self.rpc = rpc
10895     # init buffer variables
10896     self.in_text = self.out_text = self.in_data = self.out_data = None
10897     # init all input fields so that pylint is happy
10898     self.mode = mode
10899     self.mem_size = self.disks = self.disk_template = None
10900     self.os = self.tags = self.nics = self.vcpus = None
10901     self.hypervisor = None
10902     self.relocate_from = None
10903     self.name = None
10904     self.evac_nodes = None
10905     # computed fields
10906     self.required_nodes = None
10907     # init result fields
10908     self.success = self.info = self.result = None
10909     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10910       keyset = self._ALLO_KEYS
10911       fn = self._AddNewInstance
10912     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10913       keyset = self._RELO_KEYS
10914       fn = self._AddRelocateInstance
10915     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10916       keyset = self._EVAC_KEYS
10917       fn = self._AddEvacuateNodes
10918     else:
10919       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10920                                    " IAllocator" % self.mode)
10921     for key in kwargs:
10922       if key not in keyset:
10923         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10924                                      " IAllocator" % key)
10925       setattr(self, key, kwargs[key])
10926
10927     for key in keyset:
10928       if key not in kwargs:
10929         raise errors.ProgrammerError("Missing input parameter '%s' to"
10930                                      " IAllocator" % key)
10931     self._BuildInputData(fn)
10932
10933   def _ComputeClusterData(self):
10934     """Compute the generic allocator input data.
10935
10936     This is the data that is independent of the actual operation.
10937
10938     """
10939     cfg = self.cfg
10940     cluster_info = cfg.GetClusterInfo()
10941     # cluster data
10942     data = {
10943       "version": constants.IALLOCATOR_VERSION,
10944       "cluster_name": cfg.GetClusterName(),
10945       "cluster_tags": list(cluster_info.GetTags()),
10946       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10947       # we don't have job IDs
10948       }
10949     ninfo = cfg.GetAllNodesInfo()
10950     iinfo = cfg.GetAllInstancesInfo().values()
10951     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10952
10953     # node data
10954     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10955
10956     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10957       hypervisor_name = self.hypervisor
10958     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10959       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10960     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10961       hypervisor_name = cluster_info.enabled_hypervisors[0]
10962
10963     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10964                                         hypervisor_name)
10965     node_iinfo = \
10966       self.rpc.call_all_instances_info(node_list,
10967                                        cluster_info.enabled_hypervisors)
10968
10969     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10970
10971     config_ndata = self._ComputeBasicNodeData(ninfo)
10972     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10973                                                  i_list, config_ndata)
10974     assert len(data["nodes"]) == len(ninfo), \
10975         "Incomplete node data computed"
10976
10977     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10978
10979     self.in_data = data
10980
10981   @staticmethod
10982   def _ComputeNodeGroupData(cfg):
10983     """Compute node groups data.
10984
10985     """
10986     ng = {}
10987     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10988       ng[guuid] = {
10989         "name": gdata.name,
10990         "alloc_policy": gdata.alloc_policy,
10991         }
10992     return ng
10993
10994   @staticmethod
10995   def _ComputeBasicNodeData(node_cfg):
10996     """Compute global node data.
10997
10998     @rtype: dict
10999     @returns: a dict of name: (node dict, node config)
11000
11001     """
11002     node_results = {}
11003     for ninfo in node_cfg.values():
11004       # fill in static (config-based) values
11005       pnr = {
11006         "tags": list(ninfo.GetTags()),
11007         "primary_ip": ninfo.primary_ip,
11008         "secondary_ip": ninfo.secondary_ip,
11009         "offline": ninfo.offline,
11010         "drained": ninfo.drained,
11011         "master_candidate": ninfo.master_candidate,
11012         "group": ninfo.group,
11013         "master_capable": ninfo.master_capable,
11014         "vm_capable": ninfo.vm_capable,
11015         }
11016
11017       node_results[ninfo.name] = pnr
11018
11019     return node_results
11020
11021   @staticmethod
11022   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11023                               node_results):
11024     """Compute global node data.
11025
11026     @param node_results: the basic node structures as filled from the config
11027
11028     """
11029     # make a copy of the current dict
11030     node_results = dict(node_results)
11031     for nname, nresult in node_data.items():
11032       assert nname in node_results, "Missing basic data for node %s" % nname
11033       ninfo = node_cfg[nname]
11034
11035       if not (ninfo.offline or ninfo.drained):
11036         nresult.Raise("Can't get data for node %s" % nname)
11037         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11038                                 nname)
11039         remote_info = nresult.payload
11040
11041         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11042                      'vg_size', 'vg_free', 'cpu_total']:
11043           if attr not in remote_info:
11044             raise errors.OpExecError("Node '%s' didn't return attribute"
11045                                      " '%s'" % (nname, attr))
11046           if not isinstance(remote_info[attr], int):
11047             raise errors.OpExecError("Node '%s' returned invalid value"
11048                                      " for '%s': %s" %
11049                                      (nname, attr, remote_info[attr]))
11050         # compute memory used by primary instances
11051         i_p_mem = i_p_up_mem = 0
11052         for iinfo, beinfo in i_list:
11053           if iinfo.primary_node == nname:
11054             i_p_mem += beinfo[constants.BE_MEMORY]
11055             if iinfo.name not in node_iinfo[nname].payload:
11056               i_used_mem = 0
11057             else:
11058               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11059             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11060             remote_info['memory_free'] -= max(0, i_mem_diff)
11061
11062             if iinfo.admin_up:
11063               i_p_up_mem += beinfo[constants.BE_MEMORY]
11064
11065         # compute memory used by instances
11066         pnr_dyn = {
11067           "total_memory": remote_info['memory_total'],
11068           "reserved_memory": remote_info['memory_dom0'],
11069           "free_memory": remote_info['memory_free'],
11070           "total_disk": remote_info['vg_size'],
11071           "free_disk": remote_info['vg_free'],
11072           "total_cpus": remote_info['cpu_total'],
11073           "i_pri_memory": i_p_mem,
11074           "i_pri_up_memory": i_p_up_mem,
11075           }
11076         pnr_dyn.update(node_results[nname])
11077         node_results[nname] = pnr_dyn
11078
11079     return node_results
11080
11081   @staticmethod
11082   def _ComputeInstanceData(cluster_info, i_list):
11083     """Compute global instance data.
11084
11085     """
11086     instance_data = {}
11087     for iinfo, beinfo in i_list:
11088       nic_data = []
11089       for nic in iinfo.nics:
11090         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11091         nic_dict = {"mac": nic.mac,
11092                     "ip": nic.ip,
11093                     "mode": filled_params[constants.NIC_MODE],
11094                     "link": filled_params[constants.NIC_LINK],
11095                    }
11096         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11097           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11098         nic_data.append(nic_dict)
11099       pir = {
11100         "tags": list(iinfo.GetTags()),
11101         "admin_up": iinfo.admin_up,
11102         "vcpus": beinfo[constants.BE_VCPUS],
11103         "memory": beinfo[constants.BE_MEMORY],
11104         "os": iinfo.os,
11105         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11106         "nics": nic_data,
11107         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11108         "disk_template": iinfo.disk_template,
11109         "hypervisor": iinfo.hypervisor,
11110         }
11111       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11112                                                  pir["disks"])
11113       instance_data[iinfo.name] = pir
11114
11115     return instance_data
11116
11117   def _AddNewInstance(self):
11118     """Add new instance data to allocator structure.
11119
11120     This in combination with _AllocatorGetClusterData will create the
11121     correct structure needed as input for the allocator.
11122
11123     The checks for the completeness of the opcode must have already been
11124     done.
11125
11126     """
11127     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11128
11129     if self.disk_template in constants.DTS_NET_MIRROR:
11130       self.required_nodes = 2
11131     else:
11132       self.required_nodes = 1
11133     request = {
11134       "name": self.name,
11135       "disk_template": self.disk_template,
11136       "tags": self.tags,
11137       "os": self.os,
11138       "vcpus": self.vcpus,
11139       "memory": self.mem_size,
11140       "disks": self.disks,
11141       "disk_space_total": disk_space,
11142       "nics": self.nics,
11143       "required_nodes": self.required_nodes,
11144       }
11145     return request
11146
11147   def _AddRelocateInstance(self):
11148     """Add relocate instance data to allocator structure.
11149
11150     This in combination with _IAllocatorGetClusterData will create the
11151     correct structure needed as input for the allocator.
11152
11153     The checks for the completeness of the opcode must have already been
11154     done.
11155
11156     """
11157     instance = self.cfg.GetInstanceInfo(self.name)
11158     if instance is None:
11159       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11160                                    " IAllocator" % self.name)
11161
11162     if instance.disk_template not in constants.DTS_NET_MIRROR:
11163       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11164                                  errors.ECODE_INVAL)
11165
11166     if len(instance.secondary_nodes) != 1:
11167       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11168                                  errors.ECODE_STATE)
11169
11170     self.required_nodes = 1
11171     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11172     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11173
11174     request = {
11175       "name": self.name,
11176       "disk_space_total": disk_space,
11177       "required_nodes": self.required_nodes,
11178       "relocate_from": self.relocate_from,
11179       }
11180     return request
11181
11182   def _AddEvacuateNodes(self):
11183     """Add evacuate nodes data to allocator structure.
11184
11185     """
11186     request = {
11187       "evac_nodes": self.evac_nodes
11188       }
11189     return request
11190
11191   def _BuildInputData(self, fn):
11192     """Build input data structures.
11193
11194     """
11195     self._ComputeClusterData()
11196
11197     request = fn()
11198     request["type"] = self.mode
11199     self.in_data["request"] = request
11200
11201     self.in_text = serializer.Dump(self.in_data)
11202
11203   def Run(self, name, validate=True, call_fn=None):
11204     """Run an instance allocator and return the results.
11205
11206     """
11207     if call_fn is None:
11208       call_fn = self.rpc.call_iallocator_runner
11209
11210     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11211     result.Raise("Failure while running the iallocator script")
11212
11213     self.out_text = result.payload
11214     if validate:
11215       self._ValidateResult()
11216
11217   def _ValidateResult(self):
11218     """Process the allocator results.
11219
11220     This will process and if successful save the result in
11221     self.out_data and the other parameters.
11222
11223     """
11224     try:
11225       rdict = serializer.Load(self.out_text)
11226     except Exception, err:
11227       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11228
11229     if not isinstance(rdict, dict):
11230       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11231
11232     # TODO: remove backwards compatiblity in later versions
11233     if "nodes" in rdict and "result" not in rdict:
11234       rdict["result"] = rdict["nodes"]
11235       del rdict["nodes"]
11236
11237     for key in "success", "info", "result":
11238       if key not in rdict:
11239         raise errors.OpExecError("Can't parse iallocator results:"
11240                                  " missing key '%s'" % key)
11241       setattr(self, key, rdict[key])
11242
11243     if not isinstance(rdict["result"], list):
11244       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11245                                " is not a list")
11246     self.out_data = rdict
11247
11248
11249 class LUTestAllocator(NoHooksLU):
11250   """Run allocator tests.
11251
11252   This LU runs the allocator tests
11253
11254   """
11255   def CheckPrereq(self):
11256     """Check prerequisites.
11257
11258     This checks the opcode parameters depending on the director and mode test.
11259
11260     """
11261     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11262       for attr in ["mem_size", "disks", "disk_template",
11263                    "os", "tags", "nics", "vcpus"]:
11264         if not hasattr(self.op, attr):
11265           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11266                                      attr, errors.ECODE_INVAL)
11267       iname = self.cfg.ExpandInstanceName(self.op.name)
11268       if iname is not None:
11269         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11270                                    iname, errors.ECODE_EXISTS)
11271       if not isinstance(self.op.nics, list):
11272         raise errors.OpPrereqError("Invalid parameter 'nics'",
11273                                    errors.ECODE_INVAL)
11274       if not isinstance(self.op.disks, list):
11275         raise errors.OpPrereqError("Invalid parameter 'disks'",
11276                                    errors.ECODE_INVAL)
11277       for row in self.op.disks:
11278         if (not isinstance(row, dict) or
11279             "size" not in row or
11280             not isinstance(row["size"], int) or
11281             "mode" not in row or
11282             row["mode"] not in ['r', 'w']):
11283           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11284                                      " parameter", errors.ECODE_INVAL)
11285       if self.op.hypervisor is None:
11286         self.op.hypervisor = self.cfg.GetHypervisorType()
11287     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11288       fname = _ExpandInstanceName(self.cfg, self.op.name)
11289       self.op.name = fname
11290       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11291     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11292       if not hasattr(self.op, "evac_nodes"):
11293         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11294                                    " opcode input", errors.ECODE_INVAL)
11295     else:
11296       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11297                                  self.op.mode, errors.ECODE_INVAL)
11298
11299     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11300       if self.op.allocator is None:
11301         raise errors.OpPrereqError("Missing allocator name",
11302                                    errors.ECODE_INVAL)
11303     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11304       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11305                                  self.op.direction, errors.ECODE_INVAL)
11306
11307   def Exec(self, feedback_fn):
11308     """Run the allocator test.
11309
11310     """
11311     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11312       ial = IAllocator(self.cfg, self.rpc,
11313                        mode=self.op.mode,
11314                        name=self.op.name,
11315                        mem_size=self.op.mem_size,
11316                        disks=self.op.disks,
11317                        disk_template=self.op.disk_template,
11318                        os=self.op.os,
11319                        tags=self.op.tags,
11320                        nics=self.op.nics,
11321                        vcpus=self.op.vcpus,
11322                        hypervisor=self.op.hypervisor,
11323                        )
11324     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11325       ial = IAllocator(self.cfg, self.rpc,
11326                        mode=self.op.mode,
11327                        name=self.op.name,
11328                        relocate_from=list(self.relocate_from),
11329                        )
11330     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11331       ial = IAllocator(self.cfg, self.rpc,
11332                        mode=self.op.mode,
11333                        evac_nodes=self.op.evac_nodes)
11334     else:
11335       raise errors.ProgrammerError("Uncatched mode %s in"
11336                                    " LUTestAllocator.Exec", self.op.mode)
11337
11338     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11339       result = ial.in_text
11340     else:
11341       ial.Run(self.op.allocator, validate=False)
11342       result = ial.out_text
11343     return result
11344
11345
11346 #: Query type implementations
11347 _QUERY_IMPL = {
11348   constants.QR_INSTANCE: _InstanceQuery,
11349   constants.QR_NODE: _NodeQuery,
11350   constants.QR_GROUP: _GroupQuery,
11351   }
11352
11353
11354 def _GetQueryImplementation(name):
11355   """Returns the implemtnation for a query type.
11356
11357   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11358
11359   """
11360   try:
11361     return _QUERY_IMPL[name]
11362   except KeyError:
11363     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11364                                errors.ECODE_INVAL)