code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
1007 def _GetNodeInstances(cfg, node_name):
1008   """Returns a list of all primary and secondary instances on a node.
1009
1010   """
1011
1012   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013
1014
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016   """Returns primary instances on a node.
1017
1018   """
1019   return _GetNodeInstancesInner(cfg,
1020                                 lambda inst: node_name == inst.primary_node)
1021
1022
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024   """Returns secondary instances on a node.
1025
1026   """
1027   return _GetNodeInstancesInner(cfg,
1028                                 lambda inst: node_name in inst.secondary_nodes)
1029
1030
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032   """Returns the arguments for a storage type.
1033
1034   """
1035   # Special case for file storage
1036   if storage_type == constants.ST_FILE:
1037     # storage.FileStorage wants a list of storage directories
1038     return [[cfg.GetFileStorageDir()]]
1039
1040   return []
1041
1042
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044   faulty = []
1045
1046   for dev in instance.disks:
1047     cfg.SetDiskID(dev, node_name)
1048
1049   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050   result.Raise("Failed to get disk status from node %s" % node_name,
1051                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052
1053   for idx, bdev_status in enumerate(result.payload):
1054     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055       faulty.append(idx)
1056
1057   return faulty
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
1092 class LUClusterPostInit(LogicalUnit):
1093   """Logical unit for running hooks after cluster initialization.
1094
1095   """
1096   HPATH = "cluster-init"
1097   HTYPE = constants.HTYPE_CLUSTER
1098
1099   def BuildHooksEnv(self):
1100     """Build hooks env.
1101
1102     """
1103     env = {"OP_TARGET": self.cfg.GetClusterName()}
1104     mn = self.cfg.GetMasterNode()
1105     return env, [], [mn]
1106
1107   def Exec(self, feedback_fn):
1108     """Nothing to do.
1109
1110     """
1111     return True
1112
1113
1114 class LUClusterDestroy(LogicalUnit):
1115   """Logical unit for destroying the cluster.
1116
1117   """
1118   HPATH = "cluster-destroy"
1119   HTYPE = constants.HTYPE_CLUSTER
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     return env, [], []
1127
1128   def CheckPrereq(self):
1129     """Check prerequisites.
1130
1131     This checks whether the cluster is empty.
1132
1133     Any errors are signaled by raising errors.OpPrereqError.
1134
1135     """
1136     master = self.cfg.GetMasterNode()
1137
1138     nodelist = self.cfg.GetNodeList()
1139     if len(nodelist) != 1 or nodelist[0] != master:
1140       raise errors.OpPrereqError("There are still %d node(s) in"
1141                                  " this cluster." % (len(nodelist) - 1),
1142                                  errors.ECODE_INVAL)
1143     instancelist = self.cfg.GetInstanceList()
1144     if instancelist:
1145       raise errors.OpPrereqError("There are still %d instance(s) in"
1146                                  " this cluster." % len(instancelist),
1147                                  errors.ECODE_INVAL)
1148
1149   def Exec(self, feedback_fn):
1150     """Destroys the cluster.
1151
1152     """
1153     master = self.cfg.GetMasterNode()
1154
1155     # Run post hooks on master node before it's removed
1156     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157     try:
1158       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159     except:
1160       # pylint: disable-msg=W0702
1161       self.LogWarning("Errors occurred running hooks on %s" % master)
1162
1163     result = self.rpc.call_node_stop_master(master, False)
1164     result.Raise("Could not disable the master role")
1165
1166     return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
1202 class LUClusterVerify(LogicalUnit):
1203   """Verifies the cluster status.
1204
1205   """
1206   HPATH = "cluster-verify"
1207   HTYPE = constants.HTYPE_CLUSTER
1208   REQ_BGL = False
1209
1210   TCLUSTER = "cluster"
1211   TNODE = "node"
1212   TINSTANCE = "instance"
1213
1214   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223   ENODEDRBD = (TNODE, "ENODEDRBD")
1224   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227   ENODEHV = (TNODE, "ENODEHV")
1228   ENODELVM = (TNODE, "ENODELVM")
1229   ENODEN1 = (TNODE, "ENODEN1")
1230   ENODENET = (TNODE, "ENODENET")
1231   ENODEOS = (TNODE, "ENODEOS")
1232   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234   ENODERPC = (TNODE, "ENODERPC")
1235   ENODESSH = (TNODE, "ENODESSH")
1236   ENODEVERSION = (TNODE, "ENODEVERSION")
1237   ENODESETUP = (TNODE, "ENODESETUP")
1238   ENODETIME = (TNODE, "ENODETIME")
1239   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240
1241   ETYPE_FIELD = "code"
1242   ETYPE_ERROR = "ERROR"
1243   ETYPE_WARNING = "WARNING"
1244
1245   _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeNetwork(self, ninfo, nresult):
1484     """Check the node time.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489
1490     """
1491     node = ninfo.name
1492     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1493
1494     test = constants.NV_NODELIST not in nresult
1495     _ErrorIf(test, self.ENODESSH, node,
1496              "node hasn't returned node ssh connectivity data")
1497     if not test:
1498       if nresult[constants.NV_NODELIST]:
1499         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500           _ErrorIf(True, self.ENODESSH, node,
1501                    "ssh communication with node '%s': %s", a_node, a_msg)
1502
1503     test = constants.NV_NODENETTEST not in nresult
1504     _ErrorIf(test, self.ENODENET, node,
1505              "node hasn't returned node tcp connectivity data")
1506     if not test:
1507       if nresult[constants.NV_NODENETTEST]:
1508         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1509         for anode in nlist:
1510           _ErrorIf(True, self.ENODENET, node,
1511                    "tcp communication with node '%s': %s",
1512                    anode, nresult[constants.NV_NODENETTEST][anode])
1513
1514     test = constants.NV_MASTERIP not in nresult
1515     _ErrorIf(test, self.ENODENET, node,
1516              "node hasn't returned node master IP reachability data")
1517     if not test:
1518       if not nresult[constants.NV_MASTERIP]:
1519         if node == self.master_node:
1520           msg = "the master node cannot reach the master IP (not configured?)"
1521         else:
1522           msg = "cannot reach the master IP"
1523         _ErrorIf(True, self.ENODENET, node, msg)
1524
1525   def _VerifyInstance(self, instance, instanceconfig, node_image,
1526                       diskstatus):
1527     """Verify an instance.
1528
1529     This function checks to see if the required block devices are
1530     available on the instance's node.
1531
1532     """
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534     node_current = instanceconfig.primary_node
1535
1536     node_vol_should = {}
1537     instanceconfig.MapLVsByNode(node_vol_should)
1538
1539     for node in node_vol_should:
1540       n_img = node_image[node]
1541       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542         # ignore missing volumes on offline or broken nodes
1543         continue
1544       for volume in node_vol_should[node]:
1545         test = volume not in n_img.volumes
1546         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547                  "volume %s missing on node %s", volume, node)
1548
1549     if instanceconfig.admin_up:
1550       pri_img = node_image[node_current]
1551       test = instance not in pri_img.instances and not pri_img.offline
1552       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553                "instance not running on its primary node %s",
1554                node_current)
1555
1556     for node, n_img in node_image.items():
1557       if node != node_current:
1558         test = instance in n_img.instances
1559         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560                  "instance should not run on node %s", node)
1561
1562     diskdata = [(nname, success, status, idx)
1563                 for (nname, disks) in diskstatus.items()
1564                 for idx, (success, status) in enumerate(disks)]
1565
1566     for nname, success, bdev_status, idx in diskdata:
1567       # the 'ghost node' construction in Exec() ensures that we have a
1568       # node here
1569       snode = node_image[nname]
1570       bad_snode = snode.ghost or snode.offline
1571       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1572                self.EINSTANCEFAULTYDISK, instance,
1573                "couldn't retrieve status for disk/%s on %s: %s",
1574                idx, nname, bdev_status)
1575       _ErrorIf((instanceconfig.admin_up and success and
1576                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1577                self.EINSTANCEFAULTYDISK, instance,
1578                "disk/%s on %s is faulty", idx, nname)
1579
1580   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1581     """Verify if there are any unknown volumes in the cluster.
1582
1583     The .os, .swap and backup volumes are ignored. All other volumes are
1584     reported as unknown.
1585
1586     @type reserved: L{ganeti.utils.FieldSet}
1587     @param reserved: a FieldSet of reserved volume names
1588
1589     """
1590     for node, n_img in node_image.items():
1591       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1592         # skip non-healthy nodes
1593         continue
1594       for volume in n_img.volumes:
1595         test = ((node not in node_vol_should or
1596                 volume not in node_vol_should[node]) and
1597                 not reserved.Matches(volume))
1598         self._ErrorIf(test, self.ENODEORPHANLV, node,
1599                       "volume %s is unknown", volume)
1600
1601   def _VerifyOrphanInstances(self, instancelist, node_image):
1602     """Verify the list of running instances.
1603
1604     This checks what instances are running but unknown to the cluster.
1605
1606     """
1607     for node, n_img in node_image.items():
1608       for o_inst in n_img.instances:
1609         test = o_inst not in instancelist
1610         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1611                       "instance %s on node %s should not exist", o_inst, node)
1612
1613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1614     """Verify N+1 Memory Resilience.
1615
1616     Check that if one single node dies we can still start all the
1617     instances it was primary for.
1618
1619     """
1620     for node, n_img in node_image.items():
1621       # This code checks that every node which is now listed as
1622       # secondary has enough memory to host all instances it is
1623       # supposed to should a single other node in the cluster fail.
1624       # FIXME: not ready for failover to an arbitrary node
1625       # FIXME: does not support file-backed instances
1626       # WARNING: we currently take into account down instances as well
1627       # as up ones, considering that even if they're down someone
1628       # might want to start them even in the event of a node failure.
1629       if n_img.offline:
1630         # we're skipping offline nodes from the N+1 warning, since
1631         # most likely we don't have good memory infromation from them;
1632         # we already list instances living on such nodes, and that's
1633         # enough warning
1634         continue
1635       for prinode, instances in n_img.sbp.items():
1636         needed_mem = 0
1637         for instance in instances:
1638           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1639           if bep[constants.BE_AUTO_BALANCE]:
1640             needed_mem += bep[constants.BE_MEMORY]
1641         test = n_img.mfree < needed_mem
1642         self._ErrorIf(test, self.ENODEN1, node,
1643                       "not enough memory to accomodate instance failovers"
1644                       " should node %s fail (%dMiB needed, %dMiB available)",
1645                       prinode, needed_mem, n_img.mfree)
1646
1647   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1648                        master_files):
1649     """Verifies and computes the node required file checksums.
1650
1651     @type ninfo: L{objects.Node}
1652     @param ninfo: the node to check
1653     @param nresult: the remote results for the node
1654     @param file_list: required list of files
1655     @param local_cksum: dictionary of local files and their checksums
1656     @param master_files: list of files that only masters should have
1657
1658     """
1659     node = ninfo.name
1660     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661
1662     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1663     test = not isinstance(remote_cksum, dict)
1664     _ErrorIf(test, self.ENODEFILECHECK, node,
1665              "node hasn't returned file checksum data")
1666     if test:
1667       return
1668
1669     for file_name in file_list:
1670       node_is_mc = ninfo.master_candidate
1671       must_have = (file_name not in master_files) or node_is_mc
1672       # missing
1673       test1 = file_name not in remote_cksum
1674       # invalid checksum
1675       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1676       # existing and good
1677       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1678       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1679                "file '%s' missing", file_name)
1680       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1681                "file '%s' has wrong checksum", file_name)
1682       # not candidate and this is not a must-have file
1683       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1684                "file '%s' should not exist on non master"
1685                " candidates (and the file is outdated)", file_name)
1686       # all good, except non-master/non-must have combination
1687       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1688                "file '%s' should not exist"
1689                " on non master candidates", file_name)
1690
1691   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1692                       drbd_map):
1693     """Verifies and the node DRBD status.
1694
1695     @type ninfo: L{objects.Node}
1696     @param ninfo: the node to check
1697     @param nresult: the remote results for the node
1698     @param instanceinfo: the dict of instances
1699     @param drbd_helper: the configured DRBD usermode helper
1700     @param drbd_map: the DRBD map as returned by
1701         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1702
1703     """
1704     node = ninfo.name
1705     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1706
1707     if drbd_helper:
1708       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1709       test = (helper_result == None)
1710       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1711                "no drbd usermode helper returned")
1712       if helper_result:
1713         status, payload = helper_result
1714         test = not status
1715         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1716                  "drbd usermode helper check unsuccessful: %s", payload)
1717         test = status and (payload != drbd_helper)
1718         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1719                  "wrong drbd usermode helper: %s", payload)
1720
1721     # compute the DRBD minors
1722     node_drbd = {}
1723     for minor, instance in drbd_map[node].items():
1724       test = instance not in instanceinfo
1725       _ErrorIf(test, self.ECLUSTERCFG, None,
1726                "ghost instance '%s' in temporary DRBD map", instance)
1727         # ghost instance should not be running, but otherwise we
1728         # don't give double warnings (both ghost instance and
1729         # unallocated minor in use)
1730       if test:
1731         node_drbd[minor] = (instance, False)
1732       else:
1733         instance = instanceinfo[instance]
1734         node_drbd[minor] = (instance.name, instance.admin_up)
1735
1736     # and now check them
1737     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1738     test = not isinstance(used_minors, (tuple, list))
1739     _ErrorIf(test, self.ENODEDRBD, node,
1740              "cannot parse drbd status file: %s", str(used_minors))
1741     if test:
1742       # we cannot check drbd status
1743       return
1744
1745     for minor, (iname, must_exist) in node_drbd.items():
1746       test = minor not in used_minors and must_exist
1747       _ErrorIf(test, self.ENODEDRBD, node,
1748                "drbd minor %d of instance %s is not active", minor, iname)
1749     for minor in used_minors:
1750       test = minor not in node_drbd
1751       _ErrorIf(test, self.ENODEDRBD, node,
1752                "unallocated drbd minor %d is in use", minor)
1753
1754   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1755     """Builds the node OS structures.
1756
1757     @type ninfo: L{objects.Node}
1758     @param ninfo: the node to check
1759     @param nresult: the remote results for the node
1760     @param nimg: the node image object
1761
1762     """
1763     node = ninfo.name
1764     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1765
1766     remote_os = nresult.get(constants.NV_OSLIST, None)
1767     test = (not isinstance(remote_os, list) or
1768             not compat.all(isinstance(v, list) and len(v) == 7
1769                            for v in remote_os))
1770
1771     _ErrorIf(test, self.ENODEOS, node,
1772              "node hasn't returned valid OS data")
1773
1774     nimg.os_fail = test
1775
1776     if test:
1777       return
1778
1779     os_dict = {}
1780
1781     for (name, os_path, status, diagnose,
1782          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1783
1784       if name not in os_dict:
1785         os_dict[name] = []
1786
1787       # parameters is a list of lists instead of list of tuples due to
1788       # JSON lacking a real tuple type, fix it:
1789       parameters = [tuple(v) for v in parameters]
1790       os_dict[name].append((os_path, status, diagnose,
1791                             set(variants), set(parameters), set(api_ver)))
1792
1793     nimg.oslist = os_dict
1794
1795   def _VerifyNodeOS(self, ninfo, nimg, base):
1796     """Verifies the node OS list.
1797
1798     @type ninfo: L{objects.Node}
1799     @param ninfo: the node to check
1800     @param nimg: the node image object
1801     @param base: the 'template' node we match against (e.g. from the master)
1802
1803     """
1804     node = ninfo.name
1805     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1806
1807     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1808
1809     for os_name, os_data in nimg.oslist.items():
1810       assert os_data, "Empty OS status for OS %s?!" % os_name
1811       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1812       _ErrorIf(not f_status, self.ENODEOS, node,
1813                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1814       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1815                "OS '%s' has multiple entries (first one shadows the rest): %s",
1816                os_name, utils.CommaJoin([v[0] for v in os_data]))
1817       # this will catched in backend too
1818       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1819                and not f_var, self.ENODEOS, node,
1820                "OS %s with API at least %d does not declare any variant",
1821                os_name, constants.OS_API_V15)
1822       # comparisons with the 'base' image
1823       test = os_name not in base.oslist
1824       _ErrorIf(test, self.ENODEOS, node,
1825                "Extra OS %s not present on reference node (%s)",
1826                os_name, base.name)
1827       if test:
1828         continue
1829       assert base.oslist[os_name], "Base node has empty OS status?"
1830       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1831       if not b_status:
1832         # base OS is invalid, skipping
1833         continue
1834       for kind, a, b in [("API version", f_api, b_api),
1835                          ("variants list", f_var, b_var),
1836                          ("parameters", f_param, b_param)]:
1837         _ErrorIf(a != b, self.ENODEOS, node,
1838                  "OS %s %s differs from reference node %s: %s vs. %s",
1839                  kind, os_name, base.name,
1840                  utils.CommaJoin(a), utils.CommaJoin(b))
1841
1842     # check any missing OSes
1843     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1844     _ErrorIf(missing, self.ENODEOS, node,
1845              "OSes present on reference node %s but missing on this node: %s",
1846              base.name, utils.CommaJoin(missing))
1847
1848   def _VerifyOob(self, ninfo, nresult):
1849     """Verifies out of band functionality of a node.
1850
1851     @type ninfo: L{objects.Node}
1852     @param ninfo: the node to check
1853     @param nresult: the remote results for the node
1854
1855     """
1856     node = ninfo.name
1857     # We just have to verify the paths on master and/or master candidates
1858     # as the oob helper is invoked on the master
1859     if ((ninfo.master_candidate or ninfo.master_capable) and
1860         constants.NV_OOB_PATHS in nresult):
1861       for path_result in nresult[constants.NV_OOB_PATHS]:
1862         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1863
1864   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1865     """Verifies and updates the node volume data.
1866
1867     This function will update a L{NodeImage}'s internal structures
1868     with data from the remote call.
1869
1870     @type ninfo: L{objects.Node}
1871     @param ninfo: the node to check
1872     @param nresult: the remote results for the node
1873     @param nimg: the node image object
1874     @param vg_name: the configured VG name
1875
1876     """
1877     node = ninfo.name
1878     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1879
1880     nimg.lvm_fail = True
1881     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1882     if vg_name is None:
1883       pass
1884     elif isinstance(lvdata, basestring):
1885       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1886                utils.SafeEncode(lvdata))
1887     elif not isinstance(lvdata, dict):
1888       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1889     else:
1890       nimg.volumes = lvdata
1891       nimg.lvm_fail = False
1892
1893   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1894     """Verifies and updates the node instance list.
1895
1896     If the listing was successful, then updates this node's instance
1897     list. Otherwise, it marks the RPC call as failed for the instance
1898     list key.
1899
1900     @type ninfo: L{objects.Node}
1901     @param ninfo: the node to check
1902     @param nresult: the remote results for the node
1903     @param nimg: the node image object
1904
1905     """
1906     idata = nresult.get(constants.NV_INSTANCELIST, None)
1907     test = not isinstance(idata, list)
1908     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1909                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1910     if test:
1911       nimg.hyp_fail = True
1912     else:
1913       nimg.instances = idata
1914
1915   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1916     """Verifies and computes a node information map
1917
1918     @type ninfo: L{objects.Node}
1919     @param ninfo: the node to check
1920     @param nresult: the remote results for the node
1921     @param nimg: the node image object
1922     @param vg_name: the configured VG name
1923
1924     """
1925     node = ninfo.name
1926     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1927
1928     # try to read free memory (from the hypervisor)
1929     hv_info = nresult.get(constants.NV_HVINFO, None)
1930     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1931     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1932     if not test:
1933       try:
1934         nimg.mfree = int(hv_info["memory_free"])
1935       except (ValueError, TypeError):
1936         _ErrorIf(True, self.ENODERPC, node,
1937                  "node returned invalid nodeinfo, check hypervisor")
1938
1939     # FIXME: devise a free space model for file based instances as well
1940     if vg_name is not None:
1941       test = (constants.NV_VGLIST not in nresult or
1942               vg_name not in nresult[constants.NV_VGLIST])
1943       _ErrorIf(test, self.ENODELVM, node,
1944                "node didn't return data for the volume group '%s'"
1945                " - it is either missing or broken", vg_name)
1946       if not test:
1947         try:
1948           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1949         except (ValueError, TypeError):
1950           _ErrorIf(True, self.ENODERPC, node,
1951                    "node returned invalid LVM info, check LVM status")
1952
1953   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1954     """Gets per-disk status information for all instances.
1955
1956     @type nodelist: list of strings
1957     @param nodelist: Node names
1958     @type node_image: dict of (name, L{objects.Node})
1959     @param node_image: Node objects
1960     @type instanceinfo: dict of (name, L{objects.Instance})
1961     @param instanceinfo: Instance objects
1962     @rtype: {instance: {node: [(succes, payload)]}}
1963     @return: a dictionary of per-instance dictionaries with nodes as
1964         keys and disk information as values; the disk information is a
1965         list of tuples (success, payload)
1966
1967     """
1968     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1969
1970     node_disks = {}
1971     node_disks_devonly = {}
1972     diskless_instances = set()
1973     diskless = constants.DT_DISKLESS
1974
1975     for nname in nodelist:
1976       node_instances = list(itertools.chain(node_image[nname].pinst,
1977                                             node_image[nname].sinst))
1978       diskless_instances.update(inst for inst in node_instances
1979                                 if instanceinfo[inst].disk_template == diskless)
1980       disks = [(inst, disk)
1981                for inst in node_instances
1982                for disk in instanceinfo[inst].disks]
1983
1984       if not disks:
1985         # No need to collect data
1986         continue
1987
1988       node_disks[nname] = disks
1989
1990       # Creating copies as SetDiskID below will modify the objects and that can
1991       # lead to incorrect data returned from nodes
1992       devonly = [dev.Copy() for (_, dev) in disks]
1993
1994       for dev in devonly:
1995         self.cfg.SetDiskID(dev, nname)
1996
1997       node_disks_devonly[nname] = devonly
1998
1999     assert len(node_disks) == len(node_disks_devonly)
2000
2001     # Collect data from all nodes with disks
2002     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2003                                                           node_disks_devonly)
2004
2005     assert len(result) == len(node_disks)
2006
2007     instdisk = {}
2008
2009     for (nname, nres) in result.items():
2010       disks = node_disks[nname]
2011
2012       if nres.offline:
2013         # No data from this node
2014         data = len(disks) * [(False, "node offline")]
2015       else:
2016         msg = nres.fail_msg
2017         _ErrorIf(msg, self.ENODERPC, nname,
2018                  "while getting disk information: %s", msg)
2019         if msg:
2020           # No data from this node
2021           data = len(disks) * [(False, msg)]
2022         else:
2023           data = []
2024           for idx, i in enumerate(nres.payload):
2025             if isinstance(i, (tuple, list)) and len(i) == 2:
2026               data.append(i)
2027             else:
2028               logging.warning("Invalid result from node %s, entry %d: %s",
2029                               nname, idx, i)
2030               data.append((False, "Invalid result from the remote node"))
2031
2032       for ((inst, _), status) in zip(disks, data):
2033         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2034
2035     # Add empty entries for diskless instances.
2036     for inst in diskless_instances:
2037       assert inst not in instdisk
2038       instdisk[inst] = {}
2039
2040     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2041                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2042                       compat.all(isinstance(s, (tuple, list)) and
2043                                  len(s) == 2 for s in statuses)
2044                       for inst, nnames in instdisk.items()
2045                       for nname, statuses in nnames.items())
2046     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2047
2048     return instdisk
2049
2050   def _VerifyHVP(self, hvp_data):
2051     """Verifies locally the syntax of the hypervisor parameters.
2052
2053     """
2054     for item, hv_name, hv_params in hvp_data:
2055       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2056              (item, hv_name))
2057       try:
2058         hv_class = hypervisor.GetHypervisor(hv_name)
2059         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2060         hv_class.CheckParameterSyntax(hv_params)
2061       except errors.GenericError, err:
2062         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2063
2064
2065   def BuildHooksEnv(self):
2066     """Build hooks env.
2067
2068     Cluster-Verify hooks just ran in the post phase and their failure makes
2069     the output be logged in the verify output and the verification to fail.
2070
2071     """
2072     all_nodes = self.cfg.GetNodeList()
2073     env = {
2074       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2075       }
2076     for node in self.cfg.GetAllNodesInfo().values():
2077       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2078
2079     return env, [], all_nodes
2080
2081   def Exec(self, feedback_fn):
2082     """Verify integrity of cluster, performing various test on nodes.
2083
2084     """
2085     # This method has too many local variables. pylint: disable-msg=R0914
2086     self.bad = False
2087     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2088     verbose = self.op.verbose
2089     self._feedback_fn = feedback_fn
2090     feedback_fn("* Verifying global settings")
2091     for msg in self.cfg.VerifyConfig():
2092       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2093
2094     # Check the cluster certificates
2095     for cert_filename in constants.ALL_CERT_FILES:
2096       (errcode, msg) = _VerifyCertificate(cert_filename)
2097       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2098
2099     vg_name = self.cfg.GetVGName()
2100     drbd_helper = self.cfg.GetDRBDHelper()
2101     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2102     cluster = self.cfg.GetClusterInfo()
2103     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2104     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2105     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2106     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2107     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2108                         for iname in instancelist)
2109     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2110     i_non_redundant = [] # Non redundant instances
2111     i_non_a_balanced = [] # Non auto-balanced instances
2112     n_offline = 0 # Count of offline nodes
2113     n_drained = 0 # Count of nodes being drained
2114     node_vol_should = {}
2115
2116     # FIXME: verify OS list
2117     # do local checksums
2118     master_files = [constants.CLUSTER_CONF_FILE]
2119     master_node = self.master_node = self.cfg.GetMasterNode()
2120     master_ip = self.cfg.GetMasterIP()
2121
2122     file_names = ssconf.SimpleStore().GetFileList()
2123     file_names.extend(constants.ALL_CERT_FILES)
2124     file_names.extend(master_files)
2125     if cluster.modify_etc_hosts:
2126       file_names.append(constants.ETC_HOSTS)
2127
2128     local_checksums = utils.FingerprintFiles(file_names)
2129
2130     # Compute the set of hypervisor parameters
2131     hvp_data = []
2132     for hv_name in hypervisors:
2133       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2134     for os_name, os_hvp in cluster.os_hvp.items():
2135       for hv_name, hv_params in os_hvp.items():
2136         if not hv_params:
2137           continue
2138         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2139         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2140     # TODO: collapse identical parameter values in a single one
2141     for instance in instanceinfo.values():
2142       if not instance.hvparams:
2143         continue
2144       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2145                        cluster.FillHV(instance)))
2146     # and verify them locally
2147     self._VerifyHVP(hvp_data)
2148
2149     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2150     node_verify_param = {
2151       constants.NV_FILELIST: file_names,
2152       constants.NV_NODELIST: [node.name for node in nodeinfo
2153                               if not node.offline],
2154       constants.NV_HYPERVISOR: hypervisors,
2155       constants.NV_HVPARAMS: hvp_data,
2156       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2157                                   node.secondary_ip) for node in nodeinfo
2158                                  if not node.offline],
2159       constants.NV_INSTANCELIST: hypervisors,
2160       constants.NV_VERSION: None,
2161       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2162       constants.NV_NODESETUP: None,
2163       constants.NV_TIME: None,
2164       constants.NV_MASTERIP: (master_node, master_ip),
2165       constants.NV_OSLIST: None,
2166       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2167       }
2168
2169     if vg_name is not None:
2170       node_verify_param[constants.NV_VGLIST] = None
2171       node_verify_param[constants.NV_LVLIST] = vg_name
2172       node_verify_param[constants.NV_PVLIST] = [vg_name]
2173       node_verify_param[constants.NV_DRBDLIST] = None
2174
2175     if drbd_helper:
2176       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2177
2178     # Build our expected cluster state
2179     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2180                                                  name=node.name,
2181                                                  vm_capable=node.vm_capable))
2182                       for node in nodeinfo)
2183
2184     # Gather OOB paths
2185     oob_paths = []
2186     for node in nodeinfo:
2187       path = _SupportsOob(self.cfg, node)
2188       if path and path not in oob_paths:
2189         oob_paths.append(path)
2190
2191     if oob_paths:
2192       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2193
2194     for instance in instancelist:
2195       inst_config = instanceinfo[instance]
2196
2197       for nname in inst_config.all_nodes:
2198         if nname not in node_image:
2199           # ghost node
2200           gnode = self.NodeImage(name=nname)
2201           gnode.ghost = True
2202           node_image[nname] = gnode
2203
2204       inst_config.MapLVsByNode(node_vol_should)
2205
2206       pnode = inst_config.primary_node
2207       node_image[pnode].pinst.append(instance)
2208
2209       for snode in inst_config.secondary_nodes:
2210         nimg = node_image[snode]
2211         nimg.sinst.append(instance)
2212         if pnode not in nimg.sbp:
2213           nimg.sbp[pnode] = []
2214         nimg.sbp[pnode].append(instance)
2215
2216     # At this point, we have the in-memory data structures complete,
2217     # except for the runtime information, which we'll gather next
2218
2219     # Due to the way our RPC system works, exact response times cannot be
2220     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2221     # time before and after executing the request, we can at least have a time
2222     # window.
2223     nvinfo_starttime = time.time()
2224     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2225                                            self.cfg.GetClusterName())
2226     nvinfo_endtime = time.time()
2227
2228     all_drbd_map = self.cfg.ComputeDRBDMap()
2229
2230     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2231     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2232
2233     feedback_fn("* Verifying node status")
2234
2235     refos_img = None
2236
2237     for node_i in nodeinfo:
2238       node = node_i.name
2239       nimg = node_image[node]
2240
2241       if node_i.offline:
2242         if verbose:
2243           feedback_fn("* Skipping offline node %s" % (node,))
2244         n_offline += 1
2245         continue
2246
2247       if node == master_node:
2248         ntype = "master"
2249       elif node_i.master_candidate:
2250         ntype = "master candidate"
2251       elif node_i.drained:
2252         ntype = "drained"
2253         n_drained += 1
2254       else:
2255         ntype = "regular"
2256       if verbose:
2257         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2258
2259       msg = all_nvinfo[node].fail_msg
2260       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2261       if msg:
2262         nimg.rpc_fail = True
2263         continue
2264
2265       nresult = all_nvinfo[node].payload
2266
2267       nimg.call_ok = self._VerifyNode(node_i, nresult)
2268       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2269       self._VerifyNodeNetwork(node_i, nresult)
2270       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2271                             master_files)
2272
2273       self._VerifyOob(node_i, nresult)
2274
2275       if nimg.vm_capable:
2276         self._VerifyNodeLVM(node_i, nresult, vg_name)
2277         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2278                              all_drbd_map)
2279
2280         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2281         self._UpdateNodeInstances(node_i, nresult, nimg)
2282         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2283         self._UpdateNodeOS(node_i, nresult, nimg)
2284         if not nimg.os_fail:
2285           if refos_img is None:
2286             refos_img = nimg
2287           self._VerifyNodeOS(node_i, nimg, refos_img)
2288
2289     feedback_fn("* Verifying instance status")
2290     for instance in instancelist:
2291       if verbose:
2292         feedback_fn("* Verifying instance %s" % instance)
2293       inst_config = instanceinfo[instance]
2294       self._VerifyInstance(instance, inst_config, node_image,
2295                            instdisk[instance])
2296       inst_nodes_offline = []
2297
2298       pnode = inst_config.primary_node
2299       pnode_img = node_image[pnode]
2300       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2301                self.ENODERPC, pnode, "instance %s, connection to"
2302                " primary node failed", instance)
2303
2304       _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2305                "instance lives on offline node %s", inst_config.primary_node)
2306
2307       # If the instance is non-redundant we cannot survive losing its primary
2308       # node, so we are not N+1 compliant. On the other hand we have no disk
2309       # templates with more than one secondary so that situation is not well
2310       # supported either.
2311       # FIXME: does not support file-backed instances
2312       if not inst_config.secondary_nodes:
2313         i_non_redundant.append(instance)
2314
2315       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2316                instance, "instance has multiple secondary nodes: %s",
2317                utils.CommaJoin(inst_config.secondary_nodes),
2318                code=self.ETYPE_WARNING)
2319
2320       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2321         pnode = inst_config.primary_node
2322         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2323         instance_groups = {}
2324
2325         for node in instance_nodes:
2326           instance_groups.setdefault(nodeinfo_byname[node].group,
2327                                      []).append(node)
2328
2329         pretty_list = [
2330           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2331           # Sort so that we always list the primary node first.
2332           for group, nodes in sorted(instance_groups.items(),
2333                                      key=lambda (_, nodes): pnode in nodes,
2334                                      reverse=True)]
2335
2336         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2337                       instance, "instance has primary and secondary nodes in"
2338                       " different groups: %s", utils.CommaJoin(pretty_list),
2339                       code=self.ETYPE_WARNING)
2340
2341       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2342         i_non_a_balanced.append(instance)
2343
2344       for snode in inst_config.secondary_nodes:
2345         s_img = node_image[snode]
2346         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2347                  "instance %s, connection to secondary node failed", instance)
2348
2349         if s_img.offline:
2350           inst_nodes_offline.append(snode)
2351
2352       # warn that the instance lives on offline nodes
2353       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2354                "instance has offline secondary node(s) %s",
2355                utils.CommaJoin(inst_nodes_offline))
2356       # ... or ghost/non-vm_capable nodes
2357       for node in inst_config.all_nodes:
2358         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2359                  "instance lives on ghost node %s", node)
2360         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2361                  instance, "instance lives on non-vm_capable node %s", node)
2362
2363     feedback_fn("* Verifying orphan volumes")
2364     reserved = utils.FieldSet(*cluster.reserved_lvs)
2365     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2366
2367     feedback_fn("* Verifying orphan instances")
2368     self._VerifyOrphanInstances(instancelist, node_image)
2369
2370     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2371       feedback_fn("* Verifying N+1 Memory redundancy")
2372       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2373
2374     feedback_fn("* Other Notes")
2375     if i_non_redundant:
2376       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2377                   % len(i_non_redundant))
2378
2379     if i_non_a_balanced:
2380       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2381                   % len(i_non_a_balanced))
2382
2383     if n_offline:
2384       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2385
2386     if n_drained:
2387       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2388
2389     return not self.bad
2390
2391   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2392     """Analyze the post-hooks' result
2393
2394     This method analyses the hook result, handles it, and sends some
2395     nicely-formatted feedback back to the user.
2396
2397     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2398         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2399     @param hooks_results: the results of the multi-node hooks rpc call
2400     @param feedback_fn: function used send feedback back to the caller
2401     @param lu_result: previous Exec result
2402     @return: the new Exec result, based on the previous result
2403         and hook results
2404
2405     """
2406     # We only really run POST phase hooks, and are only interested in
2407     # their results
2408     if phase == constants.HOOKS_PHASE_POST:
2409       # Used to change hooks' output to proper indentation
2410       feedback_fn("* Hooks Results")
2411       assert hooks_results, "invalid result from hooks"
2412
2413       for node_name in hooks_results:
2414         res = hooks_results[node_name]
2415         msg = res.fail_msg
2416         test = msg and not res.offline
2417         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2418                       "Communication failure in hooks execution: %s", msg)
2419         if res.offline or msg:
2420           # No need to investigate payload if node is offline or gave an error.
2421           # override manually lu_result here as _ErrorIf only
2422           # overrides self.bad
2423           lu_result = 1
2424           continue
2425         for script, hkr, output in res.payload:
2426           test = hkr == constants.HKR_FAIL
2427           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2428                         "Script %s failed, output:", script)
2429           if test:
2430             output = self._HOOKS_INDENT_RE.sub('      ', output)
2431             feedback_fn("%s" % output)
2432             lu_result = 0
2433
2434       return lu_result
2435
2436
2437 class LUClusterVerifyDisks(NoHooksLU):
2438   """Verifies the cluster disks status.
2439
2440   """
2441   REQ_BGL = False
2442
2443   def ExpandNames(self):
2444     self.needed_locks = {
2445       locking.LEVEL_NODE: locking.ALL_SET,
2446       locking.LEVEL_INSTANCE: locking.ALL_SET,
2447     }
2448     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2449
2450   def Exec(self, feedback_fn):
2451     """Verify integrity of cluster disks.
2452
2453     @rtype: tuple of three items
2454     @return: a tuple of (dict of node-to-node_error, list of instances
2455         which need activate-disks, dict of instance: (node, volume) for
2456         missing volumes
2457
2458     """
2459     result = res_nodes, res_instances, res_missing = {}, [], {}
2460
2461     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2462     instances = self.cfg.GetAllInstancesInfo().values()
2463
2464     nv_dict = {}
2465     for inst in instances:
2466       inst_lvs = {}
2467       if not inst.admin_up:
2468         continue
2469       inst.MapLVsByNode(inst_lvs)
2470       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2471       for node, vol_list in inst_lvs.iteritems():
2472         for vol in vol_list:
2473           nv_dict[(node, vol)] = inst
2474
2475     if not nv_dict:
2476       return result
2477
2478     node_lvs = self.rpc.call_lv_list(nodes, [])
2479     for node, node_res in node_lvs.items():
2480       if node_res.offline:
2481         continue
2482       msg = node_res.fail_msg
2483       if msg:
2484         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2485         res_nodes[node] = msg
2486         continue
2487
2488       lvs = node_res.payload
2489       for lv_name, (_, _, lv_online) in lvs.items():
2490         inst = nv_dict.pop((node, lv_name), None)
2491         if (not lv_online and inst is not None
2492             and inst.name not in res_instances):
2493           res_instances.append(inst.name)
2494
2495     # any leftover items in nv_dict are missing LVs, let's arrange the
2496     # data better
2497     for key, inst in nv_dict.iteritems():
2498       if inst.name not in res_missing:
2499         res_missing[inst.name] = []
2500       res_missing[inst.name].append(key)
2501
2502     return result
2503
2504
2505 class LUClusterRepairDiskSizes(NoHooksLU):
2506   """Verifies the cluster disks sizes.
2507
2508   """
2509   REQ_BGL = False
2510
2511   def ExpandNames(self):
2512     if self.op.instances:
2513       self.wanted_names = []
2514       for name in self.op.instances:
2515         full_name = _ExpandInstanceName(self.cfg, name)
2516         self.wanted_names.append(full_name)
2517       self.needed_locks = {
2518         locking.LEVEL_NODE: [],
2519         locking.LEVEL_INSTANCE: self.wanted_names,
2520         }
2521       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2522     else:
2523       self.wanted_names = None
2524       self.needed_locks = {
2525         locking.LEVEL_NODE: locking.ALL_SET,
2526         locking.LEVEL_INSTANCE: locking.ALL_SET,
2527         }
2528     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2529
2530   def DeclareLocks(self, level):
2531     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2532       self._LockInstancesNodes(primary_only=True)
2533
2534   def CheckPrereq(self):
2535     """Check prerequisites.
2536
2537     This only checks the optional instance list against the existing names.
2538
2539     """
2540     if self.wanted_names is None:
2541       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2542
2543     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2544                              in self.wanted_names]
2545
2546   def _EnsureChildSizes(self, disk):
2547     """Ensure children of the disk have the needed disk size.
2548
2549     This is valid mainly for DRBD8 and fixes an issue where the
2550     children have smaller disk size.
2551
2552     @param disk: an L{ganeti.objects.Disk} object
2553
2554     """
2555     if disk.dev_type == constants.LD_DRBD8:
2556       assert disk.children, "Empty children for DRBD8?"
2557       fchild = disk.children[0]
2558       mismatch = fchild.size < disk.size
2559       if mismatch:
2560         self.LogInfo("Child disk has size %d, parent %d, fixing",
2561                      fchild.size, disk.size)
2562         fchild.size = disk.size
2563
2564       # and we recurse on this child only, not on the metadev
2565       return self._EnsureChildSizes(fchild) or mismatch
2566     else:
2567       return False
2568
2569   def Exec(self, feedback_fn):
2570     """Verify the size of cluster disks.
2571
2572     """
2573     # TODO: check child disks too
2574     # TODO: check differences in size between primary/secondary nodes
2575     per_node_disks = {}
2576     for instance in self.wanted_instances:
2577       pnode = instance.primary_node
2578       if pnode not in per_node_disks:
2579         per_node_disks[pnode] = []
2580       for idx, disk in enumerate(instance.disks):
2581         per_node_disks[pnode].append((instance, idx, disk))
2582
2583     changed = []
2584     for node, dskl in per_node_disks.items():
2585       newl = [v[2].Copy() for v in dskl]
2586       for dsk in newl:
2587         self.cfg.SetDiskID(dsk, node)
2588       result = self.rpc.call_blockdev_getsize(node, newl)
2589       if result.fail_msg:
2590         self.LogWarning("Failure in blockdev_getsize call to node"
2591                         " %s, ignoring", node)
2592         continue
2593       if len(result.payload) != len(dskl):
2594         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2595                         " result.payload=%s", node, len(dskl), result.payload)
2596         self.LogWarning("Invalid result from node %s, ignoring node results",
2597                         node)
2598         continue
2599       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2600         if size is None:
2601           self.LogWarning("Disk %d of instance %s did not return size"
2602                           " information, ignoring", idx, instance.name)
2603           continue
2604         if not isinstance(size, (int, long)):
2605           self.LogWarning("Disk %d of instance %s did not return valid"
2606                           " size information, ignoring", idx, instance.name)
2607           continue
2608         size = size >> 20
2609         if size != disk.size:
2610           self.LogInfo("Disk %d of instance %s has mismatched size,"
2611                        " correcting: recorded %d, actual %d", idx,
2612                        instance.name, disk.size, size)
2613           disk.size = size
2614           self.cfg.Update(instance, feedback_fn)
2615           changed.append((instance.name, idx, size))
2616         if self._EnsureChildSizes(disk):
2617           self.cfg.Update(instance, feedback_fn)
2618           changed.append((instance.name, idx, disk.size))
2619     return changed
2620
2621
2622 class LUClusterRename(LogicalUnit):
2623   """Rename the cluster.
2624
2625   """
2626   HPATH = "cluster-rename"
2627   HTYPE = constants.HTYPE_CLUSTER
2628
2629   def BuildHooksEnv(self):
2630     """Build hooks env.
2631
2632     """
2633     env = {
2634       "OP_TARGET": self.cfg.GetClusterName(),
2635       "NEW_NAME": self.op.name,
2636       }
2637     mn = self.cfg.GetMasterNode()
2638     all_nodes = self.cfg.GetNodeList()
2639     return env, [mn], all_nodes
2640
2641   def CheckPrereq(self):
2642     """Verify that the passed name is a valid one.
2643
2644     """
2645     hostname = netutils.GetHostname(name=self.op.name,
2646                                     family=self.cfg.GetPrimaryIPFamily())
2647
2648     new_name = hostname.name
2649     self.ip = new_ip = hostname.ip
2650     old_name = self.cfg.GetClusterName()
2651     old_ip = self.cfg.GetMasterIP()
2652     if new_name == old_name and new_ip == old_ip:
2653       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2654                                  " cluster has changed",
2655                                  errors.ECODE_INVAL)
2656     if new_ip != old_ip:
2657       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2658         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2659                                    " reachable on the network" %
2660                                    new_ip, errors.ECODE_NOTUNIQUE)
2661
2662     self.op.name = new_name
2663
2664   def Exec(self, feedback_fn):
2665     """Rename the cluster.
2666
2667     """
2668     clustername = self.op.name
2669     ip = self.ip
2670
2671     # shutdown the master IP
2672     master = self.cfg.GetMasterNode()
2673     result = self.rpc.call_node_stop_master(master, False)
2674     result.Raise("Could not disable the master role")
2675
2676     try:
2677       cluster = self.cfg.GetClusterInfo()
2678       cluster.cluster_name = clustername
2679       cluster.master_ip = ip
2680       self.cfg.Update(cluster, feedback_fn)
2681
2682       # update the known hosts file
2683       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2684       node_list = self.cfg.GetOnlineNodeList()
2685       try:
2686         node_list.remove(master)
2687       except ValueError:
2688         pass
2689       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2690     finally:
2691       result = self.rpc.call_node_start_master(master, False, False)
2692       msg = result.fail_msg
2693       if msg:
2694         self.LogWarning("Could not re-enable the master role on"
2695                         " the master, please restart manually: %s", msg)
2696
2697     return clustername
2698
2699
2700 class LUClusterSetParams(LogicalUnit):
2701   """Change the parameters of the cluster.
2702
2703   """
2704   HPATH = "cluster-modify"
2705   HTYPE = constants.HTYPE_CLUSTER
2706   REQ_BGL = False
2707
2708   def CheckArguments(self):
2709     """Check parameters
2710
2711     """
2712     if self.op.uid_pool:
2713       uidpool.CheckUidPool(self.op.uid_pool)
2714
2715     if self.op.add_uids:
2716       uidpool.CheckUidPool(self.op.add_uids)
2717
2718     if self.op.remove_uids:
2719       uidpool.CheckUidPool(self.op.remove_uids)
2720
2721   def ExpandNames(self):
2722     # FIXME: in the future maybe other cluster params won't require checking on
2723     # all nodes to be modified.
2724     self.needed_locks = {
2725       locking.LEVEL_NODE: locking.ALL_SET,
2726     }
2727     self.share_locks[locking.LEVEL_NODE] = 1
2728
2729   def BuildHooksEnv(self):
2730     """Build hooks env.
2731
2732     """
2733     env = {
2734       "OP_TARGET": self.cfg.GetClusterName(),
2735       "NEW_VG_NAME": self.op.vg_name,
2736       }
2737     mn = self.cfg.GetMasterNode()
2738     return env, [mn], [mn]
2739
2740   def CheckPrereq(self):
2741     """Check prerequisites.
2742
2743     This checks whether the given params don't conflict and
2744     if the given volume group is valid.
2745
2746     """
2747     if self.op.vg_name is not None and not self.op.vg_name:
2748       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2749         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2750                                    " instances exist", errors.ECODE_INVAL)
2751
2752     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2753       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2754         raise errors.OpPrereqError("Cannot disable drbd helper while"
2755                                    " drbd-based instances exist",
2756                                    errors.ECODE_INVAL)
2757
2758     node_list = self.acquired_locks[locking.LEVEL_NODE]
2759
2760     # if vg_name not None, checks given volume group on all nodes
2761     if self.op.vg_name:
2762       vglist = self.rpc.call_vg_list(node_list)
2763       for node in node_list:
2764         msg = vglist[node].fail_msg
2765         if msg:
2766           # ignoring down node
2767           self.LogWarning("Error while gathering data on node %s"
2768                           " (ignoring node): %s", node, msg)
2769           continue
2770         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2771                                               self.op.vg_name,
2772                                               constants.MIN_VG_SIZE)
2773         if vgstatus:
2774           raise errors.OpPrereqError("Error on node '%s': %s" %
2775                                      (node, vgstatus), errors.ECODE_ENVIRON)
2776
2777     if self.op.drbd_helper:
2778       # checks given drbd helper on all nodes
2779       helpers = self.rpc.call_drbd_helper(node_list)
2780       for node in node_list:
2781         ninfo = self.cfg.GetNodeInfo(node)
2782         if ninfo.offline:
2783           self.LogInfo("Not checking drbd helper on offline node %s", node)
2784           continue
2785         msg = helpers[node].fail_msg
2786         if msg:
2787           raise errors.OpPrereqError("Error checking drbd helper on node"
2788                                      " '%s': %s" % (node, msg),
2789                                      errors.ECODE_ENVIRON)
2790         node_helper = helpers[node].payload
2791         if node_helper != self.op.drbd_helper:
2792           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2793                                      (node, node_helper), errors.ECODE_ENVIRON)
2794
2795     self.cluster = cluster = self.cfg.GetClusterInfo()
2796     # validate params changes
2797     if self.op.beparams:
2798       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2799       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2800
2801     if self.op.ndparams:
2802       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2803       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2804
2805       # TODO: we need a more general way to handle resetting
2806       # cluster-level parameters to default values
2807       if self.new_ndparams["oob_program"] == "":
2808         self.new_ndparams["oob_program"] = \
2809             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2810
2811     if self.op.nicparams:
2812       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2813       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2814       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2815       nic_errors = []
2816
2817       # check all instances for consistency
2818       for instance in self.cfg.GetAllInstancesInfo().values():
2819         for nic_idx, nic in enumerate(instance.nics):
2820           params_copy = copy.deepcopy(nic.nicparams)
2821           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2822
2823           # check parameter syntax
2824           try:
2825             objects.NIC.CheckParameterSyntax(params_filled)
2826           except errors.ConfigurationError, err:
2827             nic_errors.append("Instance %s, nic/%d: %s" %
2828                               (instance.name, nic_idx, err))
2829
2830           # if we're moving instances to routed, check that they have an ip
2831           target_mode = params_filled[constants.NIC_MODE]
2832           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2833             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2834                               (instance.name, nic_idx))
2835       if nic_errors:
2836         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2837                                    "\n".join(nic_errors))
2838
2839     # hypervisor list/parameters
2840     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2841     if self.op.hvparams:
2842       for hv_name, hv_dict in self.op.hvparams.items():
2843         if hv_name not in self.new_hvparams:
2844           self.new_hvparams[hv_name] = hv_dict
2845         else:
2846           self.new_hvparams[hv_name].update(hv_dict)
2847
2848     # os hypervisor parameters
2849     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2850     if self.op.os_hvp:
2851       for os_name, hvs in self.op.os_hvp.items():
2852         if os_name not in self.new_os_hvp:
2853           self.new_os_hvp[os_name] = hvs
2854         else:
2855           for hv_name, hv_dict in hvs.items():
2856             if hv_name not in self.new_os_hvp[os_name]:
2857               self.new_os_hvp[os_name][hv_name] = hv_dict
2858             else:
2859               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2860
2861     # os parameters
2862     self.new_osp = objects.FillDict(cluster.osparams, {})
2863     if self.op.osparams:
2864       for os_name, osp in self.op.osparams.items():
2865         if os_name not in self.new_osp:
2866           self.new_osp[os_name] = {}
2867
2868         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2869                                                   use_none=True)
2870
2871         if not self.new_osp[os_name]:
2872           # we removed all parameters
2873           del self.new_osp[os_name]
2874         else:
2875           # check the parameter validity (remote check)
2876           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2877                          os_name, self.new_osp[os_name])
2878
2879     # changes to the hypervisor list
2880     if self.op.enabled_hypervisors is not None:
2881       self.hv_list = self.op.enabled_hypervisors
2882       for hv in self.hv_list:
2883         # if the hypervisor doesn't already exist in the cluster
2884         # hvparams, we initialize it to empty, and then (in both
2885         # cases) we make sure to fill the defaults, as we might not
2886         # have a complete defaults list if the hypervisor wasn't
2887         # enabled before
2888         if hv not in new_hvp:
2889           new_hvp[hv] = {}
2890         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2891         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2892     else:
2893       self.hv_list = cluster.enabled_hypervisors
2894
2895     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2896       # either the enabled list has changed, or the parameters have, validate
2897       for hv_name, hv_params in self.new_hvparams.items():
2898         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2899             (self.op.enabled_hypervisors and
2900              hv_name in self.op.enabled_hypervisors)):
2901           # either this is a new hypervisor, or its parameters have changed
2902           hv_class = hypervisor.GetHypervisor(hv_name)
2903           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2904           hv_class.CheckParameterSyntax(hv_params)
2905           _CheckHVParams(self, node_list, hv_name, hv_params)
2906
2907     if self.op.os_hvp:
2908       # no need to check any newly-enabled hypervisors, since the
2909       # defaults have already been checked in the above code-block
2910       for os_name, os_hvp in self.new_os_hvp.items():
2911         for hv_name, hv_params in os_hvp.items():
2912           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2913           # we need to fill in the new os_hvp on top of the actual hv_p
2914           cluster_defaults = self.new_hvparams.get(hv_name, {})
2915           new_osp = objects.FillDict(cluster_defaults, hv_params)
2916           hv_class = hypervisor.GetHypervisor(hv_name)
2917           hv_class.CheckParameterSyntax(new_osp)
2918           _CheckHVParams(self, node_list, hv_name, new_osp)
2919
2920     if self.op.default_iallocator:
2921       alloc_script = utils.FindFile(self.op.default_iallocator,
2922                                     constants.IALLOCATOR_SEARCH_PATH,
2923                                     os.path.isfile)
2924       if alloc_script is None:
2925         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2926                                    " specified" % self.op.default_iallocator,
2927                                    errors.ECODE_INVAL)
2928
2929   def Exec(self, feedback_fn):
2930     """Change the parameters of the cluster.
2931
2932     """
2933     if self.op.vg_name is not None:
2934       new_volume = self.op.vg_name
2935       if not new_volume:
2936         new_volume = None
2937       if new_volume != self.cfg.GetVGName():
2938         self.cfg.SetVGName(new_volume)
2939       else:
2940         feedback_fn("Cluster LVM configuration already in desired"
2941                     " state, not changing")
2942     if self.op.drbd_helper is not None:
2943       new_helper = self.op.drbd_helper
2944       if not new_helper:
2945         new_helper = None
2946       if new_helper != self.cfg.GetDRBDHelper():
2947         self.cfg.SetDRBDHelper(new_helper)
2948       else:
2949         feedback_fn("Cluster DRBD helper already in desired state,"
2950                     " not changing")
2951     if self.op.hvparams:
2952       self.cluster.hvparams = self.new_hvparams
2953     if self.op.os_hvp:
2954       self.cluster.os_hvp = self.new_os_hvp
2955     if self.op.enabled_hypervisors is not None:
2956       self.cluster.hvparams = self.new_hvparams
2957       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2958     if self.op.beparams:
2959       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2960     if self.op.nicparams:
2961       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2962     if self.op.osparams:
2963       self.cluster.osparams = self.new_osp
2964     if self.op.ndparams:
2965       self.cluster.ndparams = self.new_ndparams
2966
2967     if self.op.candidate_pool_size is not None:
2968       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2969       # we need to update the pool size here, otherwise the save will fail
2970       _AdjustCandidatePool(self, [])
2971
2972     if self.op.maintain_node_health is not None:
2973       self.cluster.maintain_node_health = self.op.maintain_node_health
2974
2975     if self.op.prealloc_wipe_disks is not None:
2976       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2977
2978     if self.op.add_uids is not None:
2979       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2980
2981     if self.op.remove_uids is not None:
2982       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2983
2984     if self.op.uid_pool is not None:
2985       self.cluster.uid_pool = self.op.uid_pool
2986
2987     if self.op.default_iallocator is not None:
2988       self.cluster.default_iallocator = self.op.default_iallocator
2989
2990     if self.op.reserved_lvs is not None:
2991       self.cluster.reserved_lvs = self.op.reserved_lvs
2992
2993     def helper_os(aname, mods, desc):
2994       desc += " OS list"
2995       lst = getattr(self.cluster, aname)
2996       for key, val in mods:
2997         if key == constants.DDM_ADD:
2998           if val in lst:
2999             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3000           else:
3001             lst.append(val)
3002         elif key == constants.DDM_REMOVE:
3003           if val in lst:
3004             lst.remove(val)
3005           else:
3006             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3007         else:
3008           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3009
3010     if self.op.hidden_os:
3011       helper_os("hidden_os", self.op.hidden_os, "hidden")
3012
3013     if self.op.blacklisted_os:
3014       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3015
3016     if self.op.master_netdev:
3017       master = self.cfg.GetMasterNode()
3018       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3019                   self.cluster.master_netdev)
3020       result = self.rpc.call_node_stop_master(master, False)
3021       result.Raise("Could not disable the master ip")
3022       feedback_fn("Changing master_netdev from %s to %s" %
3023                   (self.cluster.master_netdev, self.op.master_netdev))
3024       self.cluster.master_netdev = self.op.master_netdev
3025
3026     self.cfg.Update(self.cluster, feedback_fn)
3027
3028     if self.op.master_netdev:
3029       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3030                   self.op.master_netdev)
3031       result = self.rpc.call_node_start_master(master, False, False)
3032       if result.fail_msg:
3033         self.LogWarning("Could not re-enable the master ip on"
3034                         " the master, please restart manually: %s",
3035                         result.fail_msg)
3036
3037
3038 def _UploadHelper(lu, nodes, fname):
3039   """Helper for uploading a file and showing warnings.
3040
3041   """
3042   if os.path.exists(fname):
3043     result = lu.rpc.call_upload_file(nodes, fname)
3044     for to_node, to_result in result.items():
3045       msg = to_result.fail_msg
3046       if msg:
3047         msg = ("Copy of file %s to node %s failed: %s" %
3048                (fname, to_node, msg))
3049         lu.proc.LogWarning(msg)
3050
3051
3052 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3053   """Distribute additional files which are part of the cluster configuration.
3054
3055   ConfigWriter takes care of distributing the config and ssconf files, but
3056   there are more files which should be distributed to all nodes. This function
3057   makes sure those are copied.
3058
3059   @param lu: calling logical unit
3060   @param additional_nodes: list of nodes not in the config to distribute to
3061   @type additional_vm: boolean
3062   @param additional_vm: whether the additional nodes are vm-capable or not
3063
3064   """
3065   # 1. Gather target nodes
3066   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3067   dist_nodes = lu.cfg.GetOnlineNodeList()
3068   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3069   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3070   if additional_nodes is not None:
3071     dist_nodes.extend(additional_nodes)
3072     if additional_vm:
3073       vm_nodes.extend(additional_nodes)
3074   if myself.name in dist_nodes:
3075     dist_nodes.remove(myself.name)
3076   if myself.name in vm_nodes:
3077     vm_nodes.remove(myself.name)
3078
3079   # 2. Gather files to distribute
3080   dist_files = set([constants.ETC_HOSTS,
3081                     constants.SSH_KNOWN_HOSTS_FILE,
3082                     constants.RAPI_CERT_FILE,
3083                     constants.RAPI_USERS_FILE,
3084                     constants.CONFD_HMAC_KEY,
3085                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3086                    ])
3087
3088   vm_files = set()
3089   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3090   for hv_name in enabled_hypervisors:
3091     hv_class = hypervisor.GetHypervisor(hv_name)
3092     vm_files.update(hv_class.GetAncillaryFiles())
3093
3094   # 3. Perform the files upload
3095   for fname in dist_files:
3096     _UploadHelper(lu, dist_nodes, fname)
3097   for fname in vm_files:
3098     _UploadHelper(lu, vm_nodes, fname)
3099
3100
3101 class LUClusterRedistConf(NoHooksLU):
3102   """Force the redistribution of cluster configuration.
3103
3104   This is a very simple LU.
3105
3106   """
3107   REQ_BGL = False
3108
3109   def ExpandNames(self):
3110     self.needed_locks = {
3111       locking.LEVEL_NODE: locking.ALL_SET,
3112     }
3113     self.share_locks[locking.LEVEL_NODE] = 1
3114
3115   def Exec(self, feedback_fn):
3116     """Redistribute the configuration.
3117
3118     """
3119     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3120     _RedistributeAncillaryFiles(self)
3121
3122
3123 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3124   """Sleep and poll for an instance's disk to sync.
3125
3126   """
3127   if not instance.disks or disks is not None and not disks:
3128     return True
3129
3130   disks = _ExpandCheckDisks(instance, disks)
3131
3132   if not oneshot:
3133     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3134
3135   node = instance.primary_node
3136
3137   for dev in disks:
3138     lu.cfg.SetDiskID(dev, node)
3139
3140   # TODO: Convert to utils.Retry
3141
3142   retries = 0
3143   degr_retries = 10 # in seconds, as we sleep 1 second each time
3144   while True:
3145     max_time = 0
3146     done = True
3147     cumul_degraded = False
3148     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3149     msg = rstats.fail_msg
3150     if msg:
3151       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3152       retries += 1
3153       if retries >= 10:
3154         raise errors.RemoteError("Can't contact node %s for mirror data,"
3155                                  " aborting." % node)
3156       time.sleep(6)
3157       continue
3158     rstats = rstats.payload
3159     retries = 0
3160     for i, mstat in enumerate(rstats):
3161       if mstat is None:
3162         lu.LogWarning("Can't compute data for node %s/%s",
3163                            node, disks[i].iv_name)
3164         continue
3165
3166       cumul_degraded = (cumul_degraded or
3167                         (mstat.is_degraded and mstat.sync_percent is None))
3168       if mstat.sync_percent is not None:
3169         done = False
3170         if mstat.estimated_time is not None:
3171           rem_time = ("%s remaining (estimated)" %
3172                       utils.FormatSeconds(mstat.estimated_time))
3173           max_time = mstat.estimated_time
3174         else:
3175           rem_time = "no time estimate"
3176         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3177                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3178
3179     # if we're done but degraded, let's do a few small retries, to
3180     # make sure we see a stable and not transient situation; therefore
3181     # we force restart of the loop
3182     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3183       logging.info("Degraded disks found, %d retries left", degr_retries)
3184       degr_retries -= 1
3185       time.sleep(1)
3186       continue
3187
3188     if done or oneshot:
3189       break
3190
3191     time.sleep(min(60, max_time))
3192
3193   if done:
3194     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3195   return not cumul_degraded
3196
3197
3198 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3199   """Check that mirrors are not degraded.
3200
3201   The ldisk parameter, if True, will change the test from the
3202   is_degraded attribute (which represents overall non-ok status for
3203   the device(s)) to the ldisk (representing the local storage status).
3204
3205   """
3206   lu.cfg.SetDiskID(dev, node)
3207
3208   result = True
3209
3210   if on_primary or dev.AssembleOnSecondary():
3211     rstats = lu.rpc.call_blockdev_find(node, dev)
3212     msg = rstats.fail_msg
3213     if msg:
3214       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3215       result = False
3216     elif not rstats.payload:
3217       lu.LogWarning("Can't find disk on node %s", node)
3218       result = False
3219     else:
3220       if ldisk:
3221         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3222       else:
3223         result = result and not rstats.payload.is_degraded
3224
3225   if dev.children:
3226     for child in dev.children:
3227       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3228
3229   return result
3230
3231
3232 class LUOobCommand(NoHooksLU):
3233   """Logical unit for OOB handling.
3234
3235   """
3236   REG_BGL = False
3237
3238   def CheckPrereq(self):
3239     """Check prerequisites.
3240
3241     This checks:
3242      - the node exists in the configuration
3243      - OOB is supported
3244
3245     Any errors are signaled by raising errors.OpPrereqError.
3246
3247     """
3248     self.nodes = []
3249     for node_name in self.op.node_names:
3250       node = self.cfg.GetNodeInfo(node_name)
3251
3252       if node is None:
3253         raise errors.OpPrereqError("Node %s not found" % node_name,
3254                                    errors.ECODE_NOENT)
3255       else:
3256         self.nodes.append(node)
3257
3258       if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3259         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3260                                     " not marked offline") % node_name,
3261                                    errors.ECODE_STATE)
3262
3263   def ExpandNames(self):
3264     """Gather locks we need.
3265
3266     """
3267     if self.op.node_names:
3268       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3269                             for name in self.op.node_names]
3270     else:
3271       self.op.node_names = self.cfg.GetNodeList()
3272
3273     self.needed_locks = {
3274       locking.LEVEL_NODE: self.op.node_names,
3275       }
3276
3277   def Exec(self, feedback_fn):
3278     """Execute OOB and return result if we expect any.
3279
3280     """
3281     master_node = self.cfg.GetMasterNode()
3282     ret = []
3283
3284     for node in self.nodes:
3285       node_entry = [(constants.RS_NORMAL, node.name)]
3286       ret.append(node_entry)
3287
3288       oob_program = _SupportsOob(self.cfg, node)
3289
3290       if not oob_program:
3291         node_entry.append((constants.RS_UNAVAIL, None))
3292         continue
3293
3294       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3295                    self.op.command, oob_program, node.name)
3296       result = self.rpc.call_run_oob(master_node, oob_program,
3297                                      self.op.command, node.name,
3298                                      self.op.timeout)
3299
3300       if result.fail_msg:
3301         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3302                         node.name, result.fail_msg)
3303         node_entry.append((constants.RS_NODATA, None))
3304       else:
3305         try:
3306           self._CheckPayload(result)
3307         except errors.OpExecError, err:
3308           self.LogWarning("The payload returned by '%s' is not valid: %s",
3309                           node.name, err)
3310           node_entry.append((constants.RS_NODATA, None))
3311         else:
3312           if self.op.command == constants.OOB_HEALTH:
3313             # For health we should log important events
3314             for item, status in result.payload:
3315               if status in [constants.OOB_STATUS_WARNING,
3316                             constants.OOB_STATUS_CRITICAL]:
3317                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3318                                 node.name, item, status)
3319
3320           if self.op.command == constants.OOB_POWER_ON:
3321             node.powered = True
3322           elif self.op.command == constants.OOB_POWER_OFF:
3323             node.powered = False
3324           elif self.op.command == constants.OOB_POWER_STATUS:
3325             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3326             if powered != node.powered:
3327               logging.warning(("Recorded power state (%s) of node '%s' does not"
3328                                " match actual power state (%s)"), node.powered,
3329                               node.name, powered)
3330
3331           # For configuration changing commands we should update the node
3332           if self.op.command in (constants.OOB_POWER_ON,
3333                                  constants.OOB_POWER_OFF):
3334             self.cfg.Update(node, feedback_fn)
3335
3336           node_entry.append((constants.RS_NORMAL, result.payload))
3337
3338     return ret
3339
3340   def _CheckPayload(self, result):
3341     """Checks if the payload is valid.
3342
3343     @param result: RPC result
3344     @raises errors.OpExecError: If payload is not valid
3345
3346     """
3347     errs = []
3348     if self.op.command == constants.OOB_HEALTH:
3349       if not isinstance(result.payload, list):
3350         errs.append("command 'health' is expected to return a list but got %s" %
3351                     type(result.payload))
3352       else:
3353         for item, status in result.payload:
3354           if status not in constants.OOB_STATUSES:
3355             errs.append("health item '%s' has invalid status '%s'" %
3356                         (item, status))
3357
3358     if self.op.command == constants.OOB_POWER_STATUS:
3359       if not isinstance(result.payload, dict):
3360         errs.append("power-status is expected to return a dict but got %s" %
3361                     type(result.payload))
3362
3363     if self.op.command in [
3364         constants.OOB_POWER_ON,
3365         constants.OOB_POWER_OFF,
3366         constants.OOB_POWER_CYCLE,
3367         ]:
3368       if result.payload is not None:
3369         errs.append("%s is expected to not return payload but got '%s'" %
3370                     (self.op.command, result.payload))
3371
3372     if errs:
3373       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3374                                utils.CommaJoin(errs))
3375
3376
3377
3378 class LUOsDiagnose(NoHooksLU):
3379   """Logical unit for OS diagnose/query.
3380
3381   """
3382   REQ_BGL = False
3383   _HID = "hidden"
3384   _BLK = "blacklisted"
3385   _VLD = "valid"
3386   _FIELDS_STATIC = utils.FieldSet()
3387   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3388                                    "parameters", "api_versions", _HID, _BLK)
3389
3390   def CheckArguments(self):
3391     if self.op.names:
3392       raise errors.OpPrereqError("Selective OS query not supported",
3393                                  errors.ECODE_INVAL)
3394
3395     _CheckOutputFields(static=self._FIELDS_STATIC,
3396                        dynamic=self._FIELDS_DYNAMIC,
3397                        selected=self.op.output_fields)
3398
3399   def ExpandNames(self):
3400     # Lock all nodes, in shared mode
3401     # Temporary removal of locks, should be reverted later
3402     # TODO: reintroduce locks when they are lighter-weight
3403     self.needed_locks = {}
3404     #self.share_locks[locking.LEVEL_NODE] = 1
3405     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3406
3407   @staticmethod
3408   def _DiagnoseByOS(rlist):
3409     """Remaps a per-node return list into an a per-os per-node dictionary
3410
3411     @param rlist: a map with node names as keys and OS objects as values
3412
3413     @rtype: dict
3414     @return: a dictionary with osnames as keys and as value another
3415         map, with nodes as keys and tuples of (path, status, diagnose,
3416         variants, parameters, api_versions) as values, eg::
3417
3418           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3419                                      (/srv/..., False, "invalid api")],
3420                            "node2": [(/srv/..., True, "", [], [])]}
3421           }
3422
3423     """
3424     all_os = {}
3425     # we build here the list of nodes that didn't fail the RPC (at RPC
3426     # level), so that nodes with a non-responding node daemon don't
3427     # make all OSes invalid
3428     good_nodes = [node_name for node_name in rlist
3429                   if not rlist[node_name].fail_msg]
3430     for node_name, nr in rlist.items():
3431       if nr.fail_msg or not nr.payload:
3432         continue
3433       for (name, path, status, diagnose, variants,
3434            params, api_versions) in nr.payload:
3435         if name not in all_os:
3436           # build a list of nodes for this os containing empty lists
3437           # for each node in node_list
3438           all_os[name] = {}
3439           for nname in good_nodes:
3440             all_os[name][nname] = []
3441         # convert params from [name, help] to (name, help)
3442         params = [tuple(v) for v in params]
3443         all_os[name][node_name].append((path, status, diagnose,
3444                                         variants, params, api_versions))
3445     return all_os
3446
3447   def Exec(self, feedback_fn):
3448     """Compute the list of OSes.
3449
3450     """
3451     valid_nodes = [node.name
3452                    for node in self.cfg.GetAllNodesInfo().values()
3453                    if not node.offline and node.vm_capable]
3454     node_data = self.rpc.call_os_diagnose(valid_nodes)
3455     pol = self._DiagnoseByOS(node_data)
3456     output = []
3457     cluster = self.cfg.GetClusterInfo()
3458
3459     for os_name in utils.NiceSort(pol.keys()):
3460       os_data = pol[os_name]
3461       row = []
3462       valid = True
3463       (variants, params, api_versions) = null_state = (set(), set(), set())
3464       for idx, osl in enumerate(os_data.values()):
3465         valid = bool(valid and osl and osl[0][1])
3466         if not valid:
3467           (variants, params, api_versions) = null_state
3468           break
3469         node_variants, node_params, node_api = osl[0][3:6]
3470         if idx == 0: # first entry
3471           variants = set(node_variants)
3472           params = set(node_params)
3473           api_versions = set(node_api)
3474         else: # keep consistency
3475           variants.intersection_update(node_variants)
3476           params.intersection_update(node_params)
3477           api_versions.intersection_update(node_api)
3478
3479       is_hid = os_name in cluster.hidden_os
3480       is_blk = os_name in cluster.blacklisted_os
3481       if ((self._HID not in self.op.output_fields and is_hid) or
3482           (self._BLK not in self.op.output_fields and is_blk) or
3483           (self._VLD not in self.op.output_fields and not valid)):
3484         continue
3485
3486       for field in self.op.output_fields:
3487         if field == "name":
3488           val = os_name
3489         elif field == self._VLD:
3490           val = valid
3491         elif field == "node_status":
3492           # this is just a copy of the dict
3493           val = {}
3494           for node_name, nos_list in os_data.items():
3495             val[node_name] = nos_list
3496         elif field == "variants":
3497           val = utils.NiceSort(list(variants))
3498         elif field == "parameters":
3499           val = list(params)
3500         elif field == "api_versions":
3501           val = list(api_versions)
3502         elif field == self._HID:
3503           val = is_hid
3504         elif field == self._BLK:
3505           val = is_blk
3506         else:
3507           raise errors.ParameterError(field)
3508         row.append(val)
3509       output.append(row)
3510
3511     return output
3512
3513
3514 class LUNodeRemove(LogicalUnit):
3515   """Logical unit for removing a node.
3516
3517   """
3518   HPATH = "node-remove"
3519   HTYPE = constants.HTYPE_NODE
3520
3521   def BuildHooksEnv(self):
3522     """Build hooks env.
3523
3524     This doesn't run on the target node in the pre phase as a failed
3525     node would then be impossible to remove.
3526
3527     """
3528     env = {
3529       "OP_TARGET": self.op.node_name,
3530       "NODE_NAME": self.op.node_name,
3531       }
3532     all_nodes = self.cfg.GetNodeList()
3533     try:
3534       all_nodes.remove(self.op.node_name)
3535     except ValueError:
3536       logging.warning("Node %s which is about to be removed not found"
3537                       " in the all nodes list", self.op.node_name)
3538     return env, all_nodes, all_nodes
3539
3540   def CheckPrereq(self):
3541     """Check prerequisites.
3542
3543     This checks:
3544      - the node exists in the configuration
3545      - it does not have primary or secondary instances
3546      - it's not the master
3547
3548     Any errors are signaled by raising errors.OpPrereqError.
3549
3550     """
3551     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3552     node = self.cfg.GetNodeInfo(self.op.node_name)
3553     assert node is not None
3554
3555     instance_list = self.cfg.GetInstanceList()
3556
3557     masternode = self.cfg.GetMasterNode()
3558     if node.name == masternode:
3559       raise errors.OpPrereqError("Node is the master node,"
3560                                  " you need to failover first.",
3561                                  errors.ECODE_INVAL)
3562
3563     for instance_name in instance_list:
3564       instance = self.cfg.GetInstanceInfo(instance_name)
3565       if node.name in instance.all_nodes:
3566         raise errors.OpPrereqError("Instance %s is still running on the node,"
3567                                    " please remove first." % instance_name,
3568                                    errors.ECODE_INVAL)
3569     self.op.node_name = node.name
3570     self.node = node
3571
3572   def Exec(self, feedback_fn):
3573     """Removes the node from the cluster.
3574
3575     """
3576     node = self.node
3577     logging.info("Stopping the node daemon and removing configs from node %s",
3578                  node.name)
3579
3580     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3581
3582     # Promote nodes to master candidate as needed
3583     _AdjustCandidatePool(self, exceptions=[node.name])
3584     self.context.RemoveNode(node.name)
3585
3586     # Run post hooks on the node before it's removed
3587     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3588     try:
3589       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3590     except:
3591       # pylint: disable-msg=W0702
3592       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3593
3594     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3595     msg = result.fail_msg
3596     if msg:
3597       self.LogWarning("Errors encountered on the remote node while leaving"
3598                       " the cluster: %s", msg)
3599
3600     # Remove node from our /etc/hosts
3601     if self.cfg.GetClusterInfo().modify_etc_hosts:
3602       master_node = self.cfg.GetMasterNode()
3603       result = self.rpc.call_etc_hosts_modify(master_node,
3604                                               constants.ETC_HOSTS_REMOVE,
3605                                               node.name, None)
3606       result.Raise("Can't update hosts file with new host data")
3607       _RedistributeAncillaryFiles(self)
3608
3609
3610 class _NodeQuery(_QueryBase):
3611   FIELDS = query.NODE_FIELDS
3612
3613   def ExpandNames(self, lu):
3614     lu.needed_locks = {}
3615     lu.share_locks[locking.LEVEL_NODE] = 1
3616
3617     if self.names:
3618       self.wanted = _GetWantedNodes(lu, self.names)
3619     else:
3620       self.wanted = locking.ALL_SET
3621
3622     self.do_locking = (self.use_locking and
3623                        query.NQ_LIVE in self.requested_data)
3624
3625     if self.do_locking:
3626       # if we don't request only static fields, we need to lock the nodes
3627       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3628
3629   def DeclareLocks(self, lu, level):
3630     pass
3631
3632   def _GetQueryData(self, lu):
3633     """Computes the list of nodes and their attributes.
3634
3635     """
3636     all_info = lu.cfg.GetAllNodesInfo()
3637
3638     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3639
3640     # Gather data as requested
3641     if query.NQ_LIVE in self.requested_data:
3642       # filter out non-vm_capable nodes
3643       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3644
3645       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3646                                         lu.cfg.GetHypervisorType())
3647       live_data = dict((name, nresult.payload)
3648                        for (name, nresult) in node_data.items()
3649                        if not nresult.fail_msg and nresult.payload)
3650     else:
3651       live_data = None
3652
3653     if query.NQ_INST in self.requested_data:
3654       node_to_primary = dict([(name, set()) for name in nodenames])
3655       node_to_secondary = dict([(name, set()) for name in nodenames])
3656
3657       inst_data = lu.cfg.GetAllInstancesInfo()
3658
3659       for inst in inst_data.values():
3660         if inst.primary_node in node_to_primary:
3661           node_to_primary[inst.primary_node].add(inst.name)
3662         for secnode in inst.secondary_nodes:
3663           if secnode in node_to_secondary:
3664             node_to_secondary[secnode].add(inst.name)
3665     else:
3666       node_to_primary = None
3667       node_to_secondary = None
3668
3669     if query.NQ_OOB in self.requested_data:
3670       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3671                          for name, node in all_info.iteritems())
3672     else:
3673       oob_support = None
3674
3675     if query.NQ_GROUP in self.requested_data:
3676       groups = lu.cfg.GetAllNodeGroupsInfo()
3677     else:
3678       groups = {}
3679
3680     return query.NodeQueryData([all_info[name] for name in nodenames],
3681                                live_data, lu.cfg.GetMasterNode(),
3682                                node_to_primary, node_to_secondary, groups,
3683                                oob_support, lu.cfg.GetClusterInfo())
3684
3685
3686 class LUNodeQuery(NoHooksLU):
3687   """Logical unit for querying nodes.
3688
3689   """
3690   # pylint: disable-msg=W0142
3691   REQ_BGL = False
3692
3693   def CheckArguments(self):
3694     self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3695                          self.op.use_locking)
3696
3697   def ExpandNames(self):
3698     self.nq.ExpandNames(self)
3699
3700   def Exec(self, feedback_fn):
3701     return self.nq.OldStyleQuery(self)
3702
3703
3704 class LUNodeQueryvols(NoHooksLU):
3705   """Logical unit for getting volumes on node(s).
3706
3707   """
3708   REQ_BGL = False
3709   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3710   _FIELDS_STATIC = utils.FieldSet("node")
3711
3712   def CheckArguments(self):
3713     _CheckOutputFields(static=self._FIELDS_STATIC,
3714                        dynamic=self._FIELDS_DYNAMIC,
3715                        selected=self.op.output_fields)
3716
3717   def ExpandNames(self):
3718     self.needed_locks = {}
3719     self.share_locks[locking.LEVEL_NODE] = 1
3720     if not self.op.nodes:
3721       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3722     else:
3723       self.needed_locks[locking.LEVEL_NODE] = \
3724         _GetWantedNodes(self, self.op.nodes)
3725
3726   def Exec(self, feedback_fn):
3727     """Computes the list of nodes and their attributes.
3728
3729     """
3730     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3731     volumes = self.rpc.call_node_volumes(nodenames)
3732
3733     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3734              in self.cfg.GetInstanceList()]
3735
3736     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3737
3738     output = []
3739     for node in nodenames:
3740       nresult = volumes[node]
3741       if nresult.offline:
3742         continue
3743       msg = nresult.fail_msg
3744       if msg:
3745         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3746         continue
3747
3748       node_vols = nresult.payload[:]
3749       node_vols.sort(key=lambda vol: vol['dev'])
3750
3751       for vol in node_vols:
3752         node_output = []
3753         for field in self.op.output_fields:
3754           if field == "node":
3755             val = node
3756           elif field == "phys":
3757             val = vol['dev']
3758           elif field == "vg":
3759             val = vol['vg']
3760           elif field == "name":
3761             val = vol['name']
3762           elif field == "size":
3763             val = int(float(vol['size']))
3764           elif field == "instance":
3765             for inst in ilist:
3766               if node not in lv_by_node[inst]:
3767                 continue
3768               if vol['name'] in lv_by_node[inst][node]:
3769                 val = inst.name
3770                 break
3771             else:
3772               val = '-'
3773           else:
3774             raise errors.ParameterError(field)
3775           node_output.append(str(val))
3776
3777         output.append(node_output)
3778
3779     return output
3780
3781
3782 class LUNodeQueryStorage(NoHooksLU):
3783   """Logical unit for getting information on storage units on node(s).
3784
3785   """
3786   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3787   REQ_BGL = False
3788
3789   def CheckArguments(self):
3790     _CheckOutputFields(static=self._FIELDS_STATIC,
3791                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3792                        selected=self.op.output_fields)
3793
3794   def ExpandNames(self):
3795     self.needed_locks = {}
3796     self.share_locks[locking.LEVEL_NODE] = 1
3797
3798     if self.op.nodes:
3799       self.needed_locks[locking.LEVEL_NODE] = \
3800         _GetWantedNodes(self, self.op.nodes)
3801     else:
3802       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3803
3804   def Exec(self, feedback_fn):
3805     """Computes the list of nodes and their attributes.
3806
3807     """
3808     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3809
3810     # Always get name to sort by
3811     if constants.SF_NAME in self.op.output_fields:
3812       fields = self.op.output_fields[:]
3813     else:
3814       fields = [constants.SF_NAME] + self.op.output_fields
3815
3816     # Never ask for node or type as it's only known to the LU
3817     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3818       while extra in fields:
3819         fields.remove(extra)
3820
3821     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3822     name_idx = field_idx[constants.SF_NAME]
3823
3824     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3825     data = self.rpc.call_storage_list(self.nodes,
3826                                       self.op.storage_type, st_args,
3827                                       self.op.name, fields)
3828
3829     result = []
3830
3831     for node in utils.NiceSort(self.nodes):
3832       nresult = data[node]
3833       if nresult.offline:
3834         continue
3835
3836       msg = nresult.fail_msg
3837       if msg:
3838         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3839         continue
3840
3841       rows = dict([(row[name_idx], row) for row in nresult.payload])
3842
3843       for name in utils.NiceSort(rows.keys()):
3844         row = rows[name]
3845
3846         out = []
3847
3848         for field in self.op.output_fields:
3849           if field == constants.SF_NODE:
3850             val = node
3851           elif field == constants.SF_TYPE:
3852             val = self.op.storage_type
3853           elif field in field_idx:
3854             val = row[field_idx[field]]
3855           else:
3856             raise errors.ParameterError(field)
3857
3858           out.append(val)
3859
3860         result.append(out)
3861
3862     return result
3863
3864
3865 class _InstanceQuery(_QueryBase):
3866   FIELDS = query.INSTANCE_FIELDS
3867
3868   def ExpandNames(self, lu):
3869     lu.needed_locks = {}
3870     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3871     lu.share_locks[locking.LEVEL_NODE] = 1
3872
3873     if self.names:
3874       self.wanted = _GetWantedInstances(lu, self.names)
3875     else:
3876       self.wanted = locking.ALL_SET
3877
3878     self.do_locking = (self.use_locking and
3879                        query.IQ_LIVE in self.requested_data)
3880     if self.do_locking:
3881       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3882       lu.needed_locks[locking.LEVEL_NODE] = []
3883       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3884
3885   def DeclareLocks(self, lu, level):
3886     if level == locking.LEVEL_NODE and self.do_locking:
3887       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3888
3889   def _GetQueryData(self, lu):
3890     """Computes the list of instances and their attributes.
3891
3892     """
3893     cluster = lu.cfg.GetClusterInfo()
3894     all_info = lu.cfg.GetAllInstancesInfo()
3895
3896     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3897
3898     instance_list = [all_info[name] for name in instance_names]
3899     nodes = frozenset(itertools.chain(*(inst.all_nodes
3900                                         for inst in instance_list)))
3901     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3902     bad_nodes = []
3903     offline_nodes = []
3904     wrongnode_inst = set()
3905
3906     # Gather data as requested
3907     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3908       live_data = {}
3909       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3910       for name in nodes:
3911         result = node_data[name]
3912         if result.offline:
3913           # offline nodes will be in both lists
3914           assert result.fail_msg
3915           offline_nodes.append(name)
3916         if result.fail_msg:
3917           bad_nodes.append(name)
3918         elif result.payload:
3919           for inst in result.payload:
3920             if inst in all_info:
3921               if all_info[inst].primary_node == name:
3922                 live_data.update(result.payload)
3923               else:
3924                 wrongnode_inst.add(inst)
3925             else:
3926               # orphan instance; we don't list it here as we don't
3927               # handle this case yet in the output of instance listing
3928               logging.warning("Orphan instance '%s' found on node %s",
3929                               inst, name)
3930         # else no instance is alive
3931     else:
3932       live_data = {}
3933
3934     if query.IQ_DISKUSAGE in self.requested_data:
3935       disk_usage = dict((inst.name,
3936                          _ComputeDiskSize(inst.disk_template,
3937                                           [{"size": disk.size}
3938                                            for disk in inst.disks]))
3939                         for inst in instance_list)
3940     else:
3941       disk_usage = None
3942
3943     if query.IQ_CONSOLE in self.requested_data:
3944       consinfo = {}
3945       for inst in instance_list:
3946         if inst.name in live_data:
3947           # Instance is running
3948           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3949         else:
3950           consinfo[inst.name] = None
3951       assert set(consinfo.keys()) == set(instance_names)
3952     else:
3953       consinfo = None
3954
3955     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3956                                    disk_usage, offline_nodes, bad_nodes,
3957                                    live_data, wrongnode_inst, consinfo)
3958
3959
3960 class LUQuery(NoHooksLU):
3961   """Query for resources/items of a certain kind.
3962
3963   """
3964   # pylint: disable-msg=W0142
3965   REQ_BGL = False
3966
3967   def CheckArguments(self):
3968     qcls = _GetQueryImplementation(self.op.what)
3969     names = qlang.ReadSimpleFilter("name", self.op.filter)
3970
3971     self.impl = qcls(names, self.op.fields, False)
3972
3973   def ExpandNames(self):
3974     self.impl.ExpandNames(self)
3975
3976   def DeclareLocks(self, level):
3977     self.impl.DeclareLocks(self, level)
3978
3979   def Exec(self, feedback_fn):
3980     return self.impl.NewStyleQuery(self)
3981
3982
3983 class LUQueryFields(NoHooksLU):
3984   """Query for resources/items of a certain kind.
3985
3986   """
3987   # pylint: disable-msg=W0142
3988   REQ_BGL = False
3989
3990   def CheckArguments(self):
3991     self.qcls = _GetQueryImplementation(self.op.what)
3992
3993   def ExpandNames(self):
3994     self.needed_locks = {}
3995
3996   def Exec(self, feedback_fn):
3997     return self.qcls.FieldsQuery(self.op.fields)
3998
3999
4000 class LUNodeModifyStorage(NoHooksLU):
4001   """Logical unit for modifying a storage volume on a node.
4002
4003   """
4004   REQ_BGL = False
4005
4006   def CheckArguments(self):
4007     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4008
4009     storage_type = self.op.storage_type
4010
4011     try:
4012       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4013     except KeyError:
4014       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4015                                  " modified" % storage_type,
4016                                  errors.ECODE_INVAL)
4017
4018     diff = set(self.op.changes.keys()) - modifiable
4019     if diff:
4020       raise errors.OpPrereqError("The following fields can not be modified for"
4021                                  " storage units of type '%s': %r" %
4022                                  (storage_type, list(diff)),
4023                                  errors.ECODE_INVAL)
4024
4025   def ExpandNames(self):
4026     self.needed_locks = {
4027       locking.LEVEL_NODE: self.op.node_name,
4028       }
4029
4030   def Exec(self, feedback_fn):
4031     """Computes the list of nodes and their attributes.
4032
4033     """
4034     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4035     result = self.rpc.call_storage_modify(self.op.node_name,
4036                                           self.op.storage_type, st_args,
4037                                           self.op.name, self.op.changes)
4038     result.Raise("Failed to modify storage unit '%s' on %s" %
4039                  (self.op.name, self.op.node_name))
4040
4041
4042 class LUNodeAdd(LogicalUnit):
4043   """Logical unit for adding node to the cluster.
4044
4045   """
4046   HPATH = "node-add"
4047   HTYPE = constants.HTYPE_NODE
4048   _NFLAGS = ["master_capable", "vm_capable"]
4049
4050   def CheckArguments(self):
4051     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4052     # validate/normalize the node name
4053     self.hostname = netutils.GetHostname(name=self.op.node_name,
4054                                          family=self.primary_ip_family)
4055     self.op.node_name = self.hostname.name
4056     if self.op.readd and self.op.group:
4057       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4058                                  " being readded", errors.ECODE_INVAL)
4059
4060   def BuildHooksEnv(self):
4061     """Build hooks env.
4062
4063     This will run on all nodes before, and on all nodes + the new node after.
4064
4065     """
4066     env = {
4067       "OP_TARGET": self.op.node_name,
4068       "NODE_NAME": self.op.node_name,
4069       "NODE_PIP": self.op.primary_ip,
4070       "NODE_SIP": self.op.secondary_ip,
4071       "MASTER_CAPABLE": str(self.op.master_capable),
4072       "VM_CAPABLE": str(self.op.vm_capable),
4073       }
4074     nodes_0 = self.cfg.GetNodeList()
4075     nodes_1 = nodes_0 + [self.op.node_name, ]
4076     return env, nodes_0, nodes_1
4077
4078   def CheckPrereq(self):
4079     """Check prerequisites.
4080
4081     This checks:
4082      - the new node is not already in the config
4083      - it is resolvable
4084      - its parameters (single/dual homed) matches the cluster
4085
4086     Any errors are signaled by raising errors.OpPrereqError.
4087
4088     """
4089     cfg = self.cfg
4090     hostname = self.hostname
4091     node = hostname.name
4092     primary_ip = self.op.primary_ip = hostname.ip
4093     if self.op.secondary_ip is None:
4094       if self.primary_ip_family == netutils.IP6Address.family:
4095         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4096                                    " IPv4 address must be given as secondary",
4097                                    errors.ECODE_INVAL)
4098       self.op.secondary_ip = primary_ip
4099
4100     secondary_ip = self.op.secondary_ip
4101     if not netutils.IP4Address.IsValid(secondary_ip):
4102       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4103                                  " address" % secondary_ip, errors.ECODE_INVAL)
4104
4105     node_list = cfg.GetNodeList()
4106     if not self.op.readd and node in node_list:
4107       raise errors.OpPrereqError("Node %s is already in the configuration" %
4108                                  node, errors.ECODE_EXISTS)
4109     elif self.op.readd and node not in node_list:
4110       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4111                                  errors.ECODE_NOENT)
4112
4113     self.changed_primary_ip = False
4114
4115     for existing_node_name in node_list:
4116       existing_node = cfg.GetNodeInfo(existing_node_name)
4117
4118       if self.op.readd and node == existing_node_name:
4119         if existing_node.secondary_ip != secondary_ip:
4120           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4121                                      " address configuration as before",
4122                                      errors.ECODE_INVAL)
4123         if existing_node.primary_ip != primary_ip:
4124           self.changed_primary_ip = True
4125
4126         continue
4127
4128       if (existing_node.primary_ip == primary_ip or
4129           existing_node.secondary_ip == primary_ip or
4130           existing_node.primary_ip == secondary_ip or
4131           existing_node.secondary_ip == secondary_ip):
4132         raise errors.OpPrereqError("New node ip address(es) conflict with"
4133                                    " existing node %s" % existing_node.name,
4134                                    errors.ECODE_NOTUNIQUE)
4135
4136     # After this 'if' block, None is no longer a valid value for the
4137     # _capable op attributes
4138     if self.op.readd:
4139       old_node = self.cfg.GetNodeInfo(node)
4140       assert old_node is not None, "Can't retrieve locked node %s" % node
4141       for attr in self._NFLAGS:
4142         if getattr(self.op, attr) is None:
4143           setattr(self.op, attr, getattr(old_node, attr))
4144     else:
4145       for attr in self._NFLAGS:
4146         if getattr(self.op, attr) is None:
4147           setattr(self.op, attr, True)
4148
4149     if self.op.readd and not self.op.vm_capable:
4150       pri, sec = cfg.GetNodeInstances(node)
4151       if pri or sec:
4152         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4153                                    " flag set to false, but it already holds"
4154                                    " instances" % node,
4155                                    errors.ECODE_STATE)
4156
4157     # check that the type of the node (single versus dual homed) is the
4158     # same as for the master
4159     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4160     master_singlehomed = myself.secondary_ip == myself.primary_ip
4161     newbie_singlehomed = secondary_ip == primary_ip
4162     if master_singlehomed != newbie_singlehomed:
4163       if master_singlehomed:
4164         raise errors.OpPrereqError("The master has no secondary ip but the"
4165                                    " new node has one",
4166                                    errors.ECODE_INVAL)
4167       else:
4168         raise errors.OpPrereqError("The master has a secondary ip but the"
4169                                    " new node doesn't have one",
4170                                    errors.ECODE_INVAL)
4171
4172     # checks reachability
4173     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4174       raise errors.OpPrereqError("Node not reachable by ping",
4175                                  errors.ECODE_ENVIRON)
4176
4177     if not newbie_singlehomed:
4178       # check reachability from my secondary ip to newbie's secondary ip
4179       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4180                            source=myself.secondary_ip):
4181         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4182                                    " based ping to node daemon port",
4183                                    errors.ECODE_ENVIRON)
4184
4185     if self.op.readd:
4186       exceptions = [node]
4187     else:
4188       exceptions = []
4189
4190     if self.op.master_capable:
4191       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4192     else:
4193       self.master_candidate = False
4194
4195     if self.op.readd:
4196       self.new_node = old_node
4197     else:
4198       node_group = cfg.LookupNodeGroup(self.op.group)
4199       self.new_node = objects.Node(name=node,
4200                                    primary_ip=primary_ip,
4201                                    secondary_ip=secondary_ip,
4202                                    master_candidate=self.master_candidate,
4203                                    offline=False, drained=False,
4204                                    group=node_group)
4205
4206     if self.op.ndparams:
4207       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4208
4209   def Exec(self, feedback_fn):
4210     """Adds the new node to the cluster.
4211
4212     """
4213     new_node = self.new_node
4214     node = new_node.name
4215
4216     # We adding a new node so we assume it's powered
4217     new_node.powered = True
4218
4219     # for re-adds, reset the offline/drained/master-candidate flags;
4220     # we need to reset here, otherwise offline would prevent RPC calls
4221     # later in the procedure; this also means that if the re-add
4222     # fails, we are left with a non-offlined, broken node
4223     if self.op.readd:
4224       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4225       self.LogInfo("Readding a node, the offline/drained flags were reset")
4226       # if we demote the node, we do cleanup later in the procedure
4227       new_node.master_candidate = self.master_candidate
4228       if self.changed_primary_ip:
4229         new_node.primary_ip = self.op.primary_ip
4230
4231     # copy the master/vm_capable flags
4232     for attr in self._NFLAGS:
4233       setattr(new_node, attr, getattr(self.op, attr))
4234
4235     # notify the user about any possible mc promotion
4236     if new_node.master_candidate:
4237       self.LogInfo("Node will be a master candidate")
4238
4239     if self.op.ndparams:
4240       new_node.ndparams = self.op.ndparams
4241     else:
4242       new_node.ndparams = {}
4243
4244     # check connectivity
4245     result = self.rpc.call_version([node])[node]
4246     result.Raise("Can't get version information from node %s" % node)
4247     if constants.PROTOCOL_VERSION == result.payload:
4248       logging.info("Communication to node %s fine, sw version %s match",
4249                    node, result.payload)
4250     else:
4251       raise errors.OpExecError("Version mismatch master version %s,"
4252                                " node version %s" %
4253                                (constants.PROTOCOL_VERSION, result.payload))
4254
4255     # Add node to our /etc/hosts, and add key to known_hosts
4256     if self.cfg.GetClusterInfo().modify_etc_hosts:
4257       master_node = self.cfg.GetMasterNode()
4258       result = self.rpc.call_etc_hosts_modify(master_node,
4259                                               constants.ETC_HOSTS_ADD,
4260                                               self.hostname.name,
4261                                               self.hostname.ip)
4262       result.Raise("Can't update hosts file with new host data")
4263
4264     if new_node.secondary_ip != new_node.primary_ip:
4265       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4266                                False)
4267
4268     node_verify_list = [self.cfg.GetMasterNode()]
4269     node_verify_param = {
4270       constants.NV_NODELIST: [node],
4271       # TODO: do a node-net-test as well?
4272     }
4273
4274     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4275                                        self.cfg.GetClusterName())
4276     for verifier in node_verify_list:
4277       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4278       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4279       if nl_payload:
4280         for failed in nl_payload:
4281           feedback_fn("ssh/hostname verification failed"
4282                       " (checking from %s): %s" %
4283                       (verifier, nl_payload[failed]))
4284         raise errors.OpExecError("ssh/hostname verification failed.")
4285
4286     if self.op.readd:
4287       _RedistributeAncillaryFiles(self)
4288       self.context.ReaddNode(new_node)
4289       # make sure we redistribute the config
4290       self.cfg.Update(new_node, feedback_fn)
4291       # and make sure the new node will not have old files around
4292       if not new_node.master_candidate:
4293         result = self.rpc.call_node_demote_from_mc(new_node.name)
4294         msg = result.fail_msg
4295         if msg:
4296           self.LogWarning("Node failed to demote itself from master"
4297                           " candidate status: %s" % msg)
4298     else:
4299       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4300                                   additional_vm=self.op.vm_capable)
4301       self.context.AddNode(new_node, self.proc.GetECId())
4302
4303
4304 class LUNodeSetParams(LogicalUnit):
4305   """Modifies the parameters of a node.
4306
4307   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4308       to the node role (as _ROLE_*)
4309   @cvar _R2F: a dictionary from node role to tuples of flags
4310   @cvar _FLAGS: a list of attribute names corresponding to the flags
4311
4312   """
4313   HPATH = "node-modify"
4314   HTYPE = constants.HTYPE_NODE
4315   REQ_BGL = False
4316   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4317   _F2R = {
4318     (True, False, False): _ROLE_CANDIDATE,
4319     (False, True, False): _ROLE_DRAINED,
4320     (False, False, True): _ROLE_OFFLINE,
4321     (False, False, False): _ROLE_REGULAR,
4322     }
4323   _R2F = dict((v, k) for k, v in _F2R.items())
4324   _FLAGS = ["master_candidate", "drained", "offline"]
4325
4326   def CheckArguments(self):
4327     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4328     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4329                 self.op.master_capable, self.op.vm_capable,
4330                 self.op.secondary_ip, self.op.ndparams]
4331     if all_mods.count(None) == len(all_mods):
4332       raise errors.OpPrereqError("Please pass at least one modification",
4333                                  errors.ECODE_INVAL)
4334     if all_mods.count(True) > 1:
4335       raise errors.OpPrereqError("Can't set the node into more than one"
4336                                  " state at the same time",
4337                                  errors.ECODE_INVAL)
4338
4339     # Boolean value that tells us whether we might be demoting from MC
4340     self.might_demote = (self.op.master_candidate == False or
4341                          self.op.offline == True or
4342                          self.op.drained == True or
4343                          self.op.master_capable == False)
4344
4345     if self.op.secondary_ip:
4346       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4347         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4348                                    " address" % self.op.secondary_ip,
4349                                    errors.ECODE_INVAL)
4350
4351     self.lock_all = self.op.auto_promote and self.might_demote
4352     self.lock_instances = self.op.secondary_ip is not None
4353
4354   def ExpandNames(self):
4355     if self.lock_all:
4356       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4357     else:
4358       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4359
4360     if self.lock_instances:
4361       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4362
4363   def DeclareLocks(self, level):
4364     # If we have locked all instances, before waiting to lock nodes, release
4365     # all the ones living on nodes unrelated to the current operation.
4366     if level == locking.LEVEL_NODE and self.lock_instances:
4367       instances_release = []
4368       instances_keep = []
4369       self.affected_instances = []
4370       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4371         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4372           instance = self.context.cfg.GetInstanceInfo(instance_name)
4373           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4374           if i_mirrored and self.op.node_name in instance.all_nodes:
4375             instances_keep.append(instance_name)
4376             self.affected_instances.append(instance)
4377           else:
4378             instances_release.append(instance_name)
4379         if instances_release:
4380           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4381           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4382
4383   def BuildHooksEnv(self):
4384     """Build hooks env.
4385
4386     This runs on the master node.
4387
4388     """
4389     env = {
4390       "OP_TARGET": self.op.node_name,
4391       "MASTER_CANDIDATE": str(self.op.master_candidate),
4392       "OFFLINE": str(self.op.offline),
4393       "DRAINED": str(self.op.drained),
4394       "MASTER_CAPABLE": str(self.op.master_capable),
4395       "VM_CAPABLE": str(self.op.vm_capable),
4396       }
4397     nl = [self.cfg.GetMasterNode(),
4398           self.op.node_name]
4399     return env, nl, nl
4400
4401   def CheckPrereq(self):
4402     """Check prerequisites.
4403
4404     This only checks the instance list against the existing names.
4405
4406     """
4407     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4408
4409     if (self.op.master_candidate is not None or
4410         self.op.drained is not None or
4411         self.op.offline is not None):
4412       # we can't change the master's node flags
4413       if self.op.node_name == self.cfg.GetMasterNode():
4414         raise errors.OpPrereqError("The master role can be changed"
4415                                    " only via master-failover",
4416                                    errors.ECODE_INVAL)
4417
4418     if self.op.master_candidate and not node.master_capable:
4419       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4420                                  " it a master candidate" % node.name,
4421                                  errors.ECODE_STATE)
4422
4423     if self.op.vm_capable == False:
4424       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4425       if ipri or isec:
4426         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4427                                    " the vm_capable flag" % node.name,
4428                                    errors.ECODE_STATE)
4429
4430     if node.master_candidate and self.might_demote and not self.lock_all:
4431       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4432       # check if after removing the current node, we're missing master
4433       # candidates
4434       (mc_remaining, mc_should, _) = \
4435           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4436       if mc_remaining < mc_should:
4437         raise errors.OpPrereqError("Not enough master candidates, please"
4438                                    " pass auto promote option to allow"
4439                                    " promotion", errors.ECODE_STATE)
4440
4441     self.old_flags = old_flags = (node.master_candidate,
4442                                   node.drained, node.offline)
4443     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4444     self.old_role = old_role = self._F2R[old_flags]
4445
4446     # Check for ineffective changes
4447     for attr in self._FLAGS:
4448       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4449         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4450         setattr(self.op, attr, None)
4451
4452     # Past this point, any flag change to False means a transition
4453     # away from the respective state, as only real changes are kept
4454
4455     # TODO: We might query the real power state if it supports OOB
4456     if _SupportsOob(self.cfg, node):
4457       if self.op.offline is False and not (node.powered or
4458                                            self.op.powered == True):
4459         raise errors.OpPrereqError(("Please power on node %s first before you"
4460                                     " can reset offline state") %
4461                                    self.op.node_name)
4462     elif self.op.powered is not None:
4463       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4464                                   " which does not support out-of-band"
4465                                   " handling") % self.op.node_name)
4466
4467     # If we're being deofflined/drained, we'll MC ourself if needed
4468     if (self.op.drained == False or self.op.offline == False or
4469         (self.op.master_capable and not node.master_capable)):
4470       if _DecideSelfPromotion(self):
4471         self.op.master_candidate = True
4472         self.LogInfo("Auto-promoting node to master candidate")
4473
4474     # If we're no longer master capable, we'll demote ourselves from MC
4475     if self.op.master_capable == False and node.master_candidate:
4476       self.LogInfo("Demoting from master candidate")
4477       self.op.master_candidate = False
4478
4479     # Compute new role
4480     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4481     if self.op.master_candidate:
4482       new_role = self._ROLE_CANDIDATE
4483     elif self.op.drained:
4484       new_role = self._ROLE_DRAINED
4485     elif self.op.offline:
4486       new_role = self._ROLE_OFFLINE
4487     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4488       # False is still in new flags, which means we're un-setting (the
4489       # only) True flag
4490       new_role = self._ROLE_REGULAR
4491     else: # no new flags, nothing, keep old role
4492       new_role = old_role
4493
4494     self.new_role = new_role
4495
4496     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4497       # Trying to transition out of offline status
4498       result = self.rpc.call_version([node.name])[node.name]
4499       if result.fail_msg:
4500         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4501                                    " to report its version: %s" %
4502                                    (node.name, result.fail_msg),
4503                                    errors.ECODE_STATE)
4504       else:
4505         self.LogWarning("Transitioning node from offline to online state"
4506                         " without using re-add. Please make sure the node"
4507                         " is healthy!")
4508
4509     if self.op.secondary_ip:
4510       # Ok even without locking, because this can't be changed by any LU
4511       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4512       master_singlehomed = master.secondary_ip == master.primary_ip
4513       if master_singlehomed and self.op.secondary_ip:
4514         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4515                                    " homed cluster", errors.ECODE_INVAL)
4516
4517       if node.offline:
4518         if self.affected_instances:
4519           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4520                                      " node has instances (%s) configured"
4521                                      " to use it" % self.affected_instances)
4522       else:
4523         # On online nodes, check that no instances are running, and that
4524         # the node has the new ip and we can reach it.
4525         for instance in self.affected_instances:
4526           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4527
4528         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4529         if master.name != node.name:
4530           # check reachability from master secondary ip to new secondary ip
4531           if not netutils.TcpPing(self.op.secondary_ip,
4532                                   constants.DEFAULT_NODED_PORT,
4533                                   source=master.secondary_ip):
4534             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4535                                        " based ping to node daemon port",
4536                                        errors.ECODE_ENVIRON)
4537
4538     if self.op.ndparams:
4539       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4540       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4541       self.new_ndparams = new_ndparams
4542
4543   def Exec(self, feedback_fn):
4544     """Modifies a node.
4545
4546     """
4547     node = self.node
4548     old_role = self.old_role
4549     new_role = self.new_role
4550
4551     result = []
4552
4553     if self.op.ndparams:
4554       node.ndparams = self.new_ndparams
4555
4556     if self.op.powered is not None:
4557       node.powered = self.op.powered
4558
4559     for attr in ["master_capable", "vm_capable"]:
4560       val = getattr(self.op, attr)
4561       if val is not None:
4562         setattr(node, attr, val)
4563         result.append((attr, str(val)))
4564
4565     if new_role != old_role:
4566       # Tell the node to demote itself, if no longer MC and not offline
4567       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4568         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4569         if msg:
4570           self.LogWarning("Node failed to demote itself: %s", msg)
4571
4572       new_flags = self._R2F[new_role]
4573       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4574         if of != nf:
4575           result.append((desc, str(nf)))
4576       (node.master_candidate, node.drained, node.offline) = new_flags
4577
4578       # we locked all nodes, we adjust the CP before updating this node
4579       if self.lock_all:
4580         _AdjustCandidatePool(self, [node.name])
4581
4582     if self.op.secondary_ip:
4583       node.secondary_ip = self.op.secondary_ip
4584       result.append(("secondary_ip", self.op.secondary_ip))
4585
4586     # this will trigger configuration file update, if needed
4587     self.cfg.Update(node, feedback_fn)
4588
4589     # this will trigger job queue propagation or cleanup if the mc
4590     # flag changed
4591     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4592       self.context.ReaddNode(node)
4593
4594     return result
4595
4596
4597 class LUNodePowercycle(NoHooksLU):
4598   """Powercycles a node.
4599
4600   """
4601   REQ_BGL = False
4602
4603   def CheckArguments(self):
4604     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4605     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4606       raise errors.OpPrereqError("The node is the master and the force"
4607                                  " parameter was not set",
4608                                  errors.ECODE_INVAL)
4609
4610   def ExpandNames(self):
4611     """Locking for PowercycleNode.
4612
4613     This is a last-resort option and shouldn't block on other
4614     jobs. Therefore, we grab no locks.
4615
4616     """
4617     self.needed_locks = {}
4618
4619   def Exec(self, feedback_fn):
4620     """Reboots a node.
4621
4622     """
4623     result = self.rpc.call_node_powercycle(self.op.node_name,
4624                                            self.cfg.GetHypervisorType())
4625     result.Raise("Failed to schedule the reboot")
4626     return result.payload
4627
4628
4629 class LUClusterQuery(NoHooksLU):
4630   """Query cluster configuration.
4631
4632   """
4633   REQ_BGL = False
4634
4635   def ExpandNames(self):
4636     self.needed_locks = {}
4637
4638   def Exec(self, feedback_fn):
4639     """Return cluster config.
4640
4641     """
4642     cluster = self.cfg.GetClusterInfo()
4643     os_hvp = {}
4644
4645     # Filter just for enabled hypervisors
4646     for os_name, hv_dict in cluster.os_hvp.items():
4647       os_hvp[os_name] = {}
4648       for hv_name, hv_params in hv_dict.items():
4649         if hv_name in cluster.enabled_hypervisors:
4650           os_hvp[os_name][hv_name] = hv_params
4651
4652     # Convert ip_family to ip_version
4653     primary_ip_version = constants.IP4_VERSION
4654     if cluster.primary_ip_family == netutils.IP6Address.family:
4655       primary_ip_version = constants.IP6_VERSION
4656
4657     result = {
4658       "software_version": constants.RELEASE_VERSION,
4659       "protocol_version": constants.PROTOCOL_VERSION,
4660       "config_version": constants.CONFIG_VERSION,
4661       "os_api_version": max(constants.OS_API_VERSIONS),
4662       "export_version": constants.EXPORT_VERSION,
4663       "architecture": (platform.architecture()[0], platform.machine()),
4664       "name": cluster.cluster_name,
4665       "master": cluster.master_node,
4666       "default_hypervisor": cluster.enabled_hypervisors[0],
4667       "enabled_hypervisors": cluster.enabled_hypervisors,
4668       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4669                         for hypervisor_name in cluster.enabled_hypervisors]),
4670       "os_hvp": os_hvp,
4671       "beparams": cluster.beparams,
4672       "osparams": cluster.osparams,
4673       "nicparams": cluster.nicparams,
4674       "ndparams": cluster.ndparams,
4675       "candidate_pool_size": cluster.candidate_pool_size,
4676       "master_netdev": cluster.master_netdev,
4677       "volume_group_name": cluster.volume_group_name,
4678       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4679       "file_storage_dir": cluster.file_storage_dir,
4680       "maintain_node_health": cluster.maintain_node_health,
4681       "ctime": cluster.ctime,
4682       "mtime": cluster.mtime,
4683       "uuid": cluster.uuid,
4684       "tags": list(cluster.GetTags()),
4685       "uid_pool": cluster.uid_pool,
4686       "default_iallocator": cluster.default_iallocator,
4687       "reserved_lvs": cluster.reserved_lvs,
4688       "primary_ip_version": primary_ip_version,
4689       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4690       "hidden_os": cluster.hidden_os,
4691       "blacklisted_os": cluster.blacklisted_os,
4692       }
4693
4694     return result
4695
4696
4697 class LUClusterConfigQuery(NoHooksLU):
4698   """Return configuration values.
4699
4700   """
4701   REQ_BGL = False
4702   _FIELDS_DYNAMIC = utils.FieldSet()
4703   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4704                                   "watcher_pause", "volume_group_name")
4705
4706   def CheckArguments(self):
4707     _CheckOutputFields(static=self._FIELDS_STATIC,
4708                        dynamic=self._FIELDS_DYNAMIC,
4709                        selected=self.op.output_fields)
4710
4711   def ExpandNames(self):
4712     self.needed_locks = {}
4713
4714   def Exec(self, feedback_fn):
4715     """Dump a representation of the cluster config to the standard output.
4716
4717     """
4718     values = []
4719     for field in self.op.output_fields:
4720       if field == "cluster_name":
4721         entry = self.cfg.GetClusterName()
4722       elif field == "master_node":
4723         entry = self.cfg.GetMasterNode()
4724       elif field == "drain_flag":
4725         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4726       elif field == "watcher_pause":
4727         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4728       elif field == "volume_group_name":
4729         entry = self.cfg.GetVGName()
4730       else:
4731         raise errors.ParameterError(field)
4732       values.append(entry)
4733     return values
4734
4735
4736 class LUInstanceActivateDisks(NoHooksLU):
4737   """Bring up an instance's disks.
4738
4739   """
4740   REQ_BGL = False
4741
4742   def ExpandNames(self):
4743     self._ExpandAndLockInstance()
4744     self.needed_locks[locking.LEVEL_NODE] = []
4745     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4746
4747   def DeclareLocks(self, level):
4748     if level == locking.LEVEL_NODE:
4749       self._LockInstancesNodes()
4750
4751   def CheckPrereq(self):
4752     """Check prerequisites.
4753
4754     This checks that the instance is in the cluster.
4755
4756     """
4757     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4758     assert self.instance is not None, \
4759       "Cannot retrieve locked instance %s" % self.op.instance_name
4760     _CheckNodeOnline(self, self.instance.primary_node)
4761
4762   def Exec(self, feedback_fn):
4763     """Activate the disks.
4764
4765     """
4766     disks_ok, disks_info = \
4767               _AssembleInstanceDisks(self, self.instance,
4768                                      ignore_size=self.op.ignore_size)
4769     if not disks_ok:
4770       raise errors.OpExecError("Cannot activate block devices")
4771
4772     return disks_info
4773
4774
4775 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4776                            ignore_size=False):
4777   """Prepare the block devices for an instance.
4778
4779   This sets up the block devices on all nodes.
4780
4781   @type lu: L{LogicalUnit}
4782   @param lu: the logical unit on whose behalf we execute
4783   @type instance: L{objects.Instance}
4784   @param instance: the instance for whose disks we assemble
4785   @type disks: list of L{objects.Disk} or None
4786   @param disks: which disks to assemble (or all, if None)
4787   @type ignore_secondaries: boolean
4788   @param ignore_secondaries: if true, errors on secondary nodes
4789       won't result in an error return from the function
4790   @type ignore_size: boolean
4791   @param ignore_size: if true, the current known size of the disk
4792       will not be used during the disk activation, useful for cases
4793       when the size is wrong
4794   @return: False if the operation failed, otherwise a list of
4795       (host, instance_visible_name, node_visible_name)
4796       with the mapping from node devices to instance devices
4797
4798   """
4799   device_info = []
4800   disks_ok = True
4801   iname = instance.name
4802   disks = _ExpandCheckDisks(instance, disks)
4803
4804   # With the two passes mechanism we try to reduce the window of
4805   # opportunity for the race condition of switching DRBD to primary
4806   # before handshaking occured, but we do not eliminate it
4807
4808   # The proper fix would be to wait (with some limits) until the
4809   # connection has been made and drbd transitions from WFConnection
4810   # into any other network-connected state (Connected, SyncTarget,
4811   # SyncSource, etc.)
4812
4813   # 1st pass, assemble on all nodes in secondary mode
4814   for idx, inst_disk in enumerate(disks):
4815     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4816       if ignore_size:
4817         node_disk = node_disk.Copy()
4818         node_disk.UnsetSize()
4819       lu.cfg.SetDiskID(node_disk, node)
4820       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4821       msg = result.fail_msg
4822       if msg:
4823         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4824                            " (is_primary=False, pass=1): %s",
4825                            inst_disk.iv_name, node, msg)
4826         if not ignore_secondaries:
4827           disks_ok = False
4828
4829   # FIXME: race condition on drbd migration to primary
4830
4831   # 2nd pass, do only the primary node
4832   for idx, inst_disk in enumerate(disks):
4833     dev_path = None
4834
4835     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4836       if node != instance.primary_node:
4837         continue
4838       if ignore_size:
4839         node_disk = node_disk.Copy()
4840         node_disk.UnsetSize()
4841       lu.cfg.SetDiskID(node_disk, node)
4842       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4843       msg = result.fail_msg
4844       if msg:
4845         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4846                            " (is_primary=True, pass=2): %s",
4847                            inst_disk.iv_name, node, msg)
4848         disks_ok = False
4849       else:
4850         dev_path = result.payload
4851
4852     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4853
4854   # leave the disks configured for the primary node
4855   # this is a workaround that would be fixed better by
4856   # improving the logical/physical id handling
4857   for disk in disks:
4858     lu.cfg.SetDiskID(disk, instance.primary_node)
4859
4860   return disks_ok, device_info
4861
4862
4863 def _StartInstanceDisks(lu, instance, force):
4864   """Start the disks of an instance.
4865
4866   """
4867   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4868                                            ignore_secondaries=force)
4869   if not disks_ok:
4870     _ShutdownInstanceDisks(lu, instance)
4871     if force is not None and not force:
4872       lu.proc.LogWarning("", hint="If the message above refers to a"
4873                          " secondary node,"
4874                          " you can retry the operation using '--force'.")
4875     raise errors.OpExecError("Disk consistency error")
4876
4877
4878 class LUInstanceDeactivateDisks(NoHooksLU):
4879   """Shutdown an instance's disks.
4880
4881   """
4882   REQ_BGL = False
4883
4884   def ExpandNames(self):
4885     self._ExpandAndLockInstance()
4886     self.needed_locks[locking.LEVEL_NODE] = []
4887     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4888
4889   def DeclareLocks(self, level):
4890     if level == locking.LEVEL_NODE:
4891       self._LockInstancesNodes()
4892
4893   def CheckPrereq(self):
4894     """Check prerequisites.
4895
4896     This checks that the instance is in the cluster.
4897
4898     """
4899     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4900     assert self.instance is not None, \
4901       "Cannot retrieve locked instance %s" % self.op.instance_name
4902
4903   def Exec(self, feedback_fn):
4904     """Deactivate the disks
4905
4906     """
4907     instance = self.instance
4908     if self.op.force:
4909       _ShutdownInstanceDisks(self, instance)
4910     else:
4911       _SafeShutdownInstanceDisks(self, instance)
4912
4913
4914 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4915   """Shutdown block devices of an instance.
4916
4917   This function checks if an instance is running, before calling
4918   _ShutdownInstanceDisks.
4919
4920   """
4921   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4922   _ShutdownInstanceDisks(lu, instance, disks=disks)
4923
4924
4925 def _ExpandCheckDisks(instance, disks):
4926   """Return the instance disks selected by the disks list
4927
4928   @type disks: list of L{objects.Disk} or None
4929   @param disks: selected disks
4930   @rtype: list of L{objects.Disk}
4931   @return: selected instance disks to act on
4932
4933   """
4934   if disks is None:
4935     return instance.disks
4936   else:
4937     if not set(disks).issubset(instance.disks):
4938       raise errors.ProgrammerError("Can only act on disks belonging to the"
4939                                    " target instance")
4940     return disks
4941
4942
4943 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4944   """Shutdown block devices of an instance.
4945
4946   This does the shutdown on all nodes of the instance.
4947
4948   If the ignore_primary is false, errors on the primary node are
4949   ignored.
4950
4951   """
4952   all_result = True
4953   disks = _ExpandCheckDisks(instance, disks)
4954
4955   for disk in disks:
4956     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4957       lu.cfg.SetDiskID(top_disk, node)
4958       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4959       msg = result.fail_msg
4960       if msg:
4961         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4962                       disk.iv_name, node, msg)
4963         if ((node == instance.primary_node and not ignore_primary) or
4964             (node != instance.primary_node and not result.offline)):
4965           all_result = False
4966   return all_result
4967
4968
4969 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4970   """Checks if a node has enough free memory.
4971
4972   This function check if a given node has the needed amount of free
4973   memory. In case the node has less memory or we cannot get the
4974   information from the node, this function raise an OpPrereqError
4975   exception.
4976
4977   @type lu: C{LogicalUnit}
4978   @param lu: a logical unit from which we get configuration data
4979   @type node: C{str}
4980   @param node: the node to check
4981   @type reason: C{str}
4982   @param reason: string to use in the error message
4983   @type requested: C{int}
4984   @param requested: the amount of memory in MiB to check for
4985   @type hypervisor_name: C{str}
4986   @param hypervisor_name: the hypervisor to ask for memory stats
4987   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4988       we cannot check the node
4989
4990   """
4991   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4992   nodeinfo[node].Raise("Can't get data from node %s" % node,
4993                        prereq=True, ecode=errors.ECODE_ENVIRON)
4994   free_mem = nodeinfo[node].payload.get('memory_free', None)
4995   if not isinstance(free_mem, int):
4996     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4997                                " was '%s'" % (node, free_mem),
4998                                errors.ECODE_ENVIRON)
4999   if requested > free_mem:
5000     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5001                                " needed %s MiB, available %s MiB" %
5002                                (node, reason, requested, free_mem),
5003                                errors.ECODE_NORES)
5004
5005
5006 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5007   """Checks if nodes have enough free disk space in the all VGs.
5008
5009   This function check if all given nodes have the needed amount of
5010   free disk. In case any node has less disk or we cannot get the
5011   information from the node, this function raise an OpPrereqError
5012   exception.
5013
5014   @type lu: C{LogicalUnit}
5015   @param lu: a logical unit from which we get configuration data
5016   @type nodenames: C{list}
5017   @param nodenames: the list of node names to check
5018   @type req_sizes: C{dict}
5019   @param req_sizes: the hash of vg and corresponding amount of disk in
5020       MiB to check for
5021   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5022       or we cannot check the node
5023
5024   """
5025   for vg, req_size in req_sizes.items():
5026     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5027
5028
5029 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5030   """Checks if nodes have enough free disk space in the specified VG.
5031
5032   This function check if all given nodes have the needed amount of
5033   free disk. In case any node has less disk or we cannot get the
5034   information from the node, this function raise an OpPrereqError
5035   exception.
5036
5037   @type lu: C{LogicalUnit}
5038   @param lu: a logical unit from which we get configuration data
5039   @type nodenames: C{list}
5040   @param nodenames: the list of node names to check
5041   @type vg: C{str}
5042   @param vg: the volume group to check
5043   @type requested: C{int}
5044   @param requested: the amount of disk in MiB to check for
5045   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5046       or we cannot check the node
5047
5048   """
5049   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5050   for node in nodenames:
5051     info = nodeinfo[node]
5052     info.Raise("Cannot get current information from node %s" % node,
5053                prereq=True, ecode=errors.ECODE_ENVIRON)
5054     vg_free = info.payload.get("vg_free", None)
5055     if not isinstance(vg_free, int):
5056       raise errors.OpPrereqError("Can't compute free disk space on node"
5057                                  " %s for vg %s, result was '%s'" %
5058                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5059     if requested > vg_free:
5060       raise errors.OpPrereqError("Not enough disk space on target node %s"
5061                                  " vg %s: required %d MiB, available %d MiB" %
5062                                  (node, vg, requested, vg_free),
5063                                  errors.ECODE_NORES)
5064
5065
5066 class LUInstanceStartup(LogicalUnit):
5067   """Starts an instance.
5068
5069   """
5070   HPATH = "instance-start"
5071   HTYPE = constants.HTYPE_INSTANCE
5072   REQ_BGL = False
5073
5074   def CheckArguments(self):
5075     # extra beparams
5076     if self.op.beparams:
5077       # fill the beparams dict
5078       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5079
5080   def ExpandNames(self):
5081     self._ExpandAndLockInstance()
5082
5083   def BuildHooksEnv(self):
5084     """Build hooks env.
5085
5086     This runs on master, primary and secondary nodes of the instance.
5087
5088     """
5089     env = {
5090       "FORCE": self.op.force,
5091       }
5092     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5093     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5094     return env, nl, nl
5095
5096   def CheckPrereq(self):
5097     """Check prerequisites.
5098
5099     This checks that the instance is in the cluster.
5100
5101     """
5102     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5103     assert self.instance is not None, \
5104       "Cannot retrieve locked instance %s" % self.op.instance_name
5105
5106     # extra hvparams
5107     if self.op.hvparams:
5108       # check hypervisor parameter syntax (locally)
5109       cluster = self.cfg.GetClusterInfo()
5110       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5111       filled_hvp = cluster.FillHV(instance)
5112       filled_hvp.update(self.op.hvparams)
5113       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5114       hv_type.CheckParameterSyntax(filled_hvp)
5115       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5116
5117     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5118
5119     if self.primary_offline and self.op.ignore_offline_nodes:
5120       self.proc.LogWarning("Ignoring offline primary node")
5121
5122       if self.op.hvparams or self.op.beparams:
5123         self.proc.LogWarning("Overridden parameters are ignored")
5124     else:
5125       _CheckNodeOnline(self, instance.primary_node)
5126
5127       bep = self.cfg.GetClusterInfo().FillBE(instance)
5128
5129       # check bridges existence
5130       _CheckInstanceBridgesExist(self, instance)
5131
5132       remote_info = self.rpc.call_instance_info(instance.primary_node,
5133                                                 instance.name,
5134                                                 instance.hypervisor)
5135       remote_info.Raise("Error checking node %s" % instance.primary_node,
5136                         prereq=True, ecode=errors.ECODE_ENVIRON)
5137       if not remote_info.payload: # not running already
5138         _CheckNodeFreeMemory(self, instance.primary_node,
5139                              "starting instance %s" % instance.name,
5140                              bep[constants.BE_MEMORY], instance.hypervisor)
5141
5142   def Exec(self, feedback_fn):
5143     """Start the instance.
5144
5145     """
5146     instance = self.instance
5147     force = self.op.force
5148
5149     self.cfg.MarkInstanceUp(instance.name)
5150
5151     if self.primary_offline:
5152       assert self.op.ignore_offline_nodes
5153       self.proc.LogInfo("Primary node offline, marked instance as started")
5154     else:
5155       node_current = instance.primary_node
5156
5157       _StartInstanceDisks(self, instance, force)
5158
5159       result = self.rpc.call_instance_start(node_current, instance,
5160                                             self.op.hvparams, self.op.beparams)
5161       msg = result.fail_msg
5162       if msg:
5163         _ShutdownInstanceDisks(self, instance)
5164         raise errors.OpExecError("Could not start instance: %s" % msg)
5165
5166
5167 class LUInstanceReboot(LogicalUnit):
5168   """Reboot an instance.
5169
5170   """
5171   HPATH = "instance-reboot"
5172   HTYPE = constants.HTYPE_INSTANCE
5173   REQ_BGL = False
5174
5175   def ExpandNames(self):
5176     self._ExpandAndLockInstance()
5177
5178   def BuildHooksEnv(self):
5179     """Build hooks env.
5180
5181     This runs on master, primary and secondary nodes of the instance.
5182
5183     """
5184     env = {
5185       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5186       "REBOOT_TYPE": self.op.reboot_type,
5187       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5188       }
5189     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5190     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5191     return env, nl, nl
5192
5193   def CheckPrereq(self):
5194     """Check prerequisites.
5195
5196     This checks that the instance is in the cluster.
5197
5198     """
5199     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5200     assert self.instance is not None, \
5201       "Cannot retrieve locked instance %s" % self.op.instance_name
5202
5203     _CheckNodeOnline(self, instance.primary_node)
5204
5205     # check bridges existence
5206     _CheckInstanceBridgesExist(self, instance)
5207
5208   def Exec(self, feedback_fn):
5209     """Reboot the instance.
5210
5211     """
5212     instance = self.instance
5213     ignore_secondaries = self.op.ignore_secondaries
5214     reboot_type = self.op.reboot_type
5215
5216     node_current = instance.primary_node
5217
5218     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5219                        constants.INSTANCE_REBOOT_HARD]:
5220       for disk in instance.disks:
5221         self.cfg.SetDiskID(disk, node_current)
5222       result = self.rpc.call_instance_reboot(node_current, instance,
5223                                              reboot_type,
5224                                              self.op.shutdown_timeout)
5225       result.Raise("Could not reboot instance")
5226     else:
5227       result = self.rpc.call_instance_shutdown(node_current, instance,
5228                                                self.op.shutdown_timeout)
5229       result.Raise("Could not shutdown instance for full reboot")
5230       _ShutdownInstanceDisks(self, instance)
5231       _StartInstanceDisks(self, instance, ignore_secondaries)
5232       result = self.rpc.call_instance_start(node_current, instance, None, None)
5233       msg = result.fail_msg
5234       if msg:
5235         _ShutdownInstanceDisks(self, instance)
5236         raise errors.OpExecError("Could not start instance for"
5237                                  " full reboot: %s" % msg)
5238
5239     self.cfg.MarkInstanceUp(instance.name)
5240
5241
5242 class LUInstanceShutdown(LogicalUnit):
5243   """Shutdown an instance.
5244
5245   """
5246   HPATH = "instance-stop"
5247   HTYPE = constants.HTYPE_INSTANCE
5248   REQ_BGL = False
5249
5250   def ExpandNames(self):
5251     self._ExpandAndLockInstance()
5252
5253   def BuildHooksEnv(self):
5254     """Build hooks env.
5255
5256     This runs on master, primary and secondary nodes of the instance.
5257
5258     """
5259     env = _BuildInstanceHookEnvByObject(self, self.instance)
5260     env["TIMEOUT"] = self.op.timeout
5261     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5262     return env, nl, nl
5263
5264   def CheckPrereq(self):
5265     """Check prerequisites.
5266
5267     This checks that the instance is in the cluster.
5268
5269     """
5270     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5271     assert self.instance is not None, \
5272       "Cannot retrieve locked instance %s" % self.op.instance_name
5273
5274     self.primary_offline = \
5275       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5276
5277     if self.primary_offline and self.op.ignore_offline_nodes:
5278       self.proc.LogWarning("Ignoring offline primary node")
5279     else:
5280       _CheckNodeOnline(self, self.instance.primary_node)
5281
5282   def Exec(self, feedback_fn):
5283     """Shutdown the instance.
5284
5285     """
5286     instance = self.instance
5287     node_current = instance.primary_node
5288     timeout = self.op.timeout
5289
5290     self.cfg.MarkInstanceDown(instance.name)
5291
5292     if self.primary_offline:
5293       assert self.op.ignore_offline_nodes
5294       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5295     else:
5296       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5297       msg = result.fail_msg
5298       if msg:
5299         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5300
5301       _ShutdownInstanceDisks(self, instance)
5302
5303
5304 class LUInstanceReinstall(LogicalUnit):
5305   """Reinstall an instance.
5306
5307   """
5308   HPATH = "instance-reinstall"
5309   HTYPE = constants.HTYPE_INSTANCE
5310   REQ_BGL = False
5311
5312   def ExpandNames(self):
5313     self._ExpandAndLockInstance()
5314
5315   def BuildHooksEnv(self):
5316     """Build hooks env.
5317
5318     This runs on master, primary and secondary nodes of the instance.
5319
5320     """
5321     env = _BuildInstanceHookEnvByObject(self, self.instance)
5322     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5323     return env, nl, nl
5324
5325   def CheckPrereq(self):
5326     """Check prerequisites.
5327
5328     This checks that the instance is in the cluster and is not running.
5329
5330     """
5331     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5332     assert instance is not None, \
5333       "Cannot retrieve locked instance %s" % self.op.instance_name
5334     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5335                      " offline, cannot reinstall")
5336     for node in instance.secondary_nodes:
5337       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5338                        " cannot reinstall")
5339
5340     if instance.disk_template == constants.DT_DISKLESS:
5341       raise errors.OpPrereqError("Instance '%s' has no disks" %
5342                                  self.op.instance_name,
5343                                  errors.ECODE_INVAL)
5344     _CheckInstanceDown(self, instance, "cannot reinstall")
5345
5346     if self.op.os_type is not None:
5347       # OS verification
5348       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5349       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5350       instance_os = self.op.os_type
5351     else:
5352       instance_os = instance.os
5353
5354     nodelist = list(instance.all_nodes)
5355
5356     if self.op.osparams:
5357       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5358       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5359       self.os_inst = i_osdict # the new dict (without defaults)
5360     else:
5361       self.os_inst = None
5362
5363     self.instance = instance
5364
5365   def Exec(self, feedback_fn):
5366     """Reinstall the instance.
5367
5368     """
5369     inst = self.instance
5370
5371     if self.op.os_type is not None:
5372       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5373       inst.os = self.op.os_type
5374       # Write to configuration
5375       self.cfg.Update(inst, feedback_fn)
5376
5377     _StartInstanceDisks(self, inst, None)
5378     try:
5379       feedback_fn("Running the instance OS create scripts...")
5380       # FIXME: pass debug option from opcode to backend
5381       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5382                                              self.op.debug_level,
5383                                              osparams=self.os_inst)
5384       result.Raise("Could not install OS for instance %s on node %s" %
5385                    (inst.name, inst.primary_node))
5386     finally:
5387       _ShutdownInstanceDisks(self, inst)
5388
5389
5390 class LUInstanceRecreateDisks(LogicalUnit):
5391   """Recreate an instance's missing disks.
5392
5393   """
5394   HPATH = "instance-recreate-disks"
5395   HTYPE = constants.HTYPE_INSTANCE
5396   REQ_BGL = False
5397
5398   def ExpandNames(self):
5399     self._ExpandAndLockInstance()
5400
5401   def BuildHooksEnv(self):
5402     """Build hooks env.
5403
5404     This runs on master, primary and secondary nodes of the instance.
5405
5406     """
5407     env = _BuildInstanceHookEnvByObject(self, self.instance)
5408     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5409     return env, nl, nl
5410
5411   def CheckPrereq(self):
5412     """Check prerequisites.
5413
5414     This checks that the instance is in the cluster and is not running.
5415
5416     """
5417     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5418     assert instance is not None, \
5419       "Cannot retrieve locked instance %s" % self.op.instance_name
5420     _CheckNodeOnline(self, instance.primary_node)
5421
5422     if instance.disk_template == constants.DT_DISKLESS:
5423       raise errors.OpPrereqError("Instance '%s' has no disks" %
5424                                  self.op.instance_name, errors.ECODE_INVAL)
5425     _CheckInstanceDown(self, instance, "cannot recreate disks")
5426
5427     if not self.op.disks:
5428       self.op.disks = range(len(instance.disks))
5429     else:
5430       for idx in self.op.disks:
5431         if idx >= len(instance.disks):
5432           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5433                                      errors.ECODE_INVAL)
5434
5435     self.instance = instance
5436
5437   def Exec(self, feedback_fn):
5438     """Recreate the disks.
5439
5440     """
5441     to_skip = []
5442     for idx, _ in enumerate(self.instance.disks):
5443       if idx not in self.op.disks: # disk idx has not been passed in
5444         to_skip.append(idx)
5445         continue
5446
5447     _CreateDisks(self, self.instance, to_skip=to_skip)
5448
5449
5450 class LUInstanceRename(LogicalUnit):
5451   """Rename an instance.
5452
5453   """
5454   HPATH = "instance-rename"
5455   HTYPE = constants.HTYPE_INSTANCE
5456
5457   def CheckArguments(self):
5458     """Check arguments.
5459
5460     """
5461     if self.op.ip_check and not self.op.name_check:
5462       # TODO: make the ip check more flexible and not depend on the name check
5463       raise errors.OpPrereqError("Cannot do ip check without a name check",
5464                                  errors.ECODE_INVAL)
5465
5466   def BuildHooksEnv(self):
5467     """Build hooks env.
5468
5469     This runs on master, primary and secondary nodes of the instance.
5470
5471     """
5472     env = _BuildInstanceHookEnvByObject(self, self.instance)
5473     env["INSTANCE_NEW_NAME"] = self.op.new_name
5474     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5475     return env, nl, nl
5476
5477   def CheckPrereq(self):
5478     """Check prerequisites.
5479
5480     This checks that the instance is in the cluster and is not running.
5481
5482     """
5483     self.op.instance_name = _ExpandInstanceName(self.cfg,
5484                                                 self.op.instance_name)
5485     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5486     assert instance is not None
5487     _CheckNodeOnline(self, instance.primary_node)
5488     _CheckInstanceDown(self, instance, "cannot rename")
5489     self.instance = instance
5490
5491     new_name = self.op.new_name
5492     if self.op.name_check:
5493       hostname = netutils.GetHostname(name=new_name)
5494       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5495                    hostname.name)
5496       new_name = self.op.new_name = hostname.name
5497       if (self.op.ip_check and
5498           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5499         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5500                                    (hostname.ip, new_name),
5501                                    errors.ECODE_NOTUNIQUE)
5502
5503     instance_list = self.cfg.GetInstanceList()
5504     if new_name in instance_list and new_name != instance.name:
5505       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5506                                  new_name, errors.ECODE_EXISTS)
5507
5508   def Exec(self, feedback_fn):
5509     """Rename the instance.
5510
5511     """
5512     inst = self.instance
5513     old_name = inst.name
5514
5515     rename_file_storage = False
5516     if (inst.disk_template == constants.DT_FILE and
5517         self.op.new_name != inst.name):
5518       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5519       rename_file_storage = True
5520
5521     self.cfg.RenameInstance(inst.name, self.op.new_name)
5522     # Change the instance lock. This is definitely safe while we hold the BGL
5523     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5524     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5525
5526     # re-read the instance from the configuration after rename
5527     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5528
5529     if rename_file_storage:
5530       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5531       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5532                                                      old_file_storage_dir,
5533                                                      new_file_storage_dir)
5534       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5535                    " (but the instance has been renamed in Ganeti)" %
5536                    (inst.primary_node, old_file_storage_dir,
5537                     new_file_storage_dir))
5538
5539     _StartInstanceDisks(self, inst, None)
5540     try:
5541       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5542                                                  old_name, self.op.debug_level)
5543       msg = result.fail_msg
5544       if msg:
5545         msg = ("Could not run OS rename script for instance %s on node %s"
5546                " (but the instance has been renamed in Ganeti): %s" %
5547                (inst.name, inst.primary_node, msg))
5548         self.proc.LogWarning(msg)
5549     finally:
5550       _ShutdownInstanceDisks(self, inst)
5551
5552     return inst.name
5553
5554
5555 class LUInstanceRemove(LogicalUnit):
5556   """Remove an instance.
5557
5558   """
5559   HPATH = "instance-remove"
5560   HTYPE = constants.HTYPE_INSTANCE
5561   REQ_BGL = False
5562
5563   def ExpandNames(self):
5564     self._ExpandAndLockInstance()
5565     self.needed_locks[locking.LEVEL_NODE] = []
5566     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5567
5568   def DeclareLocks(self, level):
5569     if level == locking.LEVEL_NODE:
5570       self._LockInstancesNodes()
5571
5572   def BuildHooksEnv(self):
5573     """Build hooks env.
5574
5575     This runs on master, primary and secondary nodes of the instance.
5576
5577     """
5578     env = _BuildInstanceHookEnvByObject(self, self.instance)
5579     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5580     nl = [self.cfg.GetMasterNode()]
5581     nl_post = list(self.instance.all_nodes) + nl
5582     return env, nl, nl_post
5583
5584   def CheckPrereq(self):
5585     """Check prerequisites.
5586
5587     This checks that the instance is in the cluster.
5588
5589     """
5590     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5591     assert self.instance is not None, \
5592       "Cannot retrieve locked instance %s" % self.op.instance_name
5593
5594   def Exec(self, feedback_fn):
5595     """Remove the instance.
5596
5597     """
5598     instance = self.instance
5599     logging.info("Shutting down instance %s on node %s",
5600                  instance.name, instance.primary_node)
5601
5602     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5603                                              self.op.shutdown_timeout)
5604     msg = result.fail_msg
5605     if msg:
5606       if self.op.ignore_failures:
5607         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5608       else:
5609         raise errors.OpExecError("Could not shutdown instance %s on"
5610                                  " node %s: %s" %
5611                                  (instance.name, instance.primary_node, msg))
5612
5613     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5614
5615
5616 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5617   """Utility function to remove an instance.
5618
5619   """
5620   logging.info("Removing block devices for instance %s", instance.name)
5621
5622   if not _RemoveDisks(lu, instance):
5623     if not ignore_failures:
5624       raise errors.OpExecError("Can't remove instance's disks")
5625     feedback_fn("Warning: can't remove instance's disks")
5626
5627   logging.info("Removing instance %s out of cluster config", instance.name)
5628
5629   lu.cfg.RemoveInstance(instance.name)
5630
5631   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5632     "Instance lock removal conflict"
5633
5634   # Remove lock for the instance
5635   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5636
5637
5638 class LUInstanceQuery(NoHooksLU):
5639   """Logical unit for querying instances.
5640
5641   """
5642   # pylint: disable-msg=W0142
5643   REQ_BGL = False
5644
5645   def CheckArguments(self):
5646     self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5647                              self.op.use_locking)
5648
5649   def ExpandNames(self):
5650     self.iq.ExpandNames(self)
5651
5652   def DeclareLocks(self, level):
5653     self.iq.DeclareLocks(self, level)
5654
5655   def Exec(self, feedback_fn):
5656     return self.iq.OldStyleQuery(self)
5657
5658
5659 class LUInstanceFailover(LogicalUnit):
5660   """Failover an instance.
5661
5662   """
5663   HPATH = "instance-failover"
5664   HTYPE = constants.HTYPE_INSTANCE
5665   REQ_BGL = False
5666
5667   def ExpandNames(self):
5668     self._ExpandAndLockInstance()
5669     self.needed_locks[locking.LEVEL_NODE] = []
5670     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5671
5672   def DeclareLocks(self, level):
5673     if level == locking.LEVEL_NODE:
5674       self._LockInstancesNodes()
5675
5676   def BuildHooksEnv(self):
5677     """Build hooks env.
5678
5679     This runs on master, primary and secondary nodes of the instance.
5680
5681     """
5682     instance = self.instance
5683     source_node = instance.primary_node
5684     target_node = instance.secondary_nodes[0]
5685     env = {
5686       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5687       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5688       "OLD_PRIMARY": source_node,
5689       "OLD_SECONDARY": target_node,
5690       "NEW_PRIMARY": target_node,
5691       "NEW_SECONDARY": source_node,
5692       }
5693     env.update(_BuildInstanceHookEnvByObject(self, instance))
5694     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5695     nl_post = list(nl)
5696     nl_post.append(source_node)
5697     return env, nl, nl_post
5698
5699   def CheckPrereq(self):
5700     """Check prerequisites.
5701
5702     This checks that the instance is in the cluster.
5703
5704     """
5705     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5706     assert self.instance is not None, \
5707       "Cannot retrieve locked instance %s" % self.op.instance_name
5708
5709     bep = self.cfg.GetClusterInfo().FillBE(instance)
5710     if instance.disk_template not in constants.DTS_NET_MIRROR:
5711       raise errors.OpPrereqError("Instance's disk layout is not"
5712                                  " network mirrored, cannot failover.",
5713                                  errors.ECODE_STATE)
5714
5715     secondary_nodes = instance.secondary_nodes
5716     if not secondary_nodes:
5717       raise errors.ProgrammerError("no secondary node but using "
5718                                    "a mirrored disk template")
5719
5720     target_node = secondary_nodes[0]
5721     _CheckNodeOnline(self, target_node)
5722     _CheckNodeNotDrained(self, target_node)
5723     if instance.admin_up:
5724       # check memory requirements on the secondary node
5725       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5726                            instance.name, bep[constants.BE_MEMORY],
5727                            instance.hypervisor)
5728     else:
5729       self.LogInfo("Not checking memory on the secondary node as"
5730                    " instance will not be started")
5731
5732     # check bridge existance
5733     _CheckInstanceBridgesExist(self, instance, node=target_node)
5734
5735   def Exec(self, feedback_fn):
5736     """Failover an instance.
5737
5738     The failover is done by shutting it down on its present node and
5739     starting it on the secondary.
5740
5741     """
5742     instance = self.instance
5743     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5744
5745     source_node = instance.primary_node
5746     target_node = instance.secondary_nodes[0]
5747
5748     if instance.admin_up:
5749       feedback_fn("* checking disk consistency between source and target")
5750       for dev in instance.disks:
5751         # for drbd, these are drbd over lvm
5752         if not _CheckDiskConsistency(self, dev, target_node, False):
5753           if not self.op.ignore_consistency:
5754             raise errors.OpExecError("Disk %s is degraded on target node,"
5755                                      " aborting failover." % dev.iv_name)
5756     else:
5757       feedback_fn("* not checking disk consistency as instance is not running")
5758
5759     feedback_fn("* shutting down instance on source node")
5760     logging.info("Shutting down instance %s on node %s",
5761                  instance.name, source_node)
5762
5763     result = self.rpc.call_instance_shutdown(source_node, instance,
5764                                              self.op.shutdown_timeout)
5765     msg = result.fail_msg
5766     if msg:
5767       if self.op.ignore_consistency or primary_node.offline:
5768         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5769                              " Proceeding anyway. Please make sure node"
5770                              " %s is down. Error details: %s",
5771                              instance.name, source_node, source_node, msg)
5772       else:
5773         raise errors.OpExecError("Could not shutdown instance %s on"
5774                                  " node %s: %s" %
5775                                  (instance.name, source_node, msg))
5776
5777     feedback_fn("* deactivating the instance's disks on source node")
5778     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5779       raise errors.OpExecError("Can't shut down the instance's disks.")
5780
5781     instance.primary_node = target_node
5782     # distribute new instance config to the other nodes
5783     self.cfg.Update(instance, feedback_fn)
5784
5785     # Only start the instance if it's marked as up
5786     if instance.admin_up:
5787       feedback_fn("* activating the instance's disks on target node")
5788       logging.info("Starting instance %s on node %s",
5789                    instance.name, target_node)
5790
5791       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792                                            ignore_secondaries=True)
5793       if not disks_ok:
5794         _ShutdownInstanceDisks(self, instance)
5795         raise errors.OpExecError("Can't activate the instance's disks")
5796
5797       feedback_fn("* starting the instance on the target node")
5798       result = self.rpc.call_instance_start(target_node, instance, None, None)
5799       msg = result.fail_msg
5800       if msg:
5801         _ShutdownInstanceDisks(self, instance)
5802         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5803                                  (instance.name, target_node, msg))
5804
5805
5806 class LUInstanceMigrate(LogicalUnit):
5807   """Migrate an instance.
5808
5809   This is migration without shutting down, compared to the failover,
5810   which is done with shutdown.
5811
5812   """
5813   HPATH = "instance-migrate"
5814   HTYPE = constants.HTYPE_INSTANCE
5815   REQ_BGL = False
5816
5817   def ExpandNames(self):
5818     self._ExpandAndLockInstance()
5819
5820     self.needed_locks[locking.LEVEL_NODE] = []
5821     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5822
5823     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5824                                        self.op.cleanup)
5825     self.tasklets = [self._migrater]
5826
5827   def DeclareLocks(self, level):
5828     if level == locking.LEVEL_NODE:
5829       self._LockInstancesNodes()
5830
5831   def BuildHooksEnv(self):
5832     """Build hooks env.
5833
5834     This runs on master, primary and secondary nodes of the instance.
5835
5836     """
5837     instance = self._migrater.instance
5838     source_node = instance.primary_node
5839     target_node = instance.secondary_nodes[0]
5840     env = _BuildInstanceHookEnvByObject(self, instance)
5841     env["MIGRATE_LIVE"] = self._migrater.live
5842     env["MIGRATE_CLEANUP"] = self.op.cleanup
5843     env.update({
5844         "OLD_PRIMARY": source_node,
5845         "OLD_SECONDARY": target_node,
5846         "NEW_PRIMARY": target_node,
5847         "NEW_SECONDARY": source_node,
5848         })
5849     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5850     nl_post = list(nl)
5851     nl_post.append(source_node)
5852     return env, nl, nl_post
5853
5854
5855 class LUInstanceMove(LogicalUnit):
5856   """Move an instance by data-copying.
5857
5858   """
5859   HPATH = "instance-move"
5860   HTYPE = constants.HTYPE_INSTANCE
5861   REQ_BGL = False
5862
5863   def ExpandNames(self):
5864     self._ExpandAndLockInstance()
5865     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5866     self.op.target_node = target_node
5867     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5868     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5869
5870   def DeclareLocks(self, level):
5871     if level == locking.LEVEL_NODE:
5872       self._LockInstancesNodes(primary_only=True)
5873
5874   def BuildHooksEnv(self):
5875     """Build hooks env.
5876
5877     This runs on master, primary and secondary nodes of the instance.
5878
5879     """
5880     env = {
5881       "TARGET_NODE": self.op.target_node,
5882       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5883       }
5884     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5885     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5886                                        self.op.target_node]
5887     return env, nl, nl
5888
5889   def CheckPrereq(self):
5890     """Check prerequisites.
5891
5892     This checks that the instance is in the cluster.
5893
5894     """
5895     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5896     assert self.instance is not None, \
5897       "Cannot retrieve locked instance %s" % self.op.instance_name
5898
5899     node = self.cfg.GetNodeInfo(self.op.target_node)
5900     assert node is not None, \
5901       "Cannot retrieve locked node %s" % self.op.target_node
5902
5903     self.target_node = target_node = node.name
5904
5905     if target_node == instance.primary_node:
5906       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5907                                  (instance.name, target_node),
5908                                  errors.ECODE_STATE)
5909
5910     bep = self.cfg.GetClusterInfo().FillBE(instance)
5911
5912     for idx, dsk in enumerate(instance.disks):
5913       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5914         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5915                                    " cannot copy" % idx, errors.ECODE_STATE)
5916
5917     _CheckNodeOnline(self, target_node)
5918     _CheckNodeNotDrained(self, target_node)
5919     _CheckNodeVmCapable(self, target_node)
5920
5921     if instance.admin_up:
5922       # check memory requirements on the secondary node
5923       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5924                            instance.name, bep[constants.BE_MEMORY],
5925                            instance.hypervisor)
5926     else:
5927       self.LogInfo("Not checking memory on the secondary node as"
5928                    " instance will not be started")
5929
5930     # check bridge existance
5931     _CheckInstanceBridgesExist(self, instance, node=target_node)
5932
5933   def Exec(self, feedback_fn):
5934     """Move an instance.
5935
5936     The move is done by shutting it down on its present node, copying
5937     the data over (slow) and starting it on the new node.
5938
5939     """
5940     instance = self.instance
5941
5942     source_node = instance.primary_node
5943     target_node = self.target_node
5944
5945     self.LogInfo("Shutting down instance %s on source node %s",
5946                  instance.name, source_node)
5947
5948     result = self.rpc.call_instance_shutdown(source_node, instance,
5949                                              self.op.shutdown_timeout)
5950     msg = result.fail_msg
5951     if msg:
5952       if self.op.ignore_consistency:
5953         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5954                              " Proceeding anyway. Please make sure node"
5955                              " %s is down. Error details: %s",
5956                              instance.name, source_node, source_node, msg)
5957       else:
5958         raise errors.OpExecError("Could not shutdown instance %s on"
5959                                  " node %s: %s" %
5960                                  (instance.name, source_node, msg))
5961
5962     # create the target disks
5963     try:
5964       _CreateDisks(self, instance, target_node=target_node)
5965     except errors.OpExecError:
5966       self.LogWarning("Device creation failed, reverting...")
5967       try:
5968         _RemoveDisks(self, instance, target_node=target_node)
5969       finally:
5970         self.cfg.ReleaseDRBDMinors(instance.name)
5971         raise
5972
5973     cluster_name = self.cfg.GetClusterInfo().cluster_name
5974
5975     errs = []
5976     # activate, get path, copy the data over
5977     for idx, disk in enumerate(instance.disks):
5978       self.LogInfo("Copying data for disk %d", idx)
5979       result = self.rpc.call_blockdev_assemble(target_node, disk,
5980                                                instance.name, True, idx)
5981       if result.fail_msg:
5982         self.LogWarning("Can't assemble newly created disk %d: %s",
5983                         idx, result.fail_msg)
5984         errs.append(result.fail_msg)
5985         break
5986       dev_path = result.payload
5987       result = self.rpc.call_blockdev_export(source_node, disk,
5988                                              target_node, dev_path,
5989                                              cluster_name)
5990       if result.fail_msg:
5991         self.LogWarning("Can't copy data over for disk %d: %s",
5992                         idx, result.fail_msg)
5993         errs.append(result.fail_msg)
5994         break
5995
5996     if errs:
5997       self.LogWarning("Some disks failed to copy, aborting")
5998       try:
5999         _RemoveDisks(self, instance, target_node=target_node)
6000       finally:
6001         self.cfg.ReleaseDRBDMinors(instance.name)
6002         raise errors.OpExecError("Errors during disk copy: %s" %
6003                                  (",".join(errs),))
6004
6005     instance.primary_node = target_node
6006     self.cfg.Update(instance, feedback_fn)
6007
6008     self.LogInfo("Removing the disks on the original node")
6009     _RemoveDisks(self, instance, target_node=source_node)
6010
6011     # Only start the instance if it's marked as up
6012     if instance.admin_up:
6013       self.LogInfo("Starting instance %s on node %s",
6014                    instance.name, target_node)
6015
6016       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6017                                            ignore_secondaries=True)
6018       if not disks_ok:
6019         _ShutdownInstanceDisks(self, instance)
6020         raise errors.OpExecError("Can't activate the instance's disks")
6021
6022       result = self.rpc.call_instance_start(target_node, instance, None, None)
6023       msg = result.fail_msg
6024       if msg:
6025         _ShutdownInstanceDisks(self, instance)
6026         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6027                                  (instance.name, target_node, msg))
6028
6029
6030 class LUNodeMigrate(LogicalUnit):
6031   """Migrate all instances from a node.
6032
6033   """
6034   HPATH = "node-migrate"
6035   HTYPE = constants.HTYPE_NODE
6036   REQ_BGL = False
6037
6038   def ExpandNames(self):
6039     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6040
6041     self.needed_locks = {
6042       locking.LEVEL_NODE: [self.op.node_name],
6043       }
6044
6045     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6046
6047     # Create tasklets for migrating instances for all instances on this node
6048     names = []
6049     tasklets = []
6050
6051     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6052       logging.debug("Migrating instance %s", inst.name)
6053       names.append(inst.name)
6054
6055       tasklets.append(TLMigrateInstance(self, inst.name, False))
6056
6057     self.tasklets = tasklets
6058
6059     # Declare instance locks
6060     self.needed_locks[locking.LEVEL_INSTANCE] = names
6061
6062   def DeclareLocks(self, level):
6063     if level == locking.LEVEL_NODE:
6064       self._LockInstancesNodes()
6065
6066   def BuildHooksEnv(self):
6067     """Build hooks env.
6068
6069     This runs on the master, the primary and all the secondaries.
6070
6071     """
6072     env = {
6073       "NODE_NAME": self.op.node_name,
6074       }
6075
6076     nl = [self.cfg.GetMasterNode()]
6077
6078     return (env, nl, nl)
6079
6080
6081 class TLMigrateInstance(Tasklet):
6082   """Tasklet class for instance migration.
6083
6084   @type live: boolean
6085   @ivar live: whether the migration will be done live or non-live;
6086       this variable is initalized only after CheckPrereq has run
6087
6088   """
6089   def __init__(self, lu, instance_name, cleanup):
6090     """Initializes this class.
6091
6092     """
6093     Tasklet.__init__(self, lu)
6094
6095     # Parameters
6096     self.instance_name = instance_name
6097     self.cleanup = cleanup
6098     self.live = False # will be overridden later
6099
6100   def CheckPrereq(self):
6101     """Check prerequisites.
6102
6103     This checks that the instance is in the cluster.
6104
6105     """
6106     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6107     instance = self.cfg.GetInstanceInfo(instance_name)
6108     assert instance is not None
6109
6110     if instance.disk_template != constants.DT_DRBD8:
6111       raise errors.OpPrereqError("Instance's disk layout is not"
6112                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6113
6114     secondary_nodes = instance.secondary_nodes
6115     if not secondary_nodes:
6116       raise errors.ConfigurationError("No secondary node but using"
6117                                       " drbd8 disk template")
6118
6119     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6120
6121     target_node = secondary_nodes[0]
6122     # check memory requirements on the secondary node
6123     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6124                          instance.name, i_be[constants.BE_MEMORY],
6125                          instance.hypervisor)
6126
6127     # check bridge existance
6128     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6129
6130     if not self.cleanup:
6131       _CheckNodeNotDrained(self.lu, target_node)
6132       result = self.rpc.call_instance_migratable(instance.primary_node,
6133                                                  instance)
6134       result.Raise("Can't migrate, please use failover",
6135                    prereq=True, ecode=errors.ECODE_STATE)
6136
6137     self.instance = instance
6138
6139     if self.lu.op.live is not None and self.lu.op.mode is not None:
6140       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6141                                  " parameters are accepted",
6142                                  errors.ECODE_INVAL)
6143     if self.lu.op.live is not None:
6144       if self.lu.op.live:
6145         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6146       else:
6147         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6148       # reset the 'live' parameter to None so that repeated
6149       # invocations of CheckPrereq do not raise an exception
6150       self.lu.op.live = None
6151     elif self.lu.op.mode is None:
6152       # read the default value from the hypervisor
6153       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6154       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6155
6156     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6157
6158   def _WaitUntilSync(self):
6159     """Poll with custom rpc for disk sync.
6160
6161     This uses our own step-based rpc call.
6162
6163     """
6164     self.feedback_fn("* wait until resync is done")
6165     all_done = False
6166     while not all_done:
6167       all_done = True
6168       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6169                                             self.nodes_ip,
6170                                             self.instance.disks)
6171       min_percent = 100
6172       for node, nres in result.items():
6173         nres.Raise("Cannot resync disks on node %s" % node)
6174         node_done, node_percent = nres.payload
6175         all_done = all_done and node_done
6176         if node_percent is not None:
6177           min_percent = min(min_percent, node_percent)
6178       if not all_done:
6179         if min_percent < 100:
6180           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6181         time.sleep(2)
6182
6183   def _EnsureSecondary(self, node):
6184     """Demote a node to secondary.
6185
6186     """
6187     self.feedback_fn("* switching node %s to secondary mode" % node)
6188
6189     for dev in self.instance.disks:
6190       self.cfg.SetDiskID(dev, node)
6191
6192     result = self.rpc.call_blockdev_close(node, self.instance.name,
6193                                           self.instance.disks)
6194     result.Raise("Cannot change disk to secondary on node %s" % node)
6195
6196   def _GoStandalone(self):
6197     """Disconnect from the network.
6198
6199     """
6200     self.feedback_fn("* changing into standalone mode")
6201     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6202                                                self.instance.disks)
6203     for node, nres in result.items():
6204       nres.Raise("Cannot disconnect disks node %s" % node)
6205
6206   def _GoReconnect(self, multimaster):
6207     """Reconnect to the network.
6208
6209     """
6210     if multimaster:
6211       msg = "dual-master"
6212     else:
6213       msg = "single-master"
6214     self.feedback_fn("* changing disks into %s mode" % msg)
6215     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6216                                            self.instance.disks,
6217                                            self.instance.name, multimaster)
6218     for node, nres in result.items():
6219       nres.Raise("Cannot change disks config on node %s" % node)
6220
6221   def _ExecCleanup(self):
6222     """Try to cleanup after a failed migration.
6223
6224     The cleanup is done by:
6225       - check that the instance is running only on one node
6226         (and update the config if needed)
6227       - change disks on its secondary node to secondary
6228       - wait until disks are fully synchronized
6229       - disconnect from the network
6230       - change disks into single-master mode
6231       - wait again until disks are fully synchronized
6232
6233     """
6234     instance = self.instance
6235     target_node = self.target_node
6236     source_node = self.source_node
6237
6238     # check running on only one node
6239     self.feedback_fn("* checking where the instance actually runs"
6240                      " (if this hangs, the hypervisor might be in"
6241                      " a bad state)")
6242     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6243     for node, result in ins_l.items():
6244       result.Raise("Can't contact node %s" % node)
6245
6246     runningon_source = instance.name in ins_l[source_node].payload
6247     runningon_target = instance.name in ins_l[target_node].payload
6248
6249     if runningon_source and runningon_target:
6250       raise errors.OpExecError("Instance seems to be running on two nodes,"
6251                                " or the hypervisor is confused. You will have"
6252                                " to ensure manually that it runs only on one"
6253                                " and restart this operation.")
6254
6255     if not (runningon_source or runningon_target):
6256       raise errors.OpExecError("Instance does not seem to be running at all."
6257                                " In this case, it's safer to repair by"
6258                                " running 'gnt-instance stop' to ensure disk"
6259                                " shutdown, and then restarting it.")
6260
6261     if runningon_target:
6262       # the migration has actually succeeded, we need to update the config
6263       self.feedback_fn("* instance running on secondary node (%s),"
6264                        " updating config" % target_node)
6265       instance.primary_node = target_node
6266       self.cfg.Update(instance, self.feedback_fn)
6267       demoted_node = source_node
6268     else:
6269       self.feedback_fn("* instance confirmed to be running on its"
6270                        " primary node (%s)" % source_node)
6271       demoted_node = target_node
6272
6273     self._EnsureSecondary(demoted_node)
6274     try:
6275       self._WaitUntilSync()
6276     except errors.OpExecError:
6277       # we ignore here errors, since if the device is standalone, it
6278       # won't be able to sync
6279       pass
6280     self._GoStandalone()
6281     self._GoReconnect(False)
6282     self._WaitUntilSync()
6283
6284     self.feedback_fn("* done")
6285
6286   def _RevertDiskStatus(self):
6287     """Try to revert the disk status after a failed migration.
6288
6289     """
6290     target_node = self.target_node
6291     try:
6292       self._EnsureSecondary(target_node)
6293       self._GoStandalone()
6294       self._GoReconnect(False)
6295       self._WaitUntilSync()
6296     except errors.OpExecError, err:
6297       self.lu.LogWarning("Migration failed and I can't reconnect the"
6298                          " drives: error '%s'\n"
6299                          "Please look and recover the instance status" %
6300                          str(err))
6301
6302   def _AbortMigration(self):
6303     """Call the hypervisor code to abort a started migration.
6304
6305     """
6306     instance = self.instance
6307     target_node = self.target_node
6308     migration_info = self.migration_info
6309
6310     abort_result = self.rpc.call_finalize_migration(target_node,
6311                                                     instance,
6312                                                     migration_info,
6313                                                     False)
6314     abort_msg = abort_result.fail_msg
6315     if abort_msg:
6316       logging.error("Aborting migration failed on target node %s: %s",
6317                     target_node, abort_msg)
6318       # Don't raise an exception here, as we stil have to try to revert the
6319       # disk status, even if this step failed.
6320
6321   def _ExecMigration(self):
6322     """Migrate an instance.
6323
6324     The migrate is done by:
6325       - change the disks into dual-master mode
6326       - wait until disks are fully synchronized again
6327       - migrate the instance
6328       - change disks on the new secondary node (the old primary) to secondary
6329       - wait until disks are fully synchronized
6330       - change disks into single-master mode
6331
6332     """
6333     instance = self.instance
6334     target_node = self.target_node
6335     source_node = self.source_node
6336
6337     self.feedback_fn("* checking disk consistency between source and target")
6338     for dev in instance.disks:
6339       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6340         raise errors.OpExecError("Disk %s is degraded or not fully"
6341                                  " synchronized on target node,"
6342                                  " aborting migrate." % dev.iv_name)
6343
6344     # First get the migration information from the remote node
6345     result = self.rpc.call_migration_info(source_node, instance)
6346     msg = result.fail_msg
6347     if msg:
6348       log_err = ("Failed fetching source migration information from %s: %s" %
6349                  (source_node, msg))
6350       logging.error(log_err)
6351       raise errors.OpExecError(log_err)
6352
6353     self.migration_info = migration_info = result.payload
6354
6355     # Then switch the disks to master/master mode
6356     self._EnsureSecondary(target_node)
6357     self._GoStandalone()
6358     self._GoReconnect(True)
6359     self._WaitUntilSync()
6360
6361     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6362     result = self.rpc.call_accept_instance(target_node,
6363                                            instance,
6364                                            migration_info,
6365                                            self.nodes_ip[target_node])
6366
6367     msg = result.fail_msg
6368     if msg:
6369       logging.error("Instance pre-migration failed, trying to revert"
6370                     " disk status: %s", msg)
6371       self.feedback_fn("Pre-migration failed, aborting")
6372       self._AbortMigration()
6373       self._RevertDiskStatus()
6374       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6375                                (instance.name, msg))
6376
6377     self.feedback_fn("* migrating instance to %s" % target_node)
6378     time.sleep(10)
6379     result = self.rpc.call_instance_migrate(source_node, instance,
6380                                             self.nodes_ip[target_node],
6381                                             self.live)
6382     msg = result.fail_msg
6383     if msg:
6384       logging.error("Instance migration failed, trying to revert"
6385                     " disk status: %s", msg)
6386       self.feedback_fn("Migration failed, aborting")
6387       self._AbortMigration()
6388       self._RevertDiskStatus()
6389       raise errors.OpExecError("Could not migrate instance %s: %s" %
6390                                (instance.name, msg))
6391     time.sleep(10)
6392
6393     instance.primary_node = target_node
6394     # distribute new instance config to the other nodes
6395     self.cfg.Update(instance, self.feedback_fn)
6396
6397     result = self.rpc.call_finalize_migration(target_node,
6398                                               instance,
6399                                               migration_info,
6400                                               True)
6401     msg = result.fail_msg
6402     if msg:
6403       logging.error("Instance migration succeeded, but finalization failed:"
6404                     " %s", msg)
6405       raise errors.OpExecError("Could not finalize instance migration: %s" %
6406                                msg)
6407
6408     self._EnsureSecondary(source_node)
6409     self._WaitUntilSync()
6410     self._GoStandalone()
6411     self._GoReconnect(False)
6412     self._WaitUntilSync()
6413
6414     self.feedback_fn("* done")
6415
6416   def Exec(self, feedback_fn):
6417     """Perform the migration.
6418
6419     """
6420     feedback_fn("Migrating instance %s" % self.instance.name)
6421
6422     self.feedback_fn = feedback_fn
6423
6424     self.source_node = self.instance.primary_node
6425     self.target_node = self.instance.secondary_nodes[0]
6426     self.all_nodes = [self.source_node, self.target_node]
6427     self.nodes_ip = {
6428       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6429       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6430       }
6431
6432     if self.cleanup:
6433       return self._ExecCleanup()
6434     else:
6435       return self._ExecMigration()
6436
6437
6438 def _CreateBlockDev(lu, node, instance, device, force_create,
6439                     info, force_open):
6440   """Create a tree of block devices on a given node.
6441
6442   If this device type has to be created on secondaries, create it and
6443   all its children.
6444
6445   If not, just recurse to children keeping the same 'force' value.
6446
6447   @param lu: the lu on whose behalf we execute
6448   @param node: the node on which to create the device
6449   @type instance: L{objects.Instance}
6450   @param instance: the instance which owns the device
6451   @type device: L{objects.Disk}
6452   @param device: the device to create
6453   @type force_create: boolean
6454   @param force_create: whether to force creation of this device; this
6455       will be change to True whenever we find a device which has
6456       CreateOnSecondary() attribute
6457   @param info: the extra 'metadata' we should attach to the device
6458       (this will be represented as a LVM tag)
6459   @type force_open: boolean
6460   @param force_open: this parameter will be passes to the
6461       L{backend.BlockdevCreate} function where it specifies
6462       whether we run on primary or not, and it affects both
6463       the child assembly and the device own Open() execution
6464
6465   """
6466   if device.CreateOnSecondary():
6467     force_create = True
6468
6469   if device.children:
6470     for child in device.children:
6471       _CreateBlockDev(lu, node, instance, child, force_create,
6472                       info, force_open)
6473
6474   if not force_create:
6475     return
6476
6477   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6478
6479
6480 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6481   """Create a single block device on a given node.
6482
6483   This will not recurse over children of the device, so they must be
6484   created in advance.
6485
6486   @param lu: the lu on whose behalf we execute
6487   @param node: the node on which to create the device
6488   @type instance: L{objects.Instance}
6489   @param instance: the instance which owns the device
6490   @type device: L{objects.Disk}
6491   @param device: the device to create
6492   @param info: the extra 'metadata' we should attach to the device
6493       (this will be represented as a LVM tag)
6494   @type force_open: boolean
6495   @param force_open: this parameter will be passes to the
6496       L{backend.BlockdevCreate} function where it specifies
6497       whether we run on primary or not, and it affects both
6498       the child assembly and the device own Open() execution
6499
6500   """
6501   lu.cfg.SetDiskID(device, node)
6502   result = lu.rpc.call_blockdev_create(node, device, device.size,
6503                                        instance.name, force_open, info)
6504   result.Raise("Can't create block device %s on"
6505                " node %s for instance %s" % (device, node, instance.name))
6506   if device.physical_id is None:
6507     device.physical_id = result.payload
6508
6509
6510 def _GenerateUniqueNames(lu, exts):
6511   """Generate a suitable LV name.
6512
6513   This will generate a logical volume name for the given instance.
6514
6515   """
6516   results = []
6517   for val in exts:
6518     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6519     results.append("%s%s" % (new_id, val))
6520   return results
6521
6522
6523 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6524                          p_minor, s_minor):
6525   """Generate a drbd8 device complete with its children.
6526
6527   """
6528   port = lu.cfg.AllocatePort()
6529   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6530   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6531                           logical_id=(vgname, names[0]))
6532   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6533                           logical_id=(vgname, names[1]))
6534   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6535                           logical_id=(primary, secondary, port,
6536                                       p_minor, s_minor,
6537                                       shared_secret),
6538                           children=[dev_data, dev_meta],
6539                           iv_name=iv_name)
6540   return drbd_dev
6541
6542
6543 def _GenerateDiskTemplate(lu, template_name,
6544                           instance_name, primary_node,
6545                           secondary_nodes, disk_info,
6546                           file_storage_dir, file_driver,
6547                           base_index, feedback_fn):
6548   """Generate the entire disk layout for a given template type.
6549
6550   """
6551   #TODO: compute space requirements
6552
6553   vgname = lu.cfg.GetVGName()
6554   disk_count = len(disk_info)
6555   disks = []
6556   if template_name == constants.DT_DISKLESS:
6557     pass
6558   elif template_name == constants.DT_PLAIN:
6559     if len(secondary_nodes) != 0:
6560       raise errors.ProgrammerError("Wrong template configuration")
6561
6562     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6563                                       for i in range(disk_count)])
6564     for idx, disk in enumerate(disk_info):
6565       disk_index = idx + base_index
6566       vg = disk.get("vg", vgname)
6567       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6568       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6569                               logical_id=(vg, names[idx]),
6570                               iv_name="disk/%d" % disk_index,
6571                               mode=disk["mode"])
6572       disks.append(disk_dev)
6573   elif template_name == constants.DT_DRBD8:
6574     if len(secondary_nodes) != 1:
6575       raise errors.ProgrammerError("Wrong template configuration")
6576     remote_node = secondary_nodes[0]
6577     minors = lu.cfg.AllocateDRBDMinor(
6578       [primary_node, remote_node] * len(disk_info), instance_name)
6579
6580     names = []
6581     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6582                                                for i in range(disk_count)]):
6583       names.append(lv_prefix + "_data")
6584       names.append(lv_prefix + "_meta")
6585     for idx, disk in enumerate(disk_info):
6586       disk_index = idx + base_index
6587       vg = disk.get("vg", vgname)
6588       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6589                                       disk["size"], vg, names[idx*2:idx*2+2],
6590                                       "disk/%d" % disk_index,
6591                                       minors[idx*2], minors[idx*2+1])
6592       disk_dev.mode = disk["mode"]
6593       disks.append(disk_dev)
6594   elif template_name == constants.DT_FILE:
6595     if len(secondary_nodes) != 0:
6596       raise errors.ProgrammerError("Wrong template configuration")
6597
6598     opcodes.RequireFileStorage()
6599
6600     for idx, disk in enumerate(disk_info):
6601       disk_index = idx + base_index
6602       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6603                               iv_name="disk/%d" % disk_index,
6604                               logical_id=(file_driver,
6605                                           "%s/disk%d" % (file_storage_dir,
6606                                                          disk_index)),
6607                               mode=disk["mode"])
6608       disks.append(disk_dev)
6609   else:
6610     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6611   return disks
6612
6613
6614 def _GetInstanceInfoText(instance):
6615   """Compute that text that should be added to the disk's metadata.
6616
6617   """
6618   return "originstname+%s" % instance.name
6619
6620
6621 def _CalcEta(time_taken, written, total_size):
6622   """Calculates the ETA based on size written and total size.
6623
6624   @param time_taken: The time taken so far
6625   @param written: amount written so far
6626   @param total_size: The total size of data to be written
6627   @return: The remaining time in seconds
6628
6629   """
6630   avg_time = time_taken / float(written)
6631   return (total_size - written) * avg_time
6632
6633
6634 def _WipeDisks(lu, instance):
6635   """Wipes instance disks.
6636
6637   @type lu: L{LogicalUnit}
6638   @param lu: the logical unit on whose behalf we execute
6639   @type instance: L{objects.Instance}
6640   @param instance: the instance whose disks we should create
6641   @return: the success of the wipe
6642
6643   """
6644   node = instance.primary_node
6645
6646   for device in instance.disks:
6647     lu.cfg.SetDiskID(device, node)
6648
6649   logging.info("Pause sync of instance %s disks", instance.name)
6650   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6651
6652   for idx, success in enumerate(result.payload):
6653     if not success:
6654       logging.warn("pause-sync of instance %s for disks %d failed",
6655                    instance.name, idx)
6656
6657   try:
6658     for idx, device in enumerate(instance.disks):
6659       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6660       # MAX_WIPE_CHUNK at max
6661       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6662                             constants.MIN_WIPE_CHUNK_PERCENT)
6663       # we _must_ make this an int, otherwise rounding errors will
6664       # occur
6665       wipe_chunk_size = int(wipe_chunk_size)
6666
6667       lu.LogInfo("* Wiping disk %d", idx)
6668       logging.info("Wiping disk %d for instance %s, node %s using"
6669                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
6670
6671       offset = 0
6672       size = device.size
6673       last_output = 0
6674       start_time = time.time()
6675
6676       while offset < size:
6677         wipe_size = min(wipe_chunk_size, size - offset)
6678         logging.debug("Wiping disk %d, offset %s, chunk %s",
6679                       idx, offset, wipe_size)
6680         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6681         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6682                      (idx, offset, wipe_size))
6683         now = time.time()
6684         offset += wipe_size
6685         if now - last_output >= 60:
6686           eta = _CalcEta(now - start_time, offset, size)
6687           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6688                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6689           last_output = now
6690   finally:
6691     logging.info("Resume sync of instance %s disks", instance.name)
6692
6693     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6694
6695     for idx, success in enumerate(result.payload):
6696       if not success:
6697         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6698                       " look at the status and troubleshoot the issue.", idx)
6699         logging.warn("resume-sync of instance %s for disks %d failed",
6700                      instance.name, idx)
6701
6702
6703 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6704   """Create all disks for an instance.
6705
6706   This abstracts away some work from AddInstance.
6707
6708   @type lu: L{LogicalUnit}
6709   @param lu: the logical unit on whose behalf we execute
6710   @type instance: L{objects.Instance}
6711   @param instance: the instance whose disks we should create
6712   @type to_skip: list
6713   @param to_skip: list of indices to skip
6714   @type target_node: string
6715   @param target_node: if passed, overrides the target node for creation
6716   @rtype: boolean
6717   @return: the success of the creation
6718
6719   """
6720   info = _GetInstanceInfoText(instance)
6721   if target_node is None:
6722     pnode = instance.primary_node
6723     all_nodes = instance.all_nodes
6724   else:
6725     pnode = target_node
6726     all_nodes = [pnode]
6727
6728   if instance.disk_template == constants.DT_FILE:
6729     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6730     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6731
6732     result.Raise("Failed to create directory '%s' on"
6733                  " node %s" % (file_storage_dir, pnode))
6734
6735   # Note: this needs to be kept in sync with adding of disks in
6736   # LUInstanceSetParams
6737   for idx, device in enumerate(instance.disks):
6738     if to_skip and idx in to_skip:
6739       continue
6740     logging.info("Creating volume %s for instance %s",
6741                  device.iv_name, instance.name)
6742     #HARDCODE
6743     for node in all_nodes:
6744       f_create = node == pnode
6745       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6746
6747
6748 def _RemoveDisks(lu, instance, target_node=None):
6749   """Remove all disks for an instance.
6750
6751   This abstracts away some work from `AddInstance()` and
6752   `RemoveInstance()`. Note that in case some of the devices couldn't
6753   be removed, the removal will continue with the other ones (compare
6754   with `_CreateDisks()`).
6755
6756   @type lu: L{LogicalUnit}
6757   @param lu: the logical unit on whose behalf we execute
6758   @type instance: L{objects.Instance}
6759   @param instance: the instance whose disks we should remove
6760   @type target_node: string
6761   @param target_node: used to override the node on which to remove the disks
6762   @rtype: boolean
6763   @return: the success of the removal
6764
6765   """
6766   logging.info("Removing block devices for instance %s", instance.name)
6767
6768   all_result = True
6769   for device in instance.disks:
6770     if target_node:
6771       edata = [(target_node, device)]
6772     else:
6773       edata = device.ComputeNodeTree(instance.primary_node)
6774     for node, disk in edata:
6775       lu.cfg.SetDiskID(disk, node)
6776       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6777       if msg:
6778         lu.LogWarning("Could not remove block device %s on node %s,"
6779                       " continuing anyway: %s", device.iv_name, node, msg)
6780         all_result = False
6781
6782   if instance.disk_template == constants.DT_FILE:
6783     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6784     if target_node:
6785       tgt = target_node
6786     else:
6787       tgt = instance.primary_node
6788     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6789     if result.fail_msg:
6790       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6791                     file_storage_dir, instance.primary_node, result.fail_msg)
6792       all_result = False
6793
6794   return all_result
6795
6796
6797 def _ComputeDiskSizePerVG(disk_template, disks):
6798   """Compute disk size requirements in the volume group
6799
6800   """
6801   def _compute(disks, payload):
6802     """Universal algorithm
6803
6804     """
6805     vgs = {}
6806     for disk in disks:
6807       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6808
6809     return vgs
6810
6811   # Required free disk space as a function of disk and swap space
6812   req_size_dict = {
6813     constants.DT_DISKLESS: {},
6814     constants.DT_PLAIN: _compute(disks, 0),
6815     # 128 MB are added for drbd metadata for each disk
6816     constants.DT_DRBD8: _compute(disks, 128),
6817     constants.DT_FILE: {},
6818   }
6819
6820   if disk_template not in req_size_dict:
6821     raise errors.ProgrammerError("Disk template '%s' size requirement"
6822                                  " is unknown" %  disk_template)
6823
6824   return req_size_dict[disk_template]
6825
6826
6827 def _ComputeDiskSize(disk_template, disks):
6828   """Compute disk size requirements in the volume group
6829
6830   """
6831   # Required free disk space as a function of disk and swap space
6832   req_size_dict = {
6833     constants.DT_DISKLESS: None,
6834     constants.DT_PLAIN: sum(d["size"] for d in disks),
6835     # 128 MB are added for drbd metadata for each disk
6836     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6837     constants.DT_FILE: None,
6838   }
6839
6840   if disk_template not in req_size_dict:
6841     raise errors.ProgrammerError("Disk template '%s' size requirement"
6842                                  " is unknown" %  disk_template)
6843
6844   return req_size_dict[disk_template]
6845
6846
6847 def _FilterVmNodes(lu, nodenames):
6848   """Filters out non-vm_capable nodes from a list.
6849
6850   @type lu: L{LogicalUnit}
6851   @param lu: the logical unit for which we check
6852   @type nodenames: list
6853   @param nodenames: the list of nodes on which we should check
6854   @rtype: list
6855   @return: the list of vm-capable nodes
6856
6857   """
6858   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6859   return [name for name in nodenames if name not in vm_nodes]
6860
6861
6862 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6863   """Hypervisor parameter validation.
6864
6865   This function abstract the hypervisor parameter validation to be
6866   used in both instance create and instance modify.
6867
6868   @type lu: L{LogicalUnit}
6869   @param lu: the logical unit for which we check
6870   @type nodenames: list
6871   @param nodenames: the list of nodes on which we should check
6872   @type hvname: string
6873   @param hvname: the name of the hypervisor we should use
6874   @type hvparams: dict
6875   @param hvparams: the parameters which we need to check
6876   @raise errors.OpPrereqError: if the parameters are not valid
6877
6878   """
6879   nodenames = _FilterVmNodes(lu, nodenames)
6880   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6881                                                   hvname,
6882                                                   hvparams)
6883   for node in nodenames:
6884     info = hvinfo[node]
6885     if info.offline:
6886       continue
6887     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6888
6889
6890 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6891   """OS parameters validation.
6892
6893   @type lu: L{LogicalUnit}
6894   @param lu: the logical unit for which we check
6895   @type required: boolean
6896   @param required: whether the validation should fail if the OS is not
6897       found
6898   @type nodenames: list
6899   @param nodenames: the list of nodes on which we should check
6900   @type osname: string
6901   @param osname: the name of the hypervisor we should use
6902   @type osparams: dict
6903   @param osparams: the parameters which we need to check
6904   @raise errors.OpPrereqError: if the parameters are not valid
6905
6906   """
6907   nodenames = _FilterVmNodes(lu, nodenames)
6908   result = lu.rpc.call_os_validate(required, nodenames, osname,
6909                                    [constants.OS_VALIDATE_PARAMETERS],
6910                                    osparams)
6911   for node, nres in result.items():
6912     # we don't check for offline cases since this should be run only
6913     # against the master node and/or an instance's nodes
6914     nres.Raise("OS Parameters validation failed on node %s" % node)
6915     if not nres.payload:
6916       lu.LogInfo("OS %s not found on node %s, validation skipped",
6917                  osname, node)
6918
6919
6920 class LUInstanceCreate(LogicalUnit):
6921   """Create an instance.
6922
6923   """
6924   HPATH = "instance-add"
6925   HTYPE = constants.HTYPE_INSTANCE
6926   REQ_BGL = False
6927
6928   def CheckArguments(self):
6929     """Check arguments.
6930
6931     """
6932     # do not require name_check to ease forward/backward compatibility
6933     # for tools
6934     if self.op.no_install and self.op.start:
6935       self.LogInfo("No-installation mode selected, disabling startup")
6936       self.op.start = False
6937     # validate/normalize the instance name
6938     self.op.instance_name = \
6939       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6940
6941     if self.op.ip_check and not self.op.name_check:
6942       # TODO: make the ip check more flexible and not depend on the name check
6943       raise errors.OpPrereqError("Cannot do ip check without a name check",
6944                                  errors.ECODE_INVAL)
6945
6946     # check nics' parameter names
6947     for nic in self.op.nics:
6948       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6949
6950     # check disks. parameter names and consistent adopt/no-adopt strategy
6951     has_adopt = has_no_adopt = False
6952     for disk in self.op.disks:
6953       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6954       if "adopt" in disk:
6955         has_adopt = True
6956       else:
6957         has_no_adopt = True
6958     if has_adopt and has_no_adopt:
6959       raise errors.OpPrereqError("Either all disks are adopted or none is",
6960                                  errors.ECODE_INVAL)
6961     if has_adopt:
6962       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6963         raise errors.OpPrereqError("Disk adoption is not supported for the"
6964                                    " '%s' disk template" %
6965                                    self.op.disk_template,
6966                                    errors.ECODE_INVAL)
6967       if self.op.iallocator is not None:
6968         raise errors.OpPrereqError("Disk adoption not allowed with an"
6969                                    " iallocator script", errors.ECODE_INVAL)
6970       if self.op.mode == constants.INSTANCE_IMPORT:
6971         raise errors.OpPrereqError("Disk adoption not allowed for"
6972                                    " instance import", errors.ECODE_INVAL)
6973
6974     self.adopt_disks = has_adopt
6975
6976     # instance name verification
6977     if self.op.name_check:
6978       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6979       self.op.instance_name = self.hostname1.name
6980       # used in CheckPrereq for ip ping check
6981       self.check_ip = self.hostname1.ip
6982     else:
6983       self.check_ip = None
6984
6985     # file storage checks
6986     if (self.op.file_driver and
6987         not self.op.file_driver in constants.FILE_DRIVER):
6988       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6989                                  self.op.file_driver, errors.ECODE_INVAL)
6990
6991     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6992       raise errors.OpPrereqError("File storage directory path not absolute",
6993                                  errors.ECODE_INVAL)
6994
6995     ### Node/iallocator related checks
6996     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6997
6998     if self.op.pnode is not None:
6999       if self.op.disk_template in constants.DTS_NET_MIRROR:
7000         if self.op.snode is None:
7001           raise errors.OpPrereqError("The networked disk templates need"
7002                                      " a mirror node", errors.ECODE_INVAL)
7003       elif self.op.snode:
7004         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7005                         " template")
7006         self.op.snode = None
7007
7008     self._cds = _GetClusterDomainSecret()
7009
7010     if self.op.mode == constants.INSTANCE_IMPORT:
7011       # On import force_variant must be True, because if we forced it at
7012       # initial install, our only chance when importing it back is that it
7013       # works again!
7014       self.op.force_variant = True
7015
7016       if self.op.no_install:
7017         self.LogInfo("No-installation mode has no effect during import")
7018
7019     elif self.op.mode == constants.INSTANCE_CREATE:
7020       if self.op.os_type is None:
7021         raise errors.OpPrereqError("No guest OS specified",
7022                                    errors.ECODE_INVAL)
7023       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7024         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7025                                    " installation" % self.op.os_type,
7026                                    errors.ECODE_STATE)
7027       if self.op.disk_template is None:
7028         raise errors.OpPrereqError("No disk template specified",
7029                                    errors.ECODE_INVAL)
7030
7031     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7032       # Check handshake to ensure both clusters have the same domain secret
7033       src_handshake = self.op.source_handshake
7034       if not src_handshake:
7035         raise errors.OpPrereqError("Missing source handshake",
7036                                    errors.ECODE_INVAL)
7037
7038       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7039                                                            src_handshake)
7040       if errmsg:
7041         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7042                                    errors.ECODE_INVAL)
7043
7044       # Load and check source CA
7045       self.source_x509_ca_pem = self.op.source_x509_ca
7046       if not self.source_x509_ca_pem:
7047         raise errors.OpPrereqError("Missing source X509 CA",
7048                                    errors.ECODE_INVAL)
7049
7050       try:
7051         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7052                                                     self._cds)
7053       except OpenSSL.crypto.Error, err:
7054         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7055                                    (err, ), errors.ECODE_INVAL)
7056
7057       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7058       if errcode is not None:
7059         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7060                                    errors.ECODE_INVAL)
7061
7062       self.source_x509_ca = cert
7063
7064       src_instance_name = self.op.source_instance_name
7065       if not src_instance_name:
7066         raise errors.OpPrereqError("Missing source instance name",
7067                                    errors.ECODE_INVAL)
7068
7069       self.source_instance_name = \
7070           netutils.GetHostname(name=src_instance_name).name
7071
7072     else:
7073       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7074                                  self.op.mode, errors.ECODE_INVAL)
7075
7076   def ExpandNames(self):
7077     """ExpandNames for CreateInstance.
7078
7079     Figure out the right locks for instance creation.
7080
7081     """
7082     self.needed_locks = {}
7083
7084     instance_name = self.op.instance_name
7085     # this is just a preventive check, but someone might still add this
7086     # instance in the meantime, and creation will fail at lock-add time
7087     if instance_name in self.cfg.GetInstanceList():
7088       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7089                                  instance_name, errors.ECODE_EXISTS)
7090
7091     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7092
7093     if self.op.iallocator:
7094       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7095     else:
7096       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7097       nodelist = [self.op.pnode]
7098       if self.op.snode is not None:
7099         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7100         nodelist.append(self.op.snode)
7101       self.needed_locks[locking.LEVEL_NODE] = nodelist
7102
7103     # in case of import lock the source node too
7104     if self.op.mode == constants.INSTANCE_IMPORT:
7105       src_node = self.op.src_node
7106       src_path = self.op.src_path
7107
7108       if src_path is None:
7109         self.op.src_path = src_path = self.op.instance_name
7110
7111       if src_node is None:
7112         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7113         self.op.src_node = None
7114         if os.path.isabs(src_path):
7115           raise errors.OpPrereqError("Importing an instance from an absolute"
7116                                      " path requires a source node option.",
7117                                      errors.ECODE_INVAL)
7118       else:
7119         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7120         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7121           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7122         if not os.path.isabs(src_path):
7123           self.op.src_path = src_path = \
7124             utils.PathJoin(constants.EXPORT_DIR, src_path)
7125
7126   def _RunAllocator(self):
7127     """Run the allocator based on input opcode.
7128
7129     """
7130     nics = [n.ToDict() for n in self.nics]
7131     ial = IAllocator(self.cfg, self.rpc,
7132                      mode=constants.IALLOCATOR_MODE_ALLOC,
7133                      name=self.op.instance_name,
7134                      disk_template=self.op.disk_template,
7135                      tags=[],
7136                      os=self.op.os_type,
7137                      vcpus=self.be_full[constants.BE_VCPUS],
7138                      mem_size=self.be_full[constants.BE_MEMORY],
7139                      disks=self.disks,
7140                      nics=nics,
7141                      hypervisor=self.op.hypervisor,
7142                      )
7143
7144     ial.Run(self.op.iallocator)
7145
7146     if not ial.success:
7147       raise errors.OpPrereqError("Can't compute nodes using"
7148                                  " iallocator '%s': %s" %
7149                                  (self.op.iallocator, ial.info),
7150                                  errors.ECODE_NORES)
7151     if len(ial.result) != ial.required_nodes:
7152       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7153                                  " of nodes (%s), required %s" %
7154                                  (self.op.iallocator, len(ial.result),
7155                                   ial.required_nodes), errors.ECODE_FAULT)
7156     self.op.pnode = ial.result[0]
7157     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7158                  self.op.instance_name, self.op.iallocator,
7159                  utils.CommaJoin(ial.result))
7160     if ial.required_nodes == 2:
7161       self.op.snode = ial.result[1]
7162
7163   def BuildHooksEnv(self):
7164     """Build hooks env.
7165
7166     This runs on master, primary and secondary nodes of the instance.
7167
7168     """
7169     env = {
7170       "ADD_MODE": self.op.mode,
7171       }
7172     if self.op.mode == constants.INSTANCE_IMPORT:
7173       env["SRC_NODE"] = self.op.src_node
7174       env["SRC_PATH"] = self.op.src_path
7175       env["SRC_IMAGES"] = self.src_images
7176
7177     env.update(_BuildInstanceHookEnv(
7178       name=self.op.instance_name,
7179       primary_node=self.op.pnode,
7180       secondary_nodes=self.secondaries,
7181       status=self.op.start,
7182       os_type=self.op.os_type,
7183       memory=self.be_full[constants.BE_MEMORY],
7184       vcpus=self.be_full[constants.BE_VCPUS],
7185       nics=_NICListToTuple(self, self.nics),
7186       disk_template=self.op.disk_template,
7187       disks=[(d["size"], d["mode"]) for d in self.disks],
7188       bep=self.be_full,
7189       hvp=self.hv_full,
7190       hypervisor_name=self.op.hypervisor,
7191     ))
7192
7193     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7194           self.secondaries)
7195     return env, nl, nl
7196
7197   def _ReadExportInfo(self):
7198     """Reads the export information from disk.
7199
7200     It will override the opcode source node and path with the actual
7201     information, if these two were not specified before.
7202
7203     @return: the export information
7204
7205     """
7206     assert self.op.mode == constants.INSTANCE_IMPORT
7207
7208     src_node = self.op.src_node
7209     src_path = self.op.src_path
7210
7211     if src_node is None:
7212       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7213       exp_list = self.rpc.call_export_list(locked_nodes)
7214       found = False
7215       for node in exp_list:
7216         if exp_list[node].fail_msg:
7217           continue
7218         if src_path in exp_list[node].payload:
7219           found = True
7220           self.op.src_node = src_node = node
7221           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7222                                                        src_path)
7223           break
7224       if not found:
7225         raise errors.OpPrereqError("No export found for relative path %s" %
7226                                     src_path, errors.ECODE_INVAL)
7227
7228     _CheckNodeOnline(self, src_node)
7229     result = self.rpc.call_export_info(src_node, src_path)
7230     result.Raise("No export or invalid export found in dir %s" % src_path)
7231
7232     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7233     if not export_info.has_section(constants.INISECT_EXP):
7234       raise errors.ProgrammerError("Corrupted export config",
7235                                    errors.ECODE_ENVIRON)
7236
7237     ei_version = export_info.get(constants.INISECT_EXP, "version")
7238     if (int(ei_version) != constants.EXPORT_VERSION):
7239       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7240                                  (ei_version, constants.EXPORT_VERSION),
7241                                  errors.ECODE_ENVIRON)
7242     return export_info
7243
7244   def _ReadExportParams(self, einfo):
7245     """Use export parameters as defaults.
7246
7247     In case the opcode doesn't specify (as in override) some instance
7248     parameters, then try to use them from the export information, if
7249     that declares them.
7250
7251     """
7252     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7253
7254     if self.op.disk_template is None:
7255       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7256         self.op.disk_template = einfo.get(constants.INISECT_INS,
7257                                           "disk_template")
7258       else:
7259         raise errors.OpPrereqError("No disk template specified and the export"
7260                                    " is missing the disk_template information",
7261                                    errors.ECODE_INVAL)
7262
7263     if not self.op.disks:
7264       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7265         disks = []
7266         # TODO: import the disk iv_name too
7267         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7268           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7269           disks.append({"size": disk_sz})
7270         self.op.disks = disks
7271       else:
7272         raise errors.OpPrereqError("No disk info specified and the export"
7273                                    " is missing the disk information",
7274                                    errors.ECODE_INVAL)
7275
7276     if (not self.op.nics and
7277         einfo.has_option(constants.INISECT_INS, "nic_count")):
7278       nics = []
7279       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7280         ndict = {}
7281         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7282           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7283           ndict[name] = v
7284         nics.append(ndict)
7285       self.op.nics = nics
7286
7287     if (self.op.hypervisor is None and
7288         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7289       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7290     if einfo.has_section(constants.INISECT_HYP):
7291       # use the export parameters but do not override the ones
7292       # specified by the user
7293       for name, value in einfo.items(constants.INISECT_HYP):
7294         if name not in self.op.hvparams:
7295           self.op.hvparams[name] = value
7296
7297     if einfo.has_section(constants.INISECT_BEP):
7298       # use the parameters, without overriding
7299       for name, value in einfo.items(constants.INISECT_BEP):
7300         if name not in self.op.beparams:
7301           self.op.beparams[name] = value
7302     else:
7303       # try to read the parameters old style, from the main section
7304       for name in constants.BES_PARAMETERS:
7305         if (name not in self.op.beparams and
7306             einfo.has_option(constants.INISECT_INS, name)):
7307           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7308
7309     if einfo.has_section(constants.INISECT_OSP):
7310       # use the parameters, without overriding
7311       for name, value in einfo.items(constants.INISECT_OSP):
7312         if name not in self.op.osparams:
7313           self.op.osparams[name] = value
7314
7315   def _RevertToDefaults(self, cluster):
7316     """Revert the instance parameters to the default values.
7317
7318     """
7319     # hvparams
7320     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7321     for name in self.op.hvparams.keys():
7322       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7323         del self.op.hvparams[name]
7324     # beparams
7325     be_defs = cluster.SimpleFillBE({})
7326     for name in self.op.beparams.keys():
7327       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7328         del self.op.beparams[name]
7329     # nic params
7330     nic_defs = cluster.SimpleFillNIC({})
7331     for nic in self.op.nics:
7332       for name in constants.NICS_PARAMETERS:
7333         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7334           del nic[name]
7335     # osparams
7336     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7337     for name in self.op.osparams.keys():
7338       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7339         del self.op.osparams[name]
7340
7341   def CheckPrereq(self):
7342     """Check prerequisites.
7343
7344     """
7345     if self.op.mode == constants.INSTANCE_IMPORT:
7346       export_info = self._ReadExportInfo()
7347       self._ReadExportParams(export_info)
7348
7349     if (not self.cfg.GetVGName() and
7350         self.op.disk_template not in constants.DTS_NOT_LVM):
7351       raise errors.OpPrereqError("Cluster does not support lvm-based"
7352                                  " instances", errors.ECODE_STATE)
7353
7354     if self.op.hypervisor is None:
7355       self.op.hypervisor = self.cfg.GetHypervisorType()
7356
7357     cluster = self.cfg.GetClusterInfo()
7358     enabled_hvs = cluster.enabled_hypervisors
7359     if self.op.hypervisor not in enabled_hvs:
7360       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7361                                  " cluster (%s)" % (self.op.hypervisor,
7362                                   ",".join(enabled_hvs)),
7363                                  errors.ECODE_STATE)
7364
7365     # check hypervisor parameter syntax (locally)
7366     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7367     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7368                                       self.op.hvparams)
7369     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7370     hv_type.CheckParameterSyntax(filled_hvp)
7371     self.hv_full = filled_hvp
7372     # check that we don't specify global parameters on an instance
7373     _CheckGlobalHvParams(self.op.hvparams)
7374
7375     # fill and remember the beparams dict
7376     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7377     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7378
7379     # build os parameters
7380     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7381
7382     # now that hvp/bep are in final format, let's reset to defaults,
7383     # if told to do so
7384     if self.op.identify_defaults:
7385       self._RevertToDefaults(cluster)
7386
7387     # NIC buildup
7388     self.nics = []
7389     for idx, nic in enumerate(self.op.nics):
7390       nic_mode_req = nic.get("mode", None)
7391       nic_mode = nic_mode_req
7392       if nic_mode is None:
7393         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7394
7395       # in routed mode, for the first nic, the default ip is 'auto'
7396       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7397         default_ip_mode = constants.VALUE_AUTO
7398       else:
7399         default_ip_mode = constants.VALUE_NONE
7400
7401       # ip validity checks
7402       ip = nic.get("ip", default_ip_mode)
7403       if ip is None or ip.lower() == constants.VALUE_NONE:
7404         nic_ip = None
7405       elif ip.lower() == constants.VALUE_AUTO:
7406         if not self.op.name_check:
7407           raise errors.OpPrereqError("IP address set to auto but name checks"
7408                                      " have been skipped",
7409                                      errors.ECODE_INVAL)
7410         nic_ip = self.hostname1.ip
7411       else:
7412         if not netutils.IPAddress.IsValid(ip):
7413           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7414                                      errors.ECODE_INVAL)
7415         nic_ip = ip
7416
7417       # TODO: check the ip address for uniqueness
7418       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7419         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7420                                    errors.ECODE_INVAL)
7421
7422       # MAC address verification
7423       mac = nic.get("mac", constants.VALUE_AUTO)
7424       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7425         mac = utils.NormalizeAndValidateMac(mac)
7426
7427         try:
7428           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7429         except errors.ReservationError:
7430           raise errors.OpPrereqError("MAC address %s already in use"
7431                                      " in cluster" % mac,
7432                                      errors.ECODE_NOTUNIQUE)
7433
7434       # bridge verification
7435       bridge = nic.get("bridge", None)
7436       link = nic.get("link", None)
7437       if bridge and link:
7438         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7439                                    " at the same time", errors.ECODE_INVAL)
7440       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7441         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7442                                    errors.ECODE_INVAL)
7443       elif bridge:
7444         link = bridge
7445
7446       nicparams = {}
7447       if nic_mode_req:
7448         nicparams[constants.NIC_MODE] = nic_mode_req
7449       if link:
7450         nicparams[constants.NIC_LINK] = link
7451
7452       check_params = cluster.SimpleFillNIC(nicparams)
7453       objects.NIC.CheckParameterSyntax(check_params)
7454       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7455
7456     # disk checks/pre-build
7457     self.disks = []
7458     for disk in self.op.disks:
7459       mode = disk.get("mode", constants.DISK_RDWR)
7460       if mode not in constants.DISK_ACCESS_SET:
7461         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7462                                    mode, errors.ECODE_INVAL)
7463       size = disk.get("size", None)
7464       if size is None:
7465         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7466       try:
7467         size = int(size)
7468       except (TypeError, ValueError):
7469         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7470                                    errors.ECODE_INVAL)
7471       vg = disk.get("vg", self.cfg.GetVGName())
7472       new_disk = {"size": size, "mode": mode, "vg": vg}
7473       if "adopt" in disk:
7474         new_disk["adopt"] = disk["adopt"]
7475       self.disks.append(new_disk)
7476
7477     if self.op.mode == constants.INSTANCE_IMPORT:
7478
7479       # Check that the new instance doesn't have less disks than the export
7480       instance_disks = len(self.disks)
7481       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7482       if instance_disks < export_disks:
7483         raise errors.OpPrereqError("Not enough disks to import."
7484                                    " (instance: %d, export: %d)" %
7485                                    (instance_disks, export_disks),
7486                                    errors.ECODE_INVAL)
7487
7488       disk_images = []
7489       for idx in range(export_disks):
7490         option = 'disk%d_dump' % idx
7491         if export_info.has_option(constants.INISECT_INS, option):
7492           # FIXME: are the old os-es, disk sizes, etc. useful?
7493           export_name = export_info.get(constants.INISECT_INS, option)
7494           image = utils.PathJoin(self.op.src_path, export_name)
7495           disk_images.append(image)
7496         else:
7497           disk_images.append(False)
7498
7499       self.src_images = disk_images
7500
7501       old_name = export_info.get(constants.INISECT_INS, 'name')
7502       try:
7503         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7504       except (TypeError, ValueError), err:
7505         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7506                                    " an integer: %s" % str(err),
7507                                    errors.ECODE_STATE)
7508       if self.op.instance_name == old_name:
7509         for idx, nic in enumerate(self.nics):
7510           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7511             nic_mac_ini = 'nic%d_mac' % idx
7512             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7513
7514     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7515
7516     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7517     if self.op.ip_check:
7518       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7519         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7520                                    (self.check_ip, self.op.instance_name),
7521                                    errors.ECODE_NOTUNIQUE)
7522
7523     #### mac address generation
7524     # By generating here the mac address both the allocator and the hooks get
7525     # the real final mac address rather than the 'auto' or 'generate' value.
7526     # There is a race condition between the generation and the instance object
7527     # creation, which means that we know the mac is valid now, but we're not
7528     # sure it will be when we actually add the instance. If things go bad
7529     # adding the instance will abort because of a duplicate mac, and the
7530     # creation job will fail.
7531     for nic in self.nics:
7532       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7533         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7534
7535     #### allocator run
7536
7537     if self.op.iallocator is not None:
7538       self._RunAllocator()
7539
7540     #### node related checks
7541
7542     # check primary node
7543     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7544     assert self.pnode is not None, \
7545       "Cannot retrieve locked node %s" % self.op.pnode
7546     if pnode.offline:
7547       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7548                                  pnode.name, errors.ECODE_STATE)
7549     if pnode.drained:
7550       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7551                                  pnode.name, errors.ECODE_STATE)
7552     if not pnode.vm_capable:
7553       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7554                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7555
7556     self.secondaries = []
7557
7558     # mirror node verification
7559     if self.op.disk_template in constants.DTS_NET_MIRROR:
7560       if self.op.snode == pnode.name:
7561         raise errors.OpPrereqError("The secondary node cannot be the"
7562                                    " primary node.", errors.ECODE_INVAL)
7563       _CheckNodeOnline(self, self.op.snode)
7564       _CheckNodeNotDrained(self, self.op.snode)
7565       _CheckNodeVmCapable(self, self.op.snode)
7566       self.secondaries.append(self.op.snode)
7567
7568     nodenames = [pnode.name] + self.secondaries
7569
7570     if not self.adopt_disks:
7571       # Check lv size requirements, if not adopting
7572       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7573       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7574
7575     else: # instead, we must check the adoption data
7576       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7577       if len(all_lvs) != len(self.disks):
7578         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7579                                    errors.ECODE_INVAL)
7580       for lv_name in all_lvs:
7581         try:
7582           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7583           # to ReserveLV uses the same syntax
7584           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7585         except errors.ReservationError:
7586           raise errors.OpPrereqError("LV named %s used by another instance" %
7587                                      lv_name, errors.ECODE_NOTUNIQUE)
7588
7589       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7590       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7591
7592       node_lvs = self.rpc.call_lv_list([pnode.name],
7593                                        vg_names.payload.keys())[pnode.name]
7594       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7595       node_lvs = node_lvs.payload
7596
7597       delta = all_lvs.difference(node_lvs.keys())
7598       if delta:
7599         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7600                                    utils.CommaJoin(delta),
7601                                    errors.ECODE_INVAL)
7602       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7603       if online_lvs:
7604         raise errors.OpPrereqError("Online logical volumes found, cannot"
7605                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7606                                    errors.ECODE_STATE)
7607       # update the size of disk based on what is found
7608       for dsk in self.disks:
7609         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7610
7611     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7612
7613     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7614     # check OS parameters (remotely)
7615     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7616
7617     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7618
7619     # memory check on primary node
7620     if self.op.start:
7621       _CheckNodeFreeMemory(self, self.pnode.name,
7622                            "creating instance %s" % self.op.instance_name,
7623                            self.be_full[constants.BE_MEMORY],
7624                            self.op.hypervisor)
7625
7626     self.dry_run_result = list(nodenames)
7627
7628   def Exec(self, feedback_fn):
7629     """Create and add the instance to the cluster.
7630
7631     """
7632     instance = self.op.instance_name
7633     pnode_name = self.pnode.name
7634
7635     ht_kind = self.op.hypervisor
7636     if ht_kind in constants.HTS_REQ_PORT:
7637       network_port = self.cfg.AllocatePort()
7638     else:
7639       network_port = None
7640
7641     if constants.ENABLE_FILE_STORAGE:
7642       # this is needed because os.path.join does not accept None arguments
7643       if self.op.file_storage_dir is None:
7644         string_file_storage_dir = ""
7645       else:
7646         string_file_storage_dir = self.op.file_storage_dir
7647
7648       # build the full file storage dir path
7649       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7650                                         string_file_storage_dir, instance)
7651     else:
7652       file_storage_dir = ""
7653
7654     disks = _GenerateDiskTemplate(self,
7655                                   self.op.disk_template,
7656                                   instance, pnode_name,
7657                                   self.secondaries,
7658                                   self.disks,
7659                                   file_storage_dir,
7660                                   self.op.file_driver,
7661                                   0,
7662                                   feedback_fn)
7663
7664     iobj = objects.Instance(name=instance, os=self.op.os_type,
7665                             primary_node=pnode_name,
7666                             nics=self.nics, disks=disks,
7667                             disk_template=self.op.disk_template,
7668                             admin_up=False,
7669                             network_port=network_port,
7670                             beparams=self.op.beparams,
7671                             hvparams=self.op.hvparams,
7672                             hypervisor=self.op.hypervisor,
7673                             osparams=self.op.osparams,
7674                             )
7675
7676     if self.adopt_disks:
7677       # rename LVs to the newly-generated names; we need to construct
7678       # 'fake' LV disks with the old data, plus the new unique_id
7679       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7680       rename_to = []
7681       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7682         rename_to.append(t_dsk.logical_id)
7683         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7684         self.cfg.SetDiskID(t_dsk, pnode_name)
7685       result = self.rpc.call_blockdev_rename(pnode_name,
7686                                              zip(tmp_disks, rename_to))
7687       result.Raise("Failed to rename adoped LVs")
7688     else:
7689       feedback_fn("* creating instance disks...")
7690       try:
7691         _CreateDisks(self, iobj)
7692       except errors.OpExecError:
7693         self.LogWarning("Device creation failed, reverting...")
7694         try:
7695           _RemoveDisks(self, iobj)
7696         finally:
7697           self.cfg.ReleaseDRBDMinors(instance)
7698           raise
7699
7700     feedback_fn("adding instance %s to cluster config" % instance)
7701
7702     self.cfg.AddInstance(iobj, self.proc.GetECId())
7703
7704     # Declare that we don't want to remove the instance lock anymore, as we've
7705     # added the instance to the config
7706     del self.remove_locks[locking.LEVEL_INSTANCE]
7707     # Unlock all the nodes
7708     if self.op.mode == constants.INSTANCE_IMPORT:
7709       nodes_keep = [self.op.src_node]
7710       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7711                        if node != self.op.src_node]
7712       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7713       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7714     else:
7715       self.context.glm.release(locking.LEVEL_NODE)
7716       del self.acquired_locks[locking.LEVEL_NODE]
7717
7718     disk_abort = False
7719     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7720       feedback_fn("* wiping instance disks...")
7721       try:
7722         _WipeDisks(self, iobj)
7723       except errors.OpExecError, err:
7724         logging.exception("Wiping disks failed")
7725         self.LogWarning("Wiping instance disks failed (%s)", err)
7726         disk_abort = True
7727
7728     if disk_abort:
7729       # Something is already wrong with the disks, don't do anything else
7730       pass
7731     elif self.op.wait_for_sync:
7732       disk_abort = not _WaitForSync(self, iobj)
7733     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7734       # make sure the disks are not degraded (still sync-ing is ok)
7735       time.sleep(15)
7736       feedback_fn("* checking mirrors status")
7737       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7738     else:
7739       disk_abort = False
7740
7741     if disk_abort:
7742       _RemoveDisks(self, iobj)
7743       self.cfg.RemoveInstance(iobj.name)
7744       # Make sure the instance lock gets removed
7745       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7746       raise errors.OpExecError("There are some degraded disks for"
7747                                " this instance")
7748
7749     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7750       if self.op.mode == constants.INSTANCE_CREATE:
7751         if not self.op.no_install:
7752           feedback_fn("* running the instance OS create scripts...")
7753           # FIXME: pass debug option from opcode to backend
7754           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7755                                                  self.op.debug_level)
7756           result.Raise("Could not add os for instance %s"
7757                        " on node %s" % (instance, pnode_name))
7758
7759       elif self.op.mode == constants.INSTANCE_IMPORT:
7760         feedback_fn("* running the instance OS import scripts...")
7761
7762         transfers = []
7763
7764         for idx, image in enumerate(self.src_images):
7765           if not image:
7766             continue
7767
7768           # FIXME: pass debug option from opcode to backend
7769           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7770                                              constants.IEIO_FILE, (image, ),
7771                                              constants.IEIO_SCRIPT,
7772                                              (iobj.disks[idx], idx),
7773                                              None)
7774           transfers.append(dt)
7775
7776         import_result = \
7777           masterd.instance.TransferInstanceData(self, feedback_fn,
7778                                                 self.op.src_node, pnode_name,
7779                                                 self.pnode.secondary_ip,
7780                                                 iobj, transfers)
7781         if not compat.all(import_result):
7782           self.LogWarning("Some disks for instance %s on node %s were not"
7783                           " imported successfully" % (instance, pnode_name))
7784
7785       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7786         feedback_fn("* preparing remote import...")
7787         # The source cluster will stop the instance before attempting to make a
7788         # connection. In some cases stopping an instance can take a long time,
7789         # hence the shutdown timeout is added to the connection timeout.
7790         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7791                            self.op.source_shutdown_timeout)
7792         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7793
7794         assert iobj.primary_node == self.pnode.name
7795         disk_results = \
7796           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7797                                         self.source_x509_ca,
7798                                         self._cds, timeouts)
7799         if not compat.all(disk_results):
7800           # TODO: Should the instance still be started, even if some disks
7801           # failed to import (valid for local imports, too)?
7802           self.LogWarning("Some disks for instance %s on node %s were not"
7803                           " imported successfully" % (instance, pnode_name))
7804
7805         # Run rename script on newly imported instance
7806         assert iobj.name == instance
7807         feedback_fn("Running rename script for %s" % instance)
7808         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7809                                                    self.source_instance_name,
7810                                                    self.op.debug_level)
7811         if result.fail_msg:
7812           self.LogWarning("Failed to run rename script for %s on node"
7813                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7814
7815       else:
7816         # also checked in the prereq part
7817         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7818                                      % self.op.mode)
7819
7820     if self.op.start:
7821       iobj.admin_up = True
7822       self.cfg.Update(iobj, feedback_fn)
7823       logging.info("Starting instance %s on node %s", instance, pnode_name)
7824       feedback_fn("* starting instance...")
7825       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7826       result.Raise("Could not start instance")
7827
7828     return list(iobj.all_nodes)
7829
7830
7831 class LUInstanceConsole(NoHooksLU):
7832   """Connect to an instance's console.
7833
7834   This is somewhat special in that it returns the command line that
7835   you need to run on the master node in order to connect to the
7836   console.
7837
7838   """
7839   REQ_BGL = False
7840
7841   def ExpandNames(self):
7842     self._ExpandAndLockInstance()
7843
7844   def CheckPrereq(self):
7845     """Check prerequisites.
7846
7847     This checks that the instance is in the cluster.
7848
7849     """
7850     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7851     assert self.instance is not None, \
7852       "Cannot retrieve locked instance %s" % self.op.instance_name
7853     _CheckNodeOnline(self, self.instance.primary_node)
7854
7855   def Exec(self, feedback_fn):
7856     """Connect to the console of an instance
7857
7858     """
7859     instance = self.instance
7860     node = instance.primary_node
7861
7862     node_insts = self.rpc.call_instance_list([node],
7863                                              [instance.hypervisor])[node]
7864     node_insts.Raise("Can't get node information from %s" % node)
7865
7866     if instance.name not in node_insts.payload:
7867       if instance.admin_up:
7868         state = "ERROR_down"
7869       else:
7870         state = "ADMIN_down"
7871       raise errors.OpExecError("Instance %s is not running (state %s)" %
7872                                (instance.name, state))
7873
7874     logging.debug("Connecting to console of %s on %s", instance.name, node)
7875
7876     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7877
7878
7879 def _GetInstanceConsole(cluster, instance):
7880   """Returns console information for an instance.
7881
7882   @type cluster: L{objects.Cluster}
7883   @type instance: L{objects.Instance}
7884   @rtype: dict
7885
7886   """
7887   hyper = hypervisor.GetHypervisor(instance.hypervisor)
7888   # beparams and hvparams are passed separately, to avoid editing the
7889   # instance and then saving the defaults in the instance itself.
7890   hvparams = cluster.FillHV(instance)
7891   beparams = cluster.FillBE(instance)
7892   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7893
7894   assert console.instance == instance.name
7895   assert console.Validate()
7896
7897   return console.ToDict()
7898
7899
7900 class LUInstanceReplaceDisks(LogicalUnit):
7901   """Replace the disks of an instance.
7902
7903   """
7904   HPATH = "mirrors-replace"
7905   HTYPE = constants.HTYPE_INSTANCE
7906   REQ_BGL = False
7907
7908   def CheckArguments(self):
7909     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7910                                   self.op.iallocator)
7911
7912   def ExpandNames(self):
7913     self._ExpandAndLockInstance()
7914
7915     if self.op.iallocator is not None:
7916       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7917
7918     elif self.op.remote_node is not None:
7919       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7920       self.op.remote_node = remote_node
7921
7922       # Warning: do not remove the locking of the new secondary here
7923       # unless DRBD8.AddChildren is changed to work in parallel;
7924       # currently it doesn't since parallel invocations of
7925       # FindUnusedMinor will conflict
7926       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7927       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7928
7929     else:
7930       self.needed_locks[locking.LEVEL_NODE] = []
7931       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7932
7933     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7934                                    self.op.iallocator, self.op.remote_node,
7935                                    self.op.disks, False, self.op.early_release)
7936
7937     self.tasklets = [self.replacer]
7938
7939   def DeclareLocks(self, level):
7940     # If we're not already locking all nodes in the set we have to declare the
7941     # instance's primary/secondary nodes.
7942     if (level == locking.LEVEL_NODE and
7943         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7944       self._LockInstancesNodes()
7945
7946   def BuildHooksEnv(self):
7947     """Build hooks env.
7948
7949     This runs on the master, the primary and all the secondaries.
7950
7951     """
7952     instance = self.replacer.instance
7953     env = {
7954       "MODE": self.op.mode,
7955       "NEW_SECONDARY": self.op.remote_node,
7956       "OLD_SECONDARY": instance.secondary_nodes[0],
7957       }
7958     env.update(_BuildInstanceHookEnvByObject(self, instance))
7959     nl = [
7960       self.cfg.GetMasterNode(),
7961       instance.primary_node,
7962       ]
7963     if self.op.remote_node is not None:
7964       nl.append(self.op.remote_node)
7965     return env, nl, nl
7966
7967
7968 class TLReplaceDisks(Tasklet):
7969   """Replaces disks for an instance.
7970
7971   Note: Locking is not within the scope of this class.
7972
7973   """
7974   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7975                disks, delay_iallocator, early_release):
7976     """Initializes this class.
7977
7978     """
7979     Tasklet.__init__(self, lu)
7980
7981     # Parameters
7982     self.instance_name = instance_name
7983     self.mode = mode
7984     self.iallocator_name = iallocator_name
7985     self.remote_node = remote_node
7986     self.disks = disks
7987     self.delay_iallocator = delay_iallocator
7988     self.early_release = early_release
7989
7990     # Runtime data
7991     self.instance = None
7992     self.new_node = None
7993     self.target_node = None
7994     self.other_node = None
7995     self.remote_node_info = None
7996     self.node_secondary_ip = None
7997
7998   @staticmethod
7999   def CheckArguments(mode, remote_node, iallocator):
8000     """Helper function for users of this class.
8001
8002     """
8003     # check for valid parameter combination
8004     if mode == constants.REPLACE_DISK_CHG:
8005       if remote_node is None and iallocator is None:
8006         raise errors.OpPrereqError("When changing the secondary either an"
8007                                    " iallocator script must be used or the"
8008                                    " new node given", errors.ECODE_INVAL)
8009
8010       if remote_node is not None and iallocator is not None:
8011         raise errors.OpPrereqError("Give either the iallocator or the new"
8012                                    " secondary, not both", errors.ECODE_INVAL)
8013
8014     elif remote_node is not None or iallocator is not None:
8015       # Not replacing the secondary
8016       raise errors.OpPrereqError("The iallocator and new node options can"
8017                                  " only be used when changing the"
8018                                  " secondary node", errors.ECODE_INVAL)
8019
8020   @staticmethod
8021   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8022     """Compute a new secondary node using an IAllocator.
8023
8024     """
8025     ial = IAllocator(lu.cfg, lu.rpc,
8026                      mode=constants.IALLOCATOR_MODE_RELOC,
8027                      name=instance_name,
8028                      relocate_from=relocate_from)
8029
8030     ial.Run(iallocator_name)
8031
8032     if not ial.success:
8033       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8034                                  " %s" % (iallocator_name, ial.info),
8035                                  errors.ECODE_NORES)
8036
8037     if len(ial.result) != ial.required_nodes:
8038       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8039                                  " of nodes (%s), required %s" %
8040                                  (iallocator_name,
8041                                   len(ial.result), ial.required_nodes),
8042                                  errors.ECODE_FAULT)
8043
8044     remote_node_name = ial.result[0]
8045
8046     lu.LogInfo("Selected new secondary for instance '%s': %s",
8047                instance_name, remote_node_name)
8048
8049     return remote_node_name
8050
8051   def _FindFaultyDisks(self, node_name):
8052     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8053                                     node_name, True)
8054
8055   def CheckPrereq(self):
8056     """Check prerequisites.
8057
8058     This checks that the instance is in the cluster.
8059
8060     """
8061     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8062     assert instance is not None, \
8063       "Cannot retrieve locked instance %s" % self.instance_name
8064
8065     if instance.disk_template != constants.DT_DRBD8:
8066       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8067                                  " instances", errors.ECODE_INVAL)
8068
8069     if len(instance.secondary_nodes) != 1:
8070       raise errors.OpPrereqError("The instance has a strange layout,"
8071                                  " expected one secondary but found %d" %
8072                                  len(instance.secondary_nodes),
8073                                  errors.ECODE_FAULT)
8074
8075     if not self.delay_iallocator:
8076       self._CheckPrereq2()
8077
8078   def _CheckPrereq2(self):
8079     """Check prerequisites, second part.
8080
8081     This function should always be part of CheckPrereq. It was separated and is
8082     now called from Exec because during node evacuation iallocator was only
8083     called with an unmodified cluster model, not taking planned changes into
8084     account.
8085
8086     """
8087     instance = self.instance
8088     secondary_node = instance.secondary_nodes[0]
8089
8090     if self.iallocator_name is None:
8091       remote_node = self.remote_node
8092     else:
8093       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8094                                        instance.name, instance.secondary_nodes)
8095
8096     if remote_node is not None:
8097       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8098       assert self.remote_node_info is not None, \
8099         "Cannot retrieve locked node %s" % remote_node
8100     else:
8101       self.remote_node_info = None
8102
8103     if remote_node == self.instance.primary_node:
8104       raise errors.OpPrereqError("The specified node is the primary node of"
8105                                  " the instance.", errors.ECODE_INVAL)
8106
8107     if remote_node == secondary_node:
8108       raise errors.OpPrereqError("The specified node is already the"
8109                                  " secondary node of the instance.",
8110                                  errors.ECODE_INVAL)
8111
8112     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8113                                     constants.REPLACE_DISK_CHG):
8114       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8115                                  errors.ECODE_INVAL)
8116
8117     if self.mode == constants.REPLACE_DISK_AUTO:
8118       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8119       faulty_secondary = self._FindFaultyDisks(secondary_node)
8120
8121       if faulty_primary and faulty_secondary:
8122         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8123                                    " one node and can not be repaired"
8124                                    " automatically" % self.instance_name,
8125                                    errors.ECODE_STATE)
8126
8127       if faulty_primary:
8128         self.disks = faulty_primary
8129         self.target_node = instance.primary_node
8130         self.other_node = secondary_node
8131         check_nodes = [self.target_node, self.other_node]
8132       elif faulty_secondary:
8133         self.disks = faulty_secondary
8134         self.target_node = secondary_node
8135         self.other_node = instance.primary_node
8136         check_nodes = [self.target_node, self.other_node]
8137       else:
8138         self.disks = []
8139         check_nodes = []
8140
8141     else:
8142       # Non-automatic modes
8143       if self.mode == constants.REPLACE_DISK_PRI:
8144         self.target_node = instance.primary_node
8145         self.other_node = secondary_node
8146         check_nodes = [self.target_node, self.other_node]
8147
8148       elif self.mode == constants.REPLACE_DISK_SEC:
8149         self.target_node = secondary_node
8150         self.other_node = instance.primary_node
8151         check_nodes = [self.target_node, self.other_node]
8152
8153       elif self.mode == constants.REPLACE_DISK_CHG:
8154         self.new_node = remote_node
8155         self.other_node = instance.primary_node
8156         self.target_node = secondary_node
8157         check_nodes = [self.new_node, self.other_node]
8158
8159         _CheckNodeNotDrained(self.lu, remote_node)
8160         _CheckNodeVmCapable(self.lu, remote_node)
8161
8162         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8163         assert old_node_info is not None
8164         if old_node_info.offline and not self.early_release:
8165           # doesn't make sense to delay the release
8166           self.early_release = True
8167           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8168                           " early-release mode", secondary_node)
8169
8170       else:
8171         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8172                                      self.mode)
8173
8174       # If not specified all disks should be replaced
8175       if not self.disks:
8176         self.disks = range(len(self.instance.disks))
8177
8178     for node in check_nodes:
8179       _CheckNodeOnline(self.lu, node)
8180
8181     # Check whether disks are valid
8182     for disk_idx in self.disks:
8183       instance.FindDisk(disk_idx)
8184
8185     # Get secondary node IP addresses
8186     node_2nd_ip = {}
8187
8188     for node_name in [self.target_node, self.other_node, self.new_node]:
8189       if node_name is not None:
8190         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8191
8192     self.node_secondary_ip = node_2nd_ip
8193
8194   def Exec(self, feedback_fn):
8195     """Execute disk replacement.
8196
8197     This dispatches the disk replacement to the appropriate handler.
8198
8199     """
8200     if self.delay_iallocator:
8201       self._CheckPrereq2()
8202
8203     if not self.disks:
8204       feedback_fn("No disks need replacement")
8205       return
8206
8207     feedback_fn("Replacing disk(s) %s for %s" %
8208                 (utils.CommaJoin(self.disks), self.instance.name))
8209
8210     activate_disks = (not self.instance.admin_up)
8211
8212     # Activate the instance disks if we're replacing them on a down instance
8213     if activate_disks:
8214       _StartInstanceDisks(self.lu, self.instance, True)
8215
8216     try:
8217       # Should we replace the secondary node?
8218       if self.new_node is not None:
8219         fn = self._ExecDrbd8Secondary
8220       else:
8221         fn = self._ExecDrbd8DiskOnly
8222
8223       return fn(feedback_fn)
8224
8225     finally:
8226       # Deactivate the instance disks if we're replacing them on a
8227       # down instance
8228       if activate_disks:
8229         _SafeShutdownInstanceDisks(self.lu, self.instance)
8230
8231   def _CheckVolumeGroup(self, nodes):
8232     self.lu.LogInfo("Checking volume groups")
8233
8234     vgname = self.cfg.GetVGName()
8235
8236     # Make sure volume group exists on all involved nodes
8237     results = self.rpc.call_vg_list(nodes)
8238     if not results:
8239       raise errors.OpExecError("Can't list volume groups on the nodes")
8240
8241     for node in nodes:
8242       res = results[node]
8243       res.Raise("Error checking node %s" % node)
8244       if vgname not in res.payload:
8245         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8246                                  (vgname, node))
8247
8248   def _CheckDisksExistence(self, nodes):
8249     # Check disk existence
8250     for idx, dev in enumerate(self.instance.disks):
8251       if idx not in self.disks:
8252         continue
8253
8254       for node in nodes:
8255         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8256         self.cfg.SetDiskID(dev, node)
8257
8258         result = self.rpc.call_blockdev_find(node, dev)
8259
8260         msg = result.fail_msg
8261         if msg or not result.payload:
8262           if not msg:
8263             msg = "disk not found"
8264           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8265                                    (idx, node, msg))
8266
8267   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8268     for idx, dev in enumerate(self.instance.disks):
8269       if idx not in self.disks:
8270         continue
8271
8272       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8273                       (idx, node_name))
8274
8275       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8276                                    ldisk=ldisk):
8277         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8278                                  " replace disks for instance %s" %
8279                                  (node_name, self.instance.name))
8280
8281   def _CreateNewStorage(self, node_name):
8282     vgname = self.cfg.GetVGName()
8283     iv_names = {}
8284
8285     for idx, dev in enumerate(self.instance.disks):
8286       if idx not in self.disks:
8287         continue
8288
8289       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8290
8291       self.cfg.SetDiskID(dev, node_name)
8292
8293       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8294       names = _GenerateUniqueNames(self.lu, lv_names)
8295
8296       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8297                              logical_id=(vgname, names[0]))
8298       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8299                              logical_id=(vgname, names[1]))
8300
8301       new_lvs = [lv_data, lv_meta]
8302       old_lvs = dev.children
8303       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8304
8305       # we pass force_create=True to force the LVM creation
8306       for new_lv in new_lvs:
8307         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8308                         _GetInstanceInfoText(self.instance), False)
8309
8310     return iv_names
8311
8312   def _CheckDevices(self, node_name, iv_names):
8313     for name, (dev, _, _) in iv_names.iteritems():
8314       self.cfg.SetDiskID(dev, node_name)
8315
8316       result = self.rpc.call_blockdev_find(node_name, dev)
8317
8318       msg = result.fail_msg
8319       if msg or not result.payload:
8320         if not msg:
8321           msg = "disk not found"
8322         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8323                                  (name, msg))
8324
8325       if result.payload.is_degraded:
8326         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8327
8328   def _RemoveOldStorage(self, node_name, iv_names):
8329     for name, (_, old_lvs, _) in iv_names.iteritems():
8330       self.lu.LogInfo("Remove logical volumes for %s" % name)
8331
8332       for lv in old_lvs:
8333         self.cfg.SetDiskID(lv, node_name)
8334
8335         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8336         if msg:
8337           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8338                              hint="remove unused LVs manually")
8339
8340   def _ReleaseNodeLock(self, node_name):
8341     """Releases the lock for a given node."""
8342     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8343
8344   def _ExecDrbd8DiskOnly(self, feedback_fn):
8345     """Replace a disk on the primary or secondary for DRBD 8.
8346
8347     The algorithm for replace is quite complicated:
8348
8349       1. for each disk to be replaced:
8350
8351         1. create new LVs on the target node with unique names
8352         1. detach old LVs from the drbd device
8353         1. rename old LVs to name_replaced.<time_t>
8354         1. rename new LVs to old LVs
8355         1. attach the new LVs (with the old names now) to the drbd device
8356
8357       1. wait for sync across all devices
8358
8359       1. for each modified disk:
8360
8361         1. remove old LVs (which have the name name_replaces.<time_t>)
8362
8363     Failures are not very well handled.
8364
8365     """
8366     steps_total = 6
8367
8368     # Step: check device activation
8369     self.lu.LogStep(1, steps_total, "Check device existence")
8370     self._CheckDisksExistence([self.other_node, self.target_node])
8371     self._CheckVolumeGroup([self.target_node, self.other_node])
8372
8373     # Step: check other node consistency
8374     self.lu.LogStep(2, steps_total, "Check peer consistency")
8375     self._CheckDisksConsistency(self.other_node,
8376                                 self.other_node == self.instance.primary_node,
8377                                 False)
8378
8379     # Step: create new storage
8380     self.lu.LogStep(3, steps_total, "Allocate new storage")
8381     iv_names = self._CreateNewStorage(self.target_node)
8382
8383     # Step: for each lv, detach+rename*2+attach
8384     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8385     for dev, old_lvs, new_lvs in iv_names.itervalues():
8386       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8387
8388       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8389                                                      old_lvs)
8390       result.Raise("Can't detach drbd from local storage on node"
8391                    " %s for device %s" % (self.target_node, dev.iv_name))
8392       #dev.children = []
8393       #cfg.Update(instance)
8394
8395       # ok, we created the new LVs, so now we know we have the needed
8396       # storage; as such, we proceed on the target node to rename
8397       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8398       # using the assumption that logical_id == physical_id (which in
8399       # turn is the unique_id on that node)
8400
8401       # FIXME(iustin): use a better name for the replaced LVs
8402       temp_suffix = int(time.time())
8403       ren_fn = lambda d, suff: (d.physical_id[0],
8404                                 d.physical_id[1] + "_replaced-%s" % suff)
8405
8406       # Build the rename list based on what LVs exist on the node
8407       rename_old_to_new = []
8408       for to_ren in old_lvs:
8409         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8410         if not result.fail_msg and result.payload:
8411           # device exists
8412           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8413
8414       self.lu.LogInfo("Renaming the old LVs on the target node")
8415       result = self.rpc.call_blockdev_rename(self.target_node,
8416                                              rename_old_to_new)
8417       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8418
8419       # Now we rename the new LVs to the old LVs
8420       self.lu.LogInfo("Renaming the new LVs on the target node")
8421       rename_new_to_old = [(new, old.physical_id)
8422                            for old, new in zip(old_lvs, new_lvs)]
8423       result = self.rpc.call_blockdev_rename(self.target_node,
8424                                              rename_new_to_old)
8425       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8426
8427       for old, new in zip(old_lvs, new_lvs):
8428         new.logical_id = old.logical_id
8429         self.cfg.SetDiskID(new, self.target_node)
8430
8431       for disk in old_lvs:
8432         disk.logical_id = ren_fn(disk, temp_suffix)
8433         self.cfg.SetDiskID(disk, self.target_node)
8434
8435       # Now that the new lvs have the old name, we can add them to the device
8436       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8437       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8438                                                   new_lvs)
8439       msg = result.fail_msg
8440       if msg:
8441         for new_lv in new_lvs:
8442           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8443                                                new_lv).fail_msg
8444           if msg2:
8445             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8446                                hint=("cleanup manually the unused logical"
8447                                      "volumes"))
8448         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8449
8450       dev.children = new_lvs
8451
8452       self.cfg.Update(self.instance, feedback_fn)
8453
8454     cstep = 5
8455     if self.early_release:
8456       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8457       cstep += 1
8458       self._RemoveOldStorage(self.target_node, iv_names)
8459       # WARNING: we release both node locks here, do not do other RPCs
8460       # than WaitForSync to the primary node
8461       self._ReleaseNodeLock([self.target_node, self.other_node])
8462
8463     # Wait for sync
8464     # This can fail as the old devices are degraded and _WaitForSync
8465     # does a combined result over all disks, so we don't check its return value
8466     self.lu.LogStep(cstep, steps_total, "Sync devices")
8467     cstep += 1
8468     _WaitForSync(self.lu, self.instance)
8469
8470     # Check all devices manually
8471     self._CheckDevices(self.instance.primary_node, iv_names)
8472
8473     # Step: remove old storage
8474     if not self.early_release:
8475       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8476       cstep += 1
8477       self._RemoveOldStorage(self.target_node, iv_names)
8478
8479   def _ExecDrbd8Secondary(self, feedback_fn):
8480     """Replace the secondary node for DRBD 8.
8481
8482     The algorithm for replace is quite complicated:
8483       - for all disks of the instance:
8484         - create new LVs on the new node with same names
8485         - shutdown the drbd device on the old secondary
8486         - disconnect the drbd network on the primary
8487         - create the drbd device on the new secondary
8488         - network attach the drbd on the primary, using an artifice:
8489           the drbd code for Attach() will connect to the network if it
8490           finds a device which is connected to the good local disks but
8491           not network enabled
8492       - wait for sync across all devices
8493       - remove all disks from the old secondary
8494
8495     Failures are not very well handled.
8496
8497     """
8498     steps_total = 6
8499
8500     # Step: check device activation
8501     self.lu.LogStep(1, steps_total, "Check device existence")
8502     self._CheckDisksExistence([self.instance.primary_node])
8503     self._CheckVolumeGroup([self.instance.primary_node])
8504
8505     # Step: check other node consistency
8506     self.lu.LogStep(2, steps_total, "Check peer consistency")
8507     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8508
8509     # Step: create new storage
8510     self.lu.LogStep(3, steps_total, "Allocate new storage")
8511     for idx, dev in enumerate(self.instance.disks):
8512       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8513                       (self.new_node, idx))
8514       # we pass force_create=True to force LVM creation
8515       for new_lv in dev.children:
8516         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8517                         _GetInstanceInfoText(self.instance), False)
8518
8519     # Step 4: dbrd minors and drbd setups changes
8520     # after this, we must manually remove the drbd minors on both the
8521     # error and the success paths
8522     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8523     minors = self.cfg.AllocateDRBDMinor([self.new_node
8524                                          for dev in self.instance.disks],
8525                                         self.instance.name)
8526     logging.debug("Allocated minors %r", minors)
8527
8528     iv_names = {}
8529     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8530       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8531                       (self.new_node, idx))
8532       # create new devices on new_node; note that we create two IDs:
8533       # one without port, so the drbd will be activated without
8534       # networking information on the new node at this stage, and one
8535       # with network, for the latter activation in step 4
8536       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8537       if self.instance.primary_node == o_node1:
8538         p_minor = o_minor1
8539       else:
8540         assert self.instance.primary_node == o_node2, "Three-node instance?"
8541         p_minor = o_minor2
8542
8543       new_alone_id = (self.instance.primary_node, self.new_node, None,
8544                       p_minor, new_minor, o_secret)
8545       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8546                     p_minor, new_minor, o_secret)
8547
8548       iv_names[idx] = (dev, dev.children, new_net_id)
8549       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8550                     new_net_id)
8551       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8552                               logical_id=new_alone_id,
8553                               children=dev.children,
8554                               size=dev.size)
8555       try:
8556         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8557                               _GetInstanceInfoText(self.instance), False)
8558       except errors.GenericError:
8559         self.cfg.ReleaseDRBDMinors(self.instance.name)
8560         raise
8561
8562     # We have new devices, shutdown the drbd on the old secondary
8563     for idx, dev in enumerate(self.instance.disks):
8564       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8565       self.cfg.SetDiskID(dev, self.target_node)
8566       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8567       if msg:
8568         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8569                            "node: %s" % (idx, msg),
8570                            hint=("Please cleanup this device manually as"
8571                                  " soon as possible"))
8572
8573     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8574     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8575                                                self.node_secondary_ip,
8576                                                self.instance.disks)\
8577                                               [self.instance.primary_node]
8578
8579     msg = result.fail_msg
8580     if msg:
8581       # detaches didn't succeed (unlikely)
8582       self.cfg.ReleaseDRBDMinors(self.instance.name)
8583       raise errors.OpExecError("Can't detach the disks from the network on"
8584                                " old node: %s" % (msg,))
8585
8586     # if we managed to detach at least one, we update all the disks of
8587     # the instance to point to the new secondary
8588     self.lu.LogInfo("Updating instance configuration")
8589     for dev, _, new_logical_id in iv_names.itervalues():
8590       dev.logical_id = new_logical_id
8591       self.cfg.SetDiskID(dev, self.instance.primary_node)
8592
8593     self.cfg.Update(self.instance, feedback_fn)
8594
8595     # and now perform the drbd attach
8596     self.lu.LogInfo("Attaching primary drbds to new secondary"
8597                     " (standalone => connected)")
8598     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8599                                             self.new_node],
8600                                            self.node_secondary_ip,
8601                                            self.instance.disks,
8602                                            self.instance.name,
8603                                            False)
8604     for to_node, to_result in result.items():
8605       msg = to_result.fail_msg
8606       if msg:
8607         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8608                            to_node, msg,
8609                            hint=("please do a gnt-instance info to see the"
8610                                  " status of disks"))
8611     cstep = 5
8612     if self.early_release:
8613       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8614       cstep += 1
8615       self._RemoveOldStorage(self.target_node, iv_names)
8616       # WARNING: we release all node locks here, do not do other RPCs
8617       # than WaitForSync to the primary node
8618       self._ReleaseNodeLock([self.instance.primary_node,
8619                              self.target_node,
8620                              self.new_node])
8621
8622     # Wait for sync
8623     # This can fail as the old devices are degraded and _WaitForSync
8624     # does a combined result over all disks, so we don't check its return value
8625     self.lu.LogStep(cstep, steps_total, "Sync devices")
8626     cstep += 1
8627     _WaitForSync(self.lu, self.instance)
8628
8629     # Check all devices manually
8630     self._CheckDevices(self.instance.primary_node, iv_names)
8631
8632     # Step: remove old storage
8633     if not self.early_release:
8634       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8635       self._RemoveOldStorage(self.target_node, iv_names)
8636
8637
8638 class LURepairNodeStorage(NoHooksLU):
8639   """Repairs the volume group on a node.
8640
8641   """
8642   REQ_BGL = False
8643
8644   def CheckArguments(self):
8645     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8646
8647     storage_type = self.op.storage_type
8648
8649     if (constants.SO_FIX_CONSISTENCY not in
8650         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8651       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8652                                  " repaired" % storage_type,
8653                                  errors.ECODE_INVAL)
8654
8655   def ExpandNames(self):
8656     self.needed_locks = {
8657       locking.LEVEL_NODE: [self.op.node_name],
8658       }
8659
8660   def _CheckFaultyDisks(self, instance, node_name):
8661     """Ensure faulty disks abort the opcode or at least warn."""
8662     try:
8663       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8664                                   node_name, True):
8665         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8666                                    " node '%s'" % (instance.name, node_name),
8667                                    errors.ECODE_STATE)
8668     except errors.OpPrereqError, err:
8669       if self.op.ignore_consistency:
8670         self.proc.LogWarning(str(err.args[0]))
8671       else:
8672         raise
8673
8674   def CheckPrereq(self):
8675     """Check prerequisites.
8676
8677     """
8678     # Check whether any instance on this node has faulty disks
8679     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8680       if not inst.admin_up:
8681         continue
8682       check_nodes = set(inst.all_nodes)
8683       check_nodes.discard(self.op.node_name)
8684       for inst_node_name in check_nodes:
8685         self._CheckFaultyDisks(inst, inst_node_name)
8686
8687   def Exec(self, feedback_fn):
8688     feedback_fn("Repairing storage unit '%s' on %s ..." %
8689                 (self.op.name, self.op.node_name))
8690
8691     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8692     result = self.rpc.call_storage_execute(self.op.node_name,
8693                                            self.op.storage_type, st_args,
8694                                            self.op.name,
8695                                            constants.SO_FIX_CONSISTENCY)
8696     result.Raise("Failed to repair storage unit '%s' on %s" %
8697                  (self.op.name, self.op.node_name))
8698
8699
8700 class LUNodeEvacStrategy(NoHooksLU):
8701   """Computes the node evacuation strategy.
8702
8703   """
8704   REQ_BGL = False
8705
8706   def CheckArguments(self):
8707     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8708
8709   def ExpandNames(self):
8710     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8711     self.needed_locks = locks = {}
8712     if self.op.remote_node is None:
8713       locks[locking.LEVEL_NODE] = locking.ALL_SET
8714     else:
8715       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8716       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8717
8718   def Exec(self, feedback_fn):
8719     if self.op.remote_node is not None:
8720       instances = []
8721       for node in self.op.nodes:
8722         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8723       result = []
8724       for i in instances:
8725         if i.primary_node == self.op.remote_node:
8726           raise errors.OpPrereqError("Node %s is the primary node of"
8727                                      " instance %s, cannot use it as"
8728                                      " secondary" %
8729                                      (self.op.remote_node, i.name),
8730                                      errors.ECODE_INVAL)
8731         result.append([i.name, self.op.remote_node])
8732     else:
8733       ial = IAllocator(self.cfg, self.rpc,
8734                        mode=constants.IALLOCATOR_MODE_MEVAC,
8735                        evac_nodes=self.op.nodes)
8736       ial.Run(self.op.iallocator, validate=True)
8737       if not ial.success:
8738         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8739                                  errors.ECODE_NORES)
8740       result = ial.result
8741     return result
8742
8743
8744 class LUInstanceGrowDisk(LogicalUnit):
8745   """Grow a disk of an instance.
8746
8747   """
8748   HPATH = "disk-grow"
8749   HTYPE = constants.HTYPE_INSTANCE
8750   REQ_BGL = False
8751
8752   def ExpandNames(self):
8753     self._ExpandAndLockInstance()
8754     self.needed_locks[locking.LEVEL_NODE] = []
8755     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8756
8757   def DeclareLocks(self, level):
8758     if level == locking.LEVEL_NODE:
8759       self._LockInstancesNodes()
8760
8761   def BuildHooksEnv(self):
8762     """Build hooks env.
8763
8764     This runs on the master, the primary and all the secondaries.
8765
8766     """
8767     env = {
8768       "DISK": self.op.disk,
8769       "AMOUNT": self.op.amount,
8770       }
8771     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8772     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8773     return env, nl, nl
8774
8775   def CheckPrereq(self):
8776     """Check prerequisites.
8777
8778     This checks that the instance is in the cluster.
8779
8780     """
8781     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8782     assert instance is not None, \
8783       "Cannot retrieve locked instance %s" % self.op.instance_name
8784     nodenames = list(instance.all_nodes)
8785     for node in nodenames:
8786       _CheckNodeOnline(self, node)
8787
8788     self.instance = instance
8789
8790     if instance.disk_template not in constants.DTS_GROWABLE:
8791       raise errors.OpPrereqError("Instance's disk layout does not support"
8792                                  " growing.", errors.ECODE_INVAL)
8793
8794     self.disk = instance.FindDisk(self.op.disk)
8795
8796     if instance.disk_template != constants.DT_FILE:
8797       # TODO: check the free disk space for file, when that feature
8798       # will be supported
8799       _CheckNodesFreeDiskPerVG(self, nodenames,
8800                                self.disk.ComputeGrowth(self.op.amount))
8801
8802   def Exec(self, feedback_fn):
8803     """Execute disk grow.
8804
8805     """
8806     instance = self.instance
8807     disk = self.disk
8808
8809     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8810     if not disks_ok:
8811       raise errors.OpExecError("Cannot activate block device to grow")
8812
8813     for node in instance.all_nodes:
8814       self.cfg.SetDiskID(disk, node)
8815       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8816       result.Raise("Grow request failed to node %s" % node)
8817
8818       # TODO: Rewrite code to work properly
8819       # DRBD goes into sync mode for a short amount of time after executing the
8820       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8821       # calling "resize" in sync mode fails. Sleeping for a short amount of
8822       # time is a work-around.
8823       time.sleep(5)
8824
8825     disk.RecordGrow(self.op.amount)
8826     self.cfg.Update(instance, feedback_fn)
8827     if self.op.wait_for_sync:
8828       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8829       if disk_abort:
8830         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8831                              " status.\nPlease check the instance.")
8832       if not instance.admin_up:
8833         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8834     elif not instance.admin_up:
8835       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8836                            " not supposed to be running because no wait for"
8837                            " sync mode was requested.")
8838
8839
8840 class LUInstanceQueryData(NoHooksLU):
8841   """Query runtime instance data.
8842
8843   """
8844   REQ_BGL = False
8845
8846   def ExpandNames(self):
8847     self.needed_locks = {}
8848
8849     # Use locking if requested or when non-static information is wanted
8850     if not (self.op.static or self.op.use_locking):
8851       self.LogWarning("Non-static data requested, locks need to be acquired")
8852       self.op.use_locking = True
8853
8854     if self.op.instances or not self.op.use_locking:
8855       # Expand instance names right here
8856       self.wanted_names = _GetWantedInstances(self, self.op.instances)
8857     else:
8858       # Will use acquired locks
8859       self.wanted_names = None
8860
8861     if self.op.use_locking:
8862       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8863
8864       if self.wanted_names is None:
8865         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8866       else:
8867         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8868
8869       self.needed_locks[locking.LEVEL_NODE] = []
8870       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8871       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8872
8873   def DeclareLocks(self, level):
8874     if self.op.use_locking and level == locking.LEVEL_NODE:
8875       self._LockInstancesNodes()
8876
8877   def CheckPrereq(self):
8878     """Check prerequisites.
8879
8880     This only checks the optional instance list against the existing names.
8881
8882     """
8883     if self.wanted_names is None:
8884       assert self.op.use_locking, "Locking was not used"
8885       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8886
8887     self.wanted_instances = [self.cfg.GetInstanceInfo(name)
8888                              for name in self.wanted_names]
8889
8890   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8891     """Returns the status of a block device
8892
8893     """
8894     if self.op.static or not node:
8895       return None
8896
8897     self.cfg.SetDiskID(dev, node)
8898
8899     result = self.rpc.call_blockdev_find(node, dev)
8900     if result.offline:
8901       return None
8902
8903     result.Raise("Can't compute disk status for %s" % instance_name)
8904
8905     status = result.payload
8906     if status is None:
8907       return None
8908
8909     return (status.dev_path, status.major, status.minor,
8910             status.sync_percent, status.estimated_time,
8911             status.is_degraded, status.ldisk_status)
8912
8913   def _ComputeDiskStatus(self, instance, snode, dev):
8914     """Compute block device status.
8915
8916     """
8917     if dev.dev_type in constants.LDS_DRBD:
8918       # we change the snode then (otherwise we use the one passed in)
8919       if dev.logical_id[0] == instance.primary_node:
8920         snode = dev.logical_id[1]
8921       else:
8922         snode = dev.logical_id[0]
8923
8924     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8925                                               instance.name, dev)
8926     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8927
8928     if dev.children:
8929       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8930                       for child in dev.children]
8931     else:
8932       dev_children = []
8933
8934     return {
8935       "iv_name": dev.iv_name,
8936       "dev_type": dev.dev_type,
8937       "logical_id": dev.logical_id,
8938       "physical_id": dev.physical_id,
8939       "pstatus": dev_pstatus,
8940       "sstatus": dev_sstatus,
8941       "children": dev_children,
8942       "mode": dev.mode,
8943       "size": dev.size,
8944       }
8945
8946   def Exec(self, feedback_fn):
8947     """Gather and return data"""
8948     result = {}
8949
8950     cluster = self.cfg.GetClusterInfo()
8951
8952     for instance in self.wanted_instances:
8953       if not self.op.static:
8954         remote_info = self.rpc.call_instance_info(instance.primary_node,
8955                                                   instance.name,
8956                                                   instance.hypervisor)
8957         remote_info.Raise("Error checking node %s" % instance.primary_node)
8958         remote_info = remote_info.payload
8959         if remote_info and "state" in remote_info:
8960           remote_state = "up"
8961         else:
8962           remote_state = "down"
8963       else:
8964         remote_state = None
8965       if instance.admin_up:
8966         config_state = "up"
8967       else:
8968         config_state = "down"
8969
8970       disks = [self._ComputeDiskStatus(instance, None, device)
8971                for device in instance.disks]
8972
8973       result[instance.name] = {
8974         "name": instance.name,
8975         "config_state": config_state,
8976         "run_state": remote_state,
8977         "pnode": instance.primary_node,
8978         "snodes": instance.secondary_nodes,
8979         "os": instance.os,
8980         # this happens to be the same format used for hooks
8981         "nics": _NICListToTuple(self, instance.nics),
8982         "disk_template": instance.disk_template,
8983         "disks": disks,
8984         "hypervisor": instance.hypervisor,
8985         "network_port": instance.network_port,
8986         "hv_instance": instance.hvparams,
8987         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8988         "be_instance": instance.beparams,
8989         "be_actual": cluster.FillBE(instance),
8990         "os_instance": instance.osparams,
8991         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8992         "serial_no": instance.serial_no,
8993         "mtime": instance.mtime,
8994         "ctime": instance.ctime,
8995         "uuid": instance.uuid,
8996         }
8997
8998     return result
8999
9000
9001 class LUInstanceSetParams(LogicalUnit):
9002   """Modifies an instances's parameters.
9003
9004   """
9005   HPATH = "instance-modify"
9006   HTYPE = constants.HTYPE_INSTANCE
9007   REQ_BGL = False
9008
9009   def CheckArguments(self):
9010     if not (self.op.nics or self.op.disks or self.op.disk_template or
9011             self.op.hvparams or self.op.beparams or self.op.os_name):
9012       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9013
9014     if self.op.hvparams:
9015       _CheckGlobalHvParams(self.op.hvparams)
9016
9017     # Disk validation
9018     disk_addremove = 0
9019     for disk_op, disk_dict in self.op.disks:
9020       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9021       if disk_op == constants.DDM_REMOVE:
9022         disk_addremove += 1
9023         continue
9024       elif disk_op == constants.DDM_ADD:
9025         disk_addremove += 1
9026       else:
9027         if not isinstance(disk_op, int):
9028           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9029         if not isinstance(disk_dict, dict):
9030           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9031           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9032
9033       if disk_op == constants.DDM_ADD:
9034         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9035         if mode not in constants.DISK_ACCESS_SET:
9036           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9037                                      errors.ECODE_INVAL)
9038         size = disk_dict.get('size', None)
9039         if size is None:
9040           raise errors.OpPrereqError("Required disk parameter size missing",
9041                                      errors.ECODE_INVAL)
9042         try:
9043           size = int(size)
9044         except (TypeError, ValueError), err:
9045           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9046                                      str(err), errors.ECODE_INVAL)
9047         disk_dict['size'] = size
9048       else:
9049         # modification of disk
9050         if 'size' in disk_dict:
9051           raise errors.OpPrereqError("Disk size change not possible, use"
9052                                      " grow-disk", errors.ECODE_INVAL)
9053
9054     if disk_addremove > 1:
9055       raise errors.OpPrereqError("Only one disk add or remove operation"
9056                                  " supported at a time", errors.ECODE_INVAL)
9057
9058     if self.op.disks and self.op.disk_template is not None:
9059       raise errors.OpPrereqError("Disk template conversion and other disk"
9060                                  " changes not supported at the same time",
9061                                  errors.ECODE_INVAL)
9062
9063     if (self.op.disk_template and
9064         self.op.disk_template in constants.DTS_NET_MIRROR and
9065         self.op.remote_node is None):
9066       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9067                                  " one requires specifying a secondary node",
9068                                  errors.ECODE_INVAL)
9069
9070     # NIC validation
9071     nic_addremove = 0
9072     for nic_op, nic_dict in self.op.nics:
9073       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9074       if nic_op == constants.DDM_REMOVE:
9075         nic_addremove += 1
9076         continue
9077       elif nic_op == constants.DDM_ADD:
9078         nic_addremove += 1
9079       else:
9080         if not isinstance(nic_op, int):
9081           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9082         if not isinstance(nic_dict, dict):
9083           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9084           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9085
9086       # nic_dict should be a dict
9087       nic_ip = nic_dict.get('ip', None)
9088       if nic_ip is not None:
9089         if nic_ip.lower() == constants.VALUE_NONE:
9090           nic_dict['ip'] = None
9091         else:
9092           if not netutils.IPAddress.IsValid(nic_ip):
9093             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9094                                        errors.ECODE_INVAL)
9095
9096       nic_bridge = nic_dict.get('bridge', None)
9097       nic_link = nic_dict.get('link', None)
9098       if nic_bridge and nic_link:
9099         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9100                                    " at the same time", errors.ECODE_INVAL)
9101       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9102         nic_dict['bridge'] = None
9103       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9104         nic_dict['link'] = None
9105
9106       if nic_op == constants.DDM_ADD:
9107         nic_mac = nic_dict.get('mac', None)
9108         if nic_mac is None:
9109           nic_dict['mac'] = constants.VALUE_AUTO
9110
9111       if 'mac' in nic_dict:
9112         nic_mac = nic_dict['mac']
9113         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9114           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9115
9116         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9117           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9118                                      " modifying an existing nic",
9119                                      errors.ECODE_INVAL)
9120
9121     if nic_addremove > 1:
9122       raise errors.OpPrereqError("Only one NIC add or remove operation"
9123                                  " supported at a time", errors.ECODE_INVAL)
9124
9125   def ExpandNames(self):
9126     self._ExpandAndLockInstance()
9127     self.needed_locks[locking.LEVEL_NODE] = []
9128     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9129
9130   def DeclareLocks(self, level):
9131     if level == locking.LEVEL_NODE:
9132       self._LockInstancesNodes()
9133       if self.op.disk_template and self.op.remote_node:
9134         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9135         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9136
9137   def BuildHooksEnv(self):
9138     """Build hooks env.
9139
9140     This runs on the master, primary and secondaries.
9141
9142     """
9143     args = dict()
9144     if constants.BE_MEMORY in self.be_new:
9145       args['memory'] = self.be_new[constants.BE_MEMORY]
9146     if constants.BE_VCPUS in self.be_new:
9147       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9148     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9149     # information at all.
9150     if self.op.nics:
9151       args['nics'] = []
9152       nic_override = dict(self.op.nics)
9153       for idx, nic in enumerate(self.instance.nics):
9154         if idx in nic_override:
9155           this_nic_override = nic_override[idx]
9156         else:
9157           this_nic_override = {}
9158         if 'ip' in this_nic_override:
9159           ip = this_nic_override['ip']
9160         else:
9161           ip = nic.ip
9162         if 'mac' in this_nic_override:
9163           mac = this_nic_override['mac']
9164         else:
9165           mac = nic.mac
9166         if idx in self.nic_pnew:
9167           nicparams = self.nic_pnew[idx]
9168         else:
9169           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9170         mode = nicparams[constants.NIC_MODE]
9171         link = nicparams[constants.NIC_LINK]
9172         args['nics'].append((ip, mac, mode, link))
9173       if constants.DDM_ADD in nic_override:
9174         ip = nic_override[constants.DDM_ADD].get('ip', None)
9175         mac = nic_override[constants.DDM_ADD]['mac']
9176         nicparams = self.nic_pnew[constants.DDM_ADD]
9177         mode = nicparams[constants.NIC_MODE]
9178         link = nicparams[constants.NIC_LINK]
9179         args['nics'].append((ip, mac, mode, link))
9180       elif constants.DDM_REMOVE in nic_override:
9181         del args['nics'][-1]
9182
9183     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9184     if self.op.disk_template:
9185       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9186     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9187     return env, nl, nl
9188
9189   def CheckPrereq(self):
9190     """Check prerequisites.
9191
9192     This only checks the instance list against the existing names.
9193
9194     """
9195     # checking the new params on the primary/secondary nodes
9196
9197     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9198     cluster = self.cluster = self.cfg.GetClusterInfo()
9199     assert self.instance is not None, \
9200       "Cannot retrieve locked instance %s" % self.op.instance_name
9201     pnode = instance.primary_node
9202     nodelist = list(instance.all_nodes)
9203
9204     # OS change
9205     if self.op.os_name and not self.op.force:
9206       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9207                       self.op.force_variant)
9208       instance_os = self.op.os_name
9209     else:
9210       instance_os = instance.os
9211
9212     if self.op.disk_template:
9213       if instance.disk_template == self.op.disk_template:
9214         raise errors.OpPrereqError("Instance already has disk template %s" %
9215                                    instance.disk_template, errors.ECODE_INVAL)
9216
9217       if (instance.disk_template,
9218           self.op.disk_template) not in self._DISK_CONVERSIONS:
9219         raise errors.OpPrereqError("Unsupported disk template conversion from"
9220                                    " %s to %s" % (instance.disk_template,
9221                                                   self.op.disk_template),
9222                                    errors.ECODE_INVAL)
9223       _CheckInstanceDown(self, instance, "cannot change disk template")
9224       if self.op.disk_template in constants.DTS_NET_MIRROR:
9225         if self.op.remote_node == pnode:
9226           raise errors.OpPrereqError("Given new secondary node %s is the same"
9227                                      " as the primary node of the instance" %
9228                                      self.op.remote_node, errors.ECODE_STATE)
9229         _CheckNodeOnline(self, self.op.remote_node)
9230         _CheckNodeNotDrained(self, self.op.remote_node)
9231         # FIXME: here we assume that the old instance type is DT_PLAIN
9232         assert instance.disk_template == constants.DT_PLAIN
9233         disks = [{"size": d.size, "vg": d.logical_id[0]}
9234                  for d in instance.disks]
9235         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9236         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9237
9238     # hvparams processing
9239     if self.op.hvparams:
9240       hv_type = instance.hypervisor
9241       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9242       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9243       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9244
9245       # local check
9246       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9247       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9248       self.hv_new = hv_new # the new actual values
9249       self.hv_inst = i_hvdict # the new dict (without defaults)
9250     else:
9251       self.hv_new = self.hv_inst = {}
9252
9253     # beparams processing
9254     if self.op.beparams:
9255       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9256                                    use_none=True)
9257       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9258       be_new = cluster.SimpleFillBE(i_bedict)
9259       self.be_new = be_new # the new actual values
9260       self.be_inst = i_bedict # the new dict (without defaults)
9261     else:
9262       self.be_new = self.be_inst = {}
9263
9264     # osparams processing
9265     if self.op.osparams:
9266       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9267       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9268       self.os_inst = i_osdict # the new dict (without defaults)
9269     else:
9270       self.os_inst = {}
9271
9272     self.warn = []
9273
9274     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9275       mem_check_list = [pnode]
9276       if be_new[constants.BE_AUTO_BALANCE]:
9277         # either we changed auto_balance to yes or it was from before
9278         mem_check_list.extend(instance.secondary_nodes)
9279       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9280                                                   instance.hypervisor)
9281       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9282                                          instance.hypervisor)
9283       pninfo = nodeinfo[pnode]
9284       msg = pninfo.fail_msg
9285       if msg:
9286         # Assume the primary node is unreachable and go ahead
9287         self.warn.append("Can't get info from primary node %s: %s" %
9288                          (pnode,  msg))
9289       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9290         self.warn.append("Node data from primary node %s doesn't contain"
9291                          " free memory information" % pnode)
9292       elif instance_info.fail_msg:
9293         self.warn.append("Can't get instance runtime information: %s" %
9294                         instance_info.fail_msg)
9295       else:
9296         if instance_info.payload:
9297           current_mem = int(instance_info.payload['memory'])
9298         else:
9299           # Assume instance not running
9300           # (there is a slight race condition here, but it's not very probable,
9301           # and we have no other way to check)
9302           current_mem = 0
9303         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9304                     pninfo.payload['memory_free'])
9305         if miss_mem > 0:
9306           raise errors.OpPrereqError("This change will prevent the instance"
9307                                      " from starting, due to %d MB of memory"
9308                                      " missing on its primary node" % miss_mem,
9309                                      errors.ECODE_NORES)
9310
9311       if be_new[constants.BE_AUTO_BALANCE]:
9312         for node, nres in nodeinfo.items():
9313           if node not in instance.secondary_nodes:
9314             continue
9315           msg = nres.fail_msg
9316           if msg:
9317             self.warn.append("Can't get info from secondary node %s: %s" %
9318                              (node, msg))
9319           elif not isinstance(nres.payload.get('memory_free', None), int):
9320             self.warn.append("Secondary node %s didn't return free"
9321                              " memory information" % node)
9322           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9323             self.warn.append("Not enough memory to failover instance to"
9324                              " secondary node %s" % node)
9325
9326     # NIC processing
9327     self.nic_pnew = {}
9328     self.nic_pinst = {}
9329     for nic_op, nic_dict in self.op.nics:
9330       if nic_op == constants.DDM_REMOVE:
9331         if not instance.nics:
9332           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9333                                      errors.ECODE_INVAL)
9334         continue
9335       if nic_op != constants.DDM_ADD:
9336         # an existing nic
9337         if not instance.nics:
9338           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9339                                      " no NICs" % nic_op,
9340                                      errors.ECODE_INVAL)
9341         if nic_op < 0 or nic_op >= len(instance.nics):
9342           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9343                                      " are 0 to %d" %
9344                                      (nic_op, len(instance.nics) - 1),
9345                                      errors.ECODE_INVAL)
9346         old_nic_params = instance.nics[nic_op].nicparams
9347         old_nic_ip = instance.nics[nic_op].ip
9348       else:
9349         old_nic_params = {}
9350         old_nic_ip = None
9351
9352       update_params_dict = dict([(key, nic_dict[key])
9353                                  for key in constants.NICS_PARAMETERS
9354                                  if key in nic_dict])
9355
9356       if 'bridge' in nic_dict:
9357         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9358
9359       new_nic_params = _GetUpdatedParams(old_nic_params,
9360                                          update_params_dict)
9361       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9362       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9363       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9364       self.nic_pinst[nic_op] = new_nic_params
9365       self.nic_pnew[nic_op] = new_filled_nic_params
9366       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9367
9368       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9369         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9370         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9371         if msg:
9372           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9373           if self.op.force:
9374             self.warn.append(msg)
9375           else:
9376             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9377       if new_nic_mode == constants.NIC_MODE_ROUTED:
9378         if 'ip' in nic_dict:
9379           nic_ip = nic_dict['ip']
9380         else:
9381           nic_ip = old_nic_ip
9382         if nic_ip is None:
9383           raise errors.OpPrereqError('Cannot set the nic ip to None'
9384                                      ' on a routed nic', errors.ECODE_INVAL)
9385       if 'mac' in nic_dict:
9386         nic_mac = nic_dict['mac']
9387         if nic_mac is None:
9388           raise errors.OpPrereqError('Cannot set the nic mac to None',
9389                                      errors.ECODE_INVAL)
9390         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9391           # otherwise generate the mac
9392           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9393         else:
9394           # or validate/reserve the current one
9395           try:
9396             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9397           except errors.ReservationError:
9398             raise errors.OpPrereqError("MAC address %s already in use"
9399                                        " in cluster" % nic_mac,
9400                                        errors.ECODE_NOTUNIQUE)
9401
9402     # DISK processing
9403     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9404       raise errors.OpPrereqError("Disk operations not supported for"
9405                                  " diskless instances",
9406                                  errors.ECODE_INVAL)
9407     for disk_op, _ in self.op.disks:
9408       if disk_op == constants.DDM_REMOVE:
9409         if len(instance.disks) == 1:
9410           raise errors.OpPrereqError("Cannot remove the last disk of"
9411                                      " an instance", errors.ECODE_INVAL)
9412         _CheckInstanceDown(self, instance, "cannot remove disks")
9413
9414       if (disk_op == constants.DDM_ADD and
9415           len(instance.disks) >= constants.MAX_DISKS):
9416         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9417                                    " add more" % constants.MAX_DISKS,
9418                                    errors.ECODE_STATE)
9419       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9420         # an existing disk
9421         if disk_op < 0 or disk_op >= len(instance.disks):
9422           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9423                                      " are 0 to %d" %
9424                                      (disk_op, len(instance.disks)),
9425                                      errors.ECODE_INVAL)
9426
9427     return
9428
9429   def _ConvertPlainToDrbd(self, feedback_fn):
9430     """Converts an instance from plain to drbd.
9431
9432     """
9433     feedback_fn("Converting template to drbd")
9434     instance = self.instance
9435     pnode = instance.primary_node
9436     snode = self.op.remote_node
9437
9438     # create a fake disk info for _GenerateDiskTemplate
9439     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9440     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9441                                       instance.name, pnode, [snode],
9442                                       disk_info, None, None, 0, feedback_fn)
9443     info = _GetInstanceInfoText(instance)
9444     feedback_fn("Creating aditional volumes...")
9445     # first, create the missing data and meta devices
9446     for disk in new_disks:
9447       # unfortunately this is... not too nice
9448       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9449                             info, True)
9450       for child in disk.children:
9451         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9452     # at this stage, all new LVs have been created, we can rename the
9453     # old ones
9454     feedback_fn("Renaming original volumes...")
9455     rename_list = [(o, n.children[0].logical_id)
9456                    for (o, n) in zip(instance.disks, new_disks)]
9457     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9458     result.Raise("Failed to rename original LVs")
9459
9460     feedback_fn("Initializing DRBD devices...")
9461     # all child devices are in place, we can now create the DRBD devices
9462     for disk in new_disks:
9463       for node in [pnode, snode]:
9464         f_create = node == pnode
9465         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9466
9467     # at this point, the instance has been modified
9468     instance.disk_template = constants.DT_DRBD8
9469     instance.disks = new_disks
9470     self.cfg.Update(instance, feedback_fn)
9471
9472     # disks are created, waiting for sync
9473     disk_abort = not _WaitForSync(self, instance)
9474     if disk_abort:
9475       raise errors.OpExecError("There are some degraded disks for"
9476                                " this instance, please cleanup manually")
9477
9478   def _ConvertDrbdToPlain(self, feedback_fn):
9479     """Converts an instance from drbd to plain.
9480
9481     """
9482     instance = self.instance
9483     assert len(instance.secondary_nodes) == 1
9484     pnode = instance.primary_node
9485     snode = instance.secondary_nodes[0]
9486     feedback_fn("Converting template to plain")
9487
9488     old_disks = instance.disks
9489     new_disks = [d.children[0] for d in old_disks]
9490
9491     # copy over size and mode
9492     for parent, child in zip(old_disks, new_disks):
9493       child.size = parent.size
9494       child.mode = parent.mode
9495
9496     # update instance structure
9497     instance.disks = new_disks
9498     instance.disk_template = constants.DT_PLAIN
9499     self.cfg.Update(instance, feedback_fn)
9500
9501     feedback_fn("Removing volumes on the secondary node...")
9502     for disk in old_disks:
9503       self.cfg.SetDiskID(disk, snode)
9504       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9505       if msg:
9506         self.LogWarning("Could not remove block device %s on node %s,"
9507                         " continuing anyway: %s", disk.iv_name, snode, msg)
9508
9509     feedback_fn("Removing unneeded volumes on the primary node...")
9510     for idx, disk in enumerate(old_disks):
9511       meta = disk.children[1]
9512       self.cfg.SetDiskID(meta, pnode)
9513       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9514       if msg:
9515         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9516                         " continuing anyway: %s", idx, pnode, msg)
9517
9518   def Exec(self, feedback_fn):
9519     """Modifies an instance.
9520
9521     All parameters take effect only at the next restart of the instance.
9522
9523     """
9524     # Process here the warnings from CheckPrereq, as we don't have a
9525     # feedback_fn there.
9526     for warn in self.warn:
9527       feedback_fn("WARNING: %s" % warn)
9528
9529     result = []
9530     instance = self.instance
9531     # disk changes
9532     for disk_op, disk_dict in self.op.disks:
9533       if disk_op == constants.DDM_REMOVE:
9534         # remove the last disk
9535         device = instance.disks.pop()
9536         device_idx = len(instance.disks)
9537         for node, disk in device.ComputeNodeTree(instance.primary_node):
9538           self.cfg.SetDiskID(disk, node)
9539           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9540           if msg:
9541             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9542                             " continuing anyway", device_idx, node, msg)
9543         result.append(("disk/%d" % device_idx, "remove"))
9544       elif disk_op == constants.DDM_ADD:
9545         # add a new disk
9546         if instance.disk_template == constants.DT_FILE:
9547           file_driver, file_path = instance.disks[0].logical_id
9548           file_path = os.path.dirname(file_path)
9549         else:
9550           file_driver = file_path = None
9551         disk_idx_base = len(instance.disks)
9552         new_disk = _GenerateDiskTemplate(self,
9553                                          instance.disk_template,
9554                                          instance.name, instance.primary_node,
9555                                          instance.secondary_nodes,
9556                                          [disk_dict],
9557                                          file_path,
9558                                          file_driver,
9559                                          disk_idx_base, feedback_fn)[0]
9560         instance.disks.append(new_disk)
9561         info = _GetInstanceInfoText(instance)
9562
9563         logging.info("Creating volume %s for instance %s",
9564                      new_disk.iv_name, instance.name)
9565         # Note: this needs to be kept in sync with _CreateDisks
9566         #HARDCODE
9567         for node in instance.all_nodes:
9568           f_create = node == instance.primary_node
9569           try:
9570             _CreateBlockDev(self, node, instance, new_disk,
9571                             f_create, info, f_create)
9572           except errors.OpExecError, err:
9573             self.LogWarning("Failed to create volume %s (%s) on"
9574                             " node %s: %s",
9575                             new_disk.iv_name, new_disk, node, err)
9576         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9577                        (new_disk.size, new_disk.mode)))
9578       else:
9579         # change a given disk
9580         instance.disks[disk_op].mode = disk_dict['mode']
9581         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9582
9583     if self.op.disk_template:
9584       r_shut = _ShutdownInstanceDisks(self, instance)
9585       if not r_shut:
9586         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9587                                  " proceed with disk template conversion")
9588       mode = (instance.disk_template, self.op.disk_template)
9589       try:
9590         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9591       except:
9592         self.cfg.ReleaseDRBDMinors(instance.name)
9593         raise
9594       result.append(("disk_template", self.op.disk_template))
9595
9596     # NIC changes
9597     for nic_op, nic_dict in self.op.nics:
9598       if nic_op == constants.DDM_REMOVE:
9599         # remove the last nic
9600         del instance.nics[-1]
9601         result.append(("nic.%d" % len(instance.nics), "remove"))
9602       elif nic_op == constants.DDM_ADD:
9603         # mac and bridge should be set, by now
9604         mac = nic_dict['mac']
9605         ip = nic_dict.get('ip', None)
9606         nicparams = self.nic_pinst[constants.DDM_ADD]
9607         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9608         instance.nics.append(new_nic)
9609         result.append(("nic.%d" % (len(instance.nics) - 1),
9610                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9611                        (new_nic.mac, new_nic.ip,
9612                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9613                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9614                        )))
9615       else:
9616         for key in 'mac', 'ip':
9617           if key in nic_dict:
9618             setattr(instance.nics[nic_op], key, nic_dict[key])
9619         if nic_op in self.nic_pinst:
9620           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9621         for key, val in nic_dict.iteritems():
9622           result.append(("nic.%s/%d" % (key, nic_op), val))
9623
9624     # hvparams changes
9625     if self.op.hvparams:
9626       instance.hvparams = self.hv_inst
9627       for key, val in self.op.hvparams.iteritems():
9628         result.append(("hv/%s" % key, val))
9629
9630     # beparams changes
9631     if self.op.beparams:
9632       instance.beparams = self.be_inst
9633       for key, val in self.op.beparams.iteritems():
9634         result.append(("be/%s" % key, val))
9635
9636     # OS change
9637     if self.op.os_name:
9638       instance.os = self.op.os_name
9639
9640     # osparams changes
9641     if self.op.osparams:
9642       instance.osparams = self.os_inst
9643       for key, val in self.op.osparams.iteritems():
9644         result.append(("os/%s" % key, val))
9645
9646     self.cfg.Update(instance, feedback_fn)
9647
9648     return result
9649
9650   _DISK_CONVERSIONS = {
9651     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9652     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9653     }
9654
9655
9656 class LUBackupQuery(NoHooksLU):
9657   """Query the exports list
9658
9659   """
9660   REQ_BGL = False
9661
9662   def ExpandNames(self):
9663     self.needed_locks = {}
9664     self.share_locks[locking.LEVEL_NODE] = 1
9665     if not self.op.nodes:
9666       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9667     else:
9668       self.needed_locks[locking.LEVEL_NODE] = \
9669         _GetWantedNodes(self, self.op.nodes)
9670
9671   def Exec(self, feedback_fn):
9672     """Compute the list of all the exported system images.
9673
9674     @rtype: dict
9675     @return: a dictionary with the structure node->(export-list)
9676         where export-list is a list of the instances exported on
9677         that node.
9678
9679     """
9680     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9681     rpcresult = self.rpc.call_export_list(self.nodes)
9682     result = {}
9683     for node in rpcresult:
9684       if rpcresult[node].fail_msg:
9685         result[node] = False
9686       else:
9687         result[node] = rpcresult[node].payload
9688
9689     return result
9690
9691
9692 class LUBackupPrepare(NoHooksLU):
9693   """Prepares an instance for an export and returns useful information.
9694
9695   """
9696   REQ_BGL = False
9697
9698   def ExpandNames(self):
9699     self._ExpandAndLockInstance()
9700
9701   def CheckPrereq(self):
9702     """Check prerequisites.
9703
9704     """
9705     instance_name = self.op.instance_name
9706
9707     self.instance = self.cfg.GetInstanceInfo(instance_name)
9708     assert self.instance is not None, \
9709           "Cannot retrieve locked instance %s" % self.op.instance_name
9710     _CheckNodeOnline(self, self.instance.primary_node)
9711
9712     self._cds = _GetClusterDomainSecret()
9713
9714   def Exec(self, feedback_fn):
9715     """Prepares an instance for an export.
9716
9717     """
9718     instance = self.instance
9719
9720     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9721       salt = utils.GenerateSecret(8)
9722
9723       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9724       result = self.rpc.call_x509_cert_create(instance.primary_node,
9725                                               constants.RIE_CERT_VALIDITY)
9726       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9727
9728       (name, cert_pem) = result.payload
9729
9730       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9731                                              cert_pem)
9732
9733       return {
9734         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9735         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9736                           salt),
9737         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9738         }
9739
9740     return None
9741
9742
9743 class LUBackupExport(LogicalUnit):
9744   """Export an instance to an image in the cluster.
9745
9746   """
9747   HPATH = "instance-export"
9748   HTYPE = constants.HTYPE_INSTANCE
9749   REQ_BGL = False
9750
9751   def CheckArguments(self):
9752     """Check the arguments.
9753
9754     """
9755     self.x509_key_name = self.op.x509_key_name
9756     self.dest_x509_ca_pem = self.op.destination_x509_ca
9757
9758     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9759       if not self.x509_key_name:
9760         raise errors.OpPrereqError("Missing X509 key name for encryption",
9761                                    errors.ECODE_INVAL)
9762
9763       if not self.dest_x509_ca_pem:
9764         raise errors.OpPrereqError("Missing destination X509 CA",
9765                                    errors.ECODE_INVAL)
9766
9767   def ExpandNames(self):
9768     self._ExpandAndLockInstance()
9769
9770     # Lock all nodes for local exports
9771     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9772       # FIXME: lock only instance primary and destination node
9773       #
9774       # Sad but true, for now we have do lock all nodes, as we don't know where
9775       # the previous export might be, and in this LU we search for it and
9776       # remove it from its current node. In the future we could fix this by:
9777       #  - making a tasklet to search (share-lock all), then create the
9778       #    new one, then one to remove, after
9779       #  - removing the removal operation altogether
9780       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9781
9782   def DeclareLocks(self, level):
9783     """Last minute lock declaration."""
9784     # All nodes are locked anyway, so nothing to do here.
9785
9786   def BuildHooksEnv(self):
9787     """Build hooks env.
9788
9789     This will run on the master, primary node and target node.
9790
9791     """
9792     env = {
9793       "EXPORT_MODE": self.op.mode,
9794       "EXPORT_NODE": self.op.target_node,
9795       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9796       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9797       # TODO: Generic function for boolean env variables
9798       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9799       }
9800
9801     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9802
9803     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9804
9805     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9806       nl.append(self.op.target_node)
9807
9808     return env, nl, nl
9809
9810   def CheckPrereq(self):
9811     """Check prerequisites.
9812
9813     This checks that the instance and node names are valid.
9814
9815     """
9816     instance_name = self.op.instance_name
9817
9818     self.instance = self.cfg.GetInstanceInfo(instance_name)
9819     assert self.instance is not None, \
9820           "Cannot retrieve locked instance %s" % self.op.instance_name
9821     _CheckNodeOnline(self, self.instance.primary_node)
9822
9823     if (self.op.remove_instance and self.instance.admin_up and
9824         not self.op.shutdown):
9825       raise errors.OpPrereqError("Can not remove instance without shutting it"
9826                                  " down before")
9827
9828     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9829       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9830       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9831       assert self.dst_node is not None
9832
9833       _CheckNodeOnline(self, self.dst_node.name)
9834       _CheckNodeNotDrained(self, self.dst_node.name)
9835
9836       self._cds = None
9837       self.dest_disk_info = None
9838       self.dest_x509_ca = None
9839
9840     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9841       self.dst_node = None
9842
9843       if len(self.op.target_node) != len(self.instance.disks):
9844         raise errors.OpPrereqError(("Received destination information for %s"
9845                                     " disks, but instance %s has %s disks") %
9846                                    (len(self.op.target_node), instance_name,
9847                                     len(self.instance.disks)),
9848                                    errors.ECODE_INVAL)
9849
9850       cds = _GetClusterDomainSecret()
9851
9852       # Check X509 key name
9853       try:
9854         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9855       except (TypeError, ValueError), err:
9856         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9857
9858       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9859         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9860                                    errors.ECODE_INVAL)
9861
9862       # Load and verify CA
9863       try:
9864         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9865       except OpenSSL.crypto.Error, err:
9866         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9867                                    (err, ), errors.ECODE_INVAL)
9868
9869       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9870       if errcode is not None:
9871         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9872                                    (msg, ), errors.ECODE_INVAL)
9873
9874       self.dest_x509_ca = cert
9875
9876       # Verify target information
9877       disk_info = []
9878       for idx, disk_data in enumerate(self.op.target_node):
9879         try:
9880           (host, port, magic) = \
9881             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9882         except errors.GenericError, err:
9883           raise errors.OpPrereqError("Target info for disk %s: %s" %
9884                                      (idx, err), errors.ECODE_INVAL)
9885
9886         disk_info.append((host, port, magic))
9887
9888       assert len(disk_info) == len(self.op.target_node)
9889       self.dest_disk_info = disk_info
9890
9891     else:
9892       raise errors.ProgrammerError("Unhandled export mode %r" %
9893                                    self.op.mode)
9894
9895     # instance disk type verification
9896     # TODO: Implement export support for file-based disks
9897     for disk in self.instance.disks:
9898       if disk.dev_type == constants.LD_FILE:
9899         raise errors.OpPrereqError("Export not supported for instances with"
9900                                    " file-based disks", errors.ECODE_INVAL)
9901
9902   def _CleanupExports(self, feedback_fn):
9903     """Removes exports of current instance from all other nodes.
9904
9905     If an instance in a cluster with nodes A..D was exported to node C, its
9906     exports will be removed from the nodes A, B and D.
9907
9908     """
9909     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9910
9911     nodelist = self.cfg.GetNodeList()
9912     nodelist.remove(self.dst_node.name)
9913
9914     # on one-node clusters nodelist will be empty after the removal
9915     # if we proceed the backup would be removed because OpBackupQuery
9916     # substitutes an empty list with the full cluster node list.
9917     iname = self.instance.name
9918     if nodelist:
9919       feedback_fn("Removing old exports for instance %s" % iname)
9920       exportlist = self.rpc.call_export_list(nodelist)
9921       for node in exportlist:
9922         if exportlist[node].fail_msg:
9923           continue
9924         if iname in exportlist[node].payload:
9925           msg = self.rpc.call_export_remove(node, iname).fail_msg
9926           if msg:
9927             self.LogWarning("Could not remove older export for instance %s"
9928                             " on node %s: %s", iname, node, msg)
9929
9930   def Exec(self, feedback_fn):
9931     """Export an instance to an image in the cluster.
9932
9933     """
9934     assert self.op.mode in constants.EXPORT_MODES
9935
9936     instance = self.instance
9937     src_node = instance.primary_node
9938
9939     if self.op.shutdown:
9940       # shutdown the instance, but not the disks
9941       feedback_fn("Shutting down instance %s" % instance.name)
9942       result = self.rpc.call_instance_shutdown(src_node, instance,
9943                                                self.op.shutdown_timeout)
9944       # TODO: Maybe ignore failures if ignore_remove_failures is set
9945       result.Raise("Could not shutdown instance %s on"
9946                    " node %s" % (instance.name, src_node))
9947
9948     # set the disks ID correctly since call_instance_start needs the
9949     # correct drbd minor to create the symlinks
9950     for disk in instance.disks:
9951       self.cfg.SetDiskID(disk, src_node)
9952
9953     activate_disks = (not instance.admin_up)
9954
9955     if activate_disks:
9956       # Activate the instance disks if we'exporting a stopped instance
9957       feedback_fn("Activating disks for %s" % instance.name)
9958       _StartInstanceDisks(self, instance, None)
9959
9960     try:
9961       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9962                                                      instance)
9963
9964       helper.CreateSnapshots()
9965       try:
9966         if (self.op.shutdown and instance.admin_up and
9967             not self.op.remove_instance):
9968           assert not activate_disks
9969           feedback_fn("Starting instance %s" % instance.name)
9970           result = self.rpc.call_instance_start(src_node, instance, None, None)
9971           msg = result.fail_msg
9972           if msg:
9973             feedback_fn("Failed to start instance: %s" % msg)
9974             _ShutdownInstanceDisks(self, instance)
9975             raise errors.OpExecError("Could not start instance: %s" % msg)
9976
9977         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9978           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9979         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9980           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9981           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9982
9983           (key_name, _, _) = self.x509_key_name
9984
9985           dest_ca_pem = \
9986             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9987                                             self.dest_x509_ca)
9988
9989           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9990                                                      key_name, dest_ca_pem,
9991                                                      timeouts)
9992       finally:
9993         helper.Cleanup()
9994
9995       # Check for backwards compatibility
9996       assert len(dresults) == len(instance.disks)
9997       assert compat.all(isinstance(i, bool) for i in dresults), \
9998              "Not all results are boolean: %r" % dresults
9999
10000     finally:
10001       if activate_disks:
10002         feedback_fn("Deactivating disks for %s" % instance.name)
10003         _ShutdownInstanceDisks(self, instance)
10004
10005     if not (compat.all(dresults) and fin_resu):
10006       failures = []
10007       if not fin_resu:
10008         failures.append("export finalization")
10009       if not compat.all(dresults):
10010         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10011                                if not dsk)
10012         failures.append("disk export: disk(s) %s" % fdsk)
10013
10014       raise errors.OpExecError("Export failed, errors in %s" %
10015                                utils.CommaJoin(failures))
10016
10017     # At this point, the export was successful, we can cleanup/finish
10018
10019     # Remove instance if requested
10020     if self.op.remove_instance:
10021       feedback_fn("Removing instance %s" % instance.name)
10022       _RemoveInstance(self, feedback_fn, instance,
10023                       self.op.ignore_remove_failures)
10024
10025     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10026       self._CleanupExports(feedback_fn)
10027
10028     return fin_resu, dresults
10029
10030
10031 class LUBackupRemove(NoHooksLU):
10032   """Remove exports related to the named instance.
10033
10034   """
10035   REQ_BGL = False
10036
10037   def ExpandNames(self):
10038     self.needed_locks = {}
10039     # We need all nodes to be locked in order for RemoveExport to work, but we
10040     # don't need to lock the instance itself, as nothing will happen to it (and
10041     # we can remove exports also for a removed instance)
10042     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10043
10044   def Exec(self, feedback_fn):
10045     """Remove any export.
10046
10047     """
10048     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10049     # If the instance was not found we'll try with the name that was passed in.
10050     # This will only work if it was an FQDN, though.
10051     fqdn_warn = False
10052     if not instance_name:
10053       fqdn_warn = True
10054       instance_name = self.op.instance_name
10055
10056     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10057     exportlist = self.rpc.call_export_list(locked_nodes)
10058     found = False
10059     for node in exportlist:
10060       msg = exportlist[node].fail_msg
10061       if msg:
10062         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10063         continue
10064       if instance_name in exportlist[node].payload:
10065         found = True
10066         result = self.rpc.call_export_remove(node, instance_name)
10067         msg = result.fail_msg
10068         if msg:
10069           logging.error("Could not remove export for instance %s"
10070                         " on node %s: %s", instance_name, node, msg)
10071
10072     if fqdn_warn and not found:
10073       feedback_fn("Export not found. If trying to remove an export belonging"
10074                   " to a deleted instance please use its Fully Qualified"
10075                   " Domain Name.")
10076
10077
10078 class LUGroupAdd(LogicalUnit):
10079   """Logical unit for creating node groups.
10080
10081   """
10082   HPATH = "group-add"
10083   HTYPE = constants.HTYPE_GROUP
10084   REQ_BGL = False
10085
10086   def ExpandNames(self):
10087     # We need the new group's UUID here so that we can create and acquire the
10088     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10089     # that it should not check whether the UUID exists in the configuration.
10090     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10091     self.needed_locks = {}
10092     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10093
10094   def CheckPrereq(self):
10095     """Check prerequisites.
10096
10097     This checks that the given group name is not an existing node group
10098     already.
10099
10100     """
10101     try:
10102       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10103     except errors.OpPrereqError:
10104       pass
10105     else:
10106       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10107                                  " node group (UUID: %s)" %
10108                                  (self.op.group_name, existing_uuid),
10109                                  errors.ECODE_EXISTS)
10110
10111     if self.op.ndparams:
10112       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10113
10114   def BuildHooksEnv(self):
10115     """Build hooks env.
10116
10117     """
10118     env = {
10119       "GROUP_NAME": self.op.group_name,
10120       }
10121     mn = self.cfg.GetMasterNode()
10122     return env, [mn], [mn]
10123
10124   def Exec(self, feedback_fn):
10125     """Add the node group to the cluster.
10126
10127     """
10128     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10129                                   uuid=self.group_uuid,
10130                                   alloc_policy=self.op.alloc_policy,
10131                                   ndparams=self.op.ndparams)
10132
10133     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10134     del self.remove_locks[locking.LEVEL_NODEGROUP]
10135
10136
10137 class LUGroupAssignNodes(NoHooksLU):
10138   """Logical unit for assigning nodes to groups.
10139
10140   """
10141   REQ_BGL = False
10142
10143   def ExpandNames(self):
10144     # These raise errors.OpPrereqError on their own:
10145     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10146     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10147
10148     # We want to lock all the affected nodes and groups. We have readily
10149     # available the list of nodes, and the *destination* group. To gather the
10150     # list of "source" groups, we need to fetch node information.
10151     self.node_data = self.cfg.GetAllNodesInfo()
10152     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10153     affected_groups.add(self.group_uuid)
10154
10155     self.needed_locks = {
10156       locking.LEVEL_NODEGROUP: list(affected_groups),
10157       locking.LEVEL_NODE: self.op.nodes,
10158       }
10159
10160   def CheckPrereq(self):
10161     """Check prerequisites.
10162
10163     """
10164     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10165     instance_data = self.cfg.GetAllInstancesInfo()
10166
10167     if self.group is None:
10168       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10169                                (self.op.group_name, self.group_uuid))
10170
10171     (new_splits, previous_splits) = \
10172       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10173                                              for node in self.op.nodes],
10174                                             self.node_data, instance_data)
10175
10176     if new_splits:
10177       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10178
10179       if not self.op.force:
10180         raise errors.OpExecError("The following instances get split by this"
10181                                  " change and --force was not given: %s" %
10182                                  fmt_new_splits)
10183       else:
10184         self.LogWarning("This operation will split the following instances: %s",
10185                         fmt_new_splits)
10186
10187         if previous_splits:
10188           self.LogWarning("In addition, these already-split instances continue"
10189                           " to be split across groups: %s",
10190                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10191
10192   def Exec(self, feedback_fn):
10193     """Assign nodes to a new group.
10194
10195     """
10196     for node in self.op.nodes:
10197       self.node_data[node].group = self.group_uuid
10198
10199     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10200
10201   @staticmethod
10202   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10203     """Check for split instances after a node assignment.
10204
10205     This method considers a series of node assignments as an atomic operation,
10206     and returns information about split instances after applying the set of
10207     changes.
10208
10209     In particular, it returns information about newly split instances, and
10210     instances that were already split, and remain so after the change.
10211
10212     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10213     considered.
10214
10215     @type changes: list of (node_name, new_group_uuid) pairs.
10216     @param changes: list of node assignments to consider.
10217     @param node_data: a dict with data for all nodes
10218     @param instance_data: a dict with all instances to consider
10219     @rtype: a two-tuple
10220     @return: a list of instances that were previously okay and result split as a
10221       consequence of this change, and a list of instances that were previously
10222       split and this change does not fix.
10223
10224     """
10225     changed_nodes = dict((node, group) for node, group in changes
10226                          if node_data[node].group != group)
10227
10228     all_split_instances = set()
10229     previously_split_instances = set()
10230
10231     def InstanceNodes(instance):
10232       return [instance.primary_node] + list(instance.secondary_nodes)
10233
10234     for inst in instance_data.values():
10235       if inst.disk_template not in constants.DTS_NET_MIRROR:
10236         continue
10237
10238       instance_nodes = InstanceNodes(inst)
10239
10240       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10241         previously_split_instances.add(inst.name)
10242
10243       if len(set(changed_nodes.get(node, node_data[node].group)
10244                  for node in instance_nodes)) > 1:
10245         all_split_instances.add(inst.name)
10246
10247     return (list(all_split_instances - previously_split_instances),
10248             list(previously_split_instances & all_split_instances))
10249
10250
10251 class _GroupQuery(_QueryBase):
10252
10253   FIELDS = query.GROUP_FIELDS
10254
10255   def ExpandNames(self, lu):
10256     lu.needed_locks = {}
10257
10258     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10259     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10260
10261     if not self.names:
10262       self.wanted = [name_to_uuid[name]
10263                      for name in utils.NiceSort(name_to_uuid.keys())]
10264     else:
10265       # Accept names to be either names or UUIDs.
10266       missing = []
10267       self.wanted = []
10268       all_uuid = frozenset(self._all_groups.keys())
10269
10270       for name in self.names:
10271         if name in all_uuid:
10272           self.wanted.append(name)
10273         elif name in name_to_uuid:
10274           self.wanted.append(name_to_uuid[name])
10275         else:
10276           missing.append(name)
10277
10278       if missing:
10279         raise errors.OpPrereqError("Some groups do not exist: %s" %
10280                                    utils.CommaJoin(missing),
10281                                    errors.ECODE_NOENT)
10282
10283   def DeclareLocks(self, lu, level):
10284     pass
10285
10286   def _GetQueryData(self, lu):
10287     """Computes the list of node groups and their attributes.
10288
10289     """
10290     do_nodes = query.GQ_NODE in self.requested_data
10291     do_instances = query.GQ_INST in self.requested_data
10292
10293     group_to_nodes = None
10294     group_to_instances = None
10295
10296     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10297     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10298     # latter GetAllInstancesInfo() is not enough, for we have to go through
10299     # instance->node. Hence, we will need to process nodes even if we only need
10300     # instance information.
10301     if do_nodes or do_instances:
10302       all_nodes = lu.cfg.GetAllNodesInfo()
10303       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10304       node_to_group = {}
10305
10306       for node in all_nodes.values():
10307         if node.group in group_to_nodes:
10308           group_to_nodes[node.group].append(node.name)
10309           node_to_group[node.name] = node.group
10310
10311       if do_instances:
10312         all_instances = lu.cfg.GetAllInstancesInfo()
10313         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10314
10315         for instance in all_instances.values():
10316           node = instance.primary_node
10317           if node in node_to_group:
10318             group_to_instances[node_to_group[node]].append(instance.name)
10319
10320         if not do_nodes:
10321           # Do not pass on node information if it was not requested.
10322           group_to_nodes = None
10323
10324     return query.GroupQueryData([self._all_groups[uuid]
10325                                  for uuid in self.wanted],
10326                                 group_to_nodes, group_to_instances)
10327
10328
10329 class LUGroupQuery(NoHooksLU):
10330   """Logical unit for querying node groups.
10331
10332   """
10333   REQ_BGL = False
10334
10335   def CheckArguments(self):
10336     self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10337
10338   def ExpandNames(self):
10339     self.gq.ExpandNames(self)
10340
10341   def Exec(self, feedback_fn):
10342     return self.gq.OldStyleQuery(self)
10343
10344
10345 class LUGroupSetParams(LogicalUnit):
10346   """Modifies the parameters of a node group.
10347
10348   """
10349   HPATH = "group-modify"
10350   HTYPE = constants.HTYPE_GROUP
10351   REQ_BGL = False
10352
10353   def CheckArguments(self):
10354     all_changes = [
10355       self.op.ndparams,
10356       self.op.alloc_policy,
10357       ]
10358
10359     if all_changes.count(None) == len(all_changes):
10360       raise errors.OpPrereqError("Please pass at least one modification",
10361                                  errors.ECODE_INVAL)
10362
10363   def ExpandNames(self):
10364     # This raises errors.OpPrereqError on its own:
10365     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10366
10367     self.needed_locks = {
10368       locking.LEVEL_NODEGROUP: [self.group_uuid],
10369       }
10370
10371   def CheckPrereq(self):
10372     """Check prerequisites.
10373
10374     """
10375     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10376
10377     if self.group is None:
10378       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10379                                (self.op.group_name, self.group_uuid))
10380
10381     if self.op.ndparams:
10382       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10383       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10384       self.new_ndparams = new_ndparams
10385
10386   def BuildHooksEnv(self):
10387     """Build hooks env.
10388
10389     """
10390     env = {
10391       "GROUP_NAME": self.op.group_name,
10392       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10393       }
10394     mn = self.cfg.GetMasterNode()
10395     return env, [mn], [mn]
10396
10397   def Exec(self, feedback_fn):
10398     """Modifies the node group.
10399
10400     """
10401     result = []
10402
10403     if self.op.ndparams:
10404       self.group.ndparams = self.new_ndparams
10405       result.append(("ndparams", str(self.group.ndparams)))
10406
10407     if self.op.alloc_policy:
10408       self.group.alloc_policy = self.op.alloc_policy
10409
10410     self.cfg.Update(self.group, feedback_fn)
10411     return result
10412
10413
10414
10415 class LUGroupRemove(LogicalUnit):
10416   HPATH = "group-remove"
10417   HTYPE = constants.HTYPE_GROUP
10418   REQ_BGL = False
10419
10420   def ExpandNames(self):
10421     # This will raises errors.OpPrereqError on its own:
10422     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10423     self.needed_locks = {
10424       locking.LEVEL_NODEGROUP: [self.group_uuid],
10425       }
10426
10427   def CheckPrereq(self):
10428     """Check prerequisites.
10429
10430     This checks that the given group name exists as a node group, that is
10431     empty (i.e., contains no nodes), and that is not the last group of the
10432     cluster.
10433
10434     """
10435     # Verify that the group is empty.
10436     group_nodes = [node.name
10437                    for node in self.cfg.GetAllNodesInfo().values()
10438                    if node.group == self.group_uuid]
10439
10440     if group_nodes:
10441       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10442                                  " nodes: %s" %
10443                                  (self.op.group_name,
10444                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10445                                  errors.ECODE_STATE)
10446
10447     # Verify the cluster would not be left group-less.
10448     if len(self.cfg.GetNodeGroupList()) == 1:
10449       raise errors.OpPrereqError("Group '%s' is the only group,"
10450                                  " cannot be removed" %
10451                                  self.op.group_name,
10452                                  errors.ECODE_STATE)
10453
10454   def BuildHooksEnv(self):
10455     """Build hooks env.
10456
10457     """
10458     env = {
10459       "GROUP_NAME": self.op.group_name,
10460       }
10461     mn = self.cfg.GetMasterNode()
10462     return env, [mn], [mn]
10463
10464   def Exec(self, feedback_fn):
10465     """Remove the node group.
10466
10467     """
10468     try:
10469       self.cfg.RemoveNodeGroup(self.group_uuid)
10470     except errors.ConfigurationError:
10471       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10472                                (self.op.group_name, self.group_uuid))
10473
10474     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10475
10476
10477 class LUGroupRename(LogicalUnit):
10478   HPATH = "group-rename"
10479   HTYPE = constants.HTYPE_GROUP
10480   REQ_BGL = False
10481
10482   def ExpandNames(self):
10483     # This raises errors.OpPrereqError on its own:
10484     self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10485
10486     self.needed_locks = {
10487       locking.LEVEL_NODEGROUP: [self.group_uuid],
10488       }
10489
10490   def CheckPrereq(self):
10491     """Check prerequisites.
10492
10493     This checks that the given old_name exists as a node group, and that
10494     new_name doesn't.
10495
10496     """
10497     try:
10498       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10499     except errors.OpPrereqError:
10500       pass
10501     else:
10502       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10503                                  " node group (UUID: %s)" %
10504                                  (self.op.new_name, new_name_uuid),
10505                                  errors.ECODE_EXISTS)
10506
10507   def BuildHooksEnv(self):
10508     """Build hooks env.
10509
10510     """
10511     env = {
10512       "OLD_NAME": self.op.old_name,
10513       "NEW_NAME": self.op.new_name,
10514       }
10515
10516     mn = self.cfg.GetMasterNode()
10517     all_nodes = self.cfg.GetAllNodesInfo()
10518     run_nodes = [mn]
10519     all_nodes.pop(mn, None)
10520
10521     for node in all_nodes.values():
10522       if node.group == self.group_uuid:
10523         run_nodes.append(node.name)
10524
10525     return env, run_nodes, run_nodes
10526
10527   def Exec(self, feedback_fn):
10528     """Rename the node group.
10529
10530     """
10531     group = self.cfg.GetNodeGroup(self.group_uuid)
10532
10533     if group is None:
10534       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10535                                (self.op.old_name, self.group_uuid))
10536
10537     group.name = self.op.new_name
10538     self.cfg.Update(group, feedback_fn)
10539
10540     return self.op.new_name
10541
10542
10543 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10544   """Generic tags LU.
10545
10546   This is an abstract class which is the parent of all the other tags LUs.
10547
10548   """
10549
10550   def ExpandNames(self):
10551     self.needed_locks = {}
10552     if self.op.kind == constants.TAG_NODE:
10553       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10554       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10555     elif self.op.kind == constants.TAG_INSTANCE:
10556       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10557       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10558
10559     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10560     # not possible to acquire the BGL based on opcode parameters)
10561
10562   def CheckPrereq(self):
10563     """Check prerequisites.
10564
10565     """
10566     if self.op.kind == constants.TAG_CLUSTER:
10567       self.target = self.cfg.GetClusterInfo()
10568     elif self.op.kind == constants.TAG_NODE:
10569       self.target = self.cfg.GetNodeInfo(self.op.name)
10570     elif self.op.kind == constants.TAG_INSTANCE:
10571       self.target = self.cfg.GetInstanceInfo(self.op.name)
10572     else:
10573       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10574                                  str(self.op.kind), errors.ECODE_INVAL)
10575
10576
10577 class LUTagsGet(TagsLU):
10578   """Returns the tags of a given object.
10579
10580   """
10581   REQ_BGL = False
10582
10583   def ExpandNames(self):
10584     TagsLU.ExpandNames(self)
10585
10586     # Share locks as this is only a read operation
10587     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10588
10589   def Exec(self, feedback_fn):
10590     """Returns the tag list.
10591
10592     """
10593     return list(self.target.GetTags())
10594
10595
10596 class LUTagsSearch(NoHooksLU):
10597   """Searches the tags for a given pattern.
10598
10599   """
10600   REQ_BGL = False
10601
10602   def ExpandNames(self):
10603     self.needed_locks = {}
10604
10605   def CheckPrereq(self):
10606     """Check prerequisites.
10607
10608     This checks the pattern passed for validity by compiling it.
10609
10610     """
10611     try:
10612       self.re = re.compile(self.op.pattern)
10613     except re.error, err:
10614       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10615                                  (self.op.pattern, err), errors.ECODE_INVAL)
10616
10617   def Exec(self, feedback_fn):
10618     """Returns the tag list.
10619
10620     """
10621     cfg = self.cfg
10622     tgts = [("/cluster", cfg.GetClusterInfo())]
10623     ilist = cfg.GetAllInstancesInfo().values()
10624     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10625     nlist = cfg.GetAllNodesInfo().values()
10626     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10627     results = []
10628     for path, target in tgts:
10629       for tag in target.GetTags():
10630         if self.re.search(tag):
10631           results.append((path, tag))
10632     return results
10633
10634
10635 class LUTagsSet(TagsLU):
10636   """Sets a tag on a given object.
10637
10638   """
10639   REQ_BGL = False
10640
10641   def CheckPrereq(self):
10642     """Check prerequisites.
10643
10644     This checks the type and length of the tag name and value.
10645
10646     """
10647     TagsLU.CheckPrereq(self)
10648     for tag in self.op.tags:
10649       objects.TaggableObject.ValidateTag(tag)
10650
10651   def Exec(self, feedback_fn):
10652     """Sets the tag.
10653
10654     """
10655     try:
10656       for tag in self.op.tags:
10657         self.target.AddTag(tag)
10658     except errors.TagError, err:
10659       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10660     self.cfg.Update(self.target, feedback_fn)
10661
10662
10663 class LUTagsDel(TagsLU):
10664   """Delete a list of tags from a given object.
10665
10666   """
10667   REQ_BGL = False
10668
10669   def CheckPrereq(self):
10670     """Check prerequisites.
10671
10672     This checks that we have the given tag.
10673
10674     """
10675     TagsLU.CheckPrereq(self)
10676     for tag in self.op.tags:
10677       objects.TaggableObject.ValidateTag(tag)
10678     del_tags = frozenset(self.op.tags)
10679     cur_tags = self.target.GetTags()
10680
10681     diff_tags = del_tags - cur_tags
10682     if diff_tags:
10683       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10684       raise errors.OpPrereqError("Tag(s) %s not found" %
10685                                  (utils.CommaJoin(diff_names), ),
10686                                  errors.ECODE_NOENT)
10687
10688   def Exec(self, feedback_fn):
10689     """Remove the tag from the object.
10690
10691     """
10692     for tag in self.op.tags:
10693       self.target.RemoveTag(tag)
10694     self.cfg.Update(self.target, feedback_fn)
10695
10696
10697 class LUTestDelay(NoHooksLU):
10698   """Sleep for a specified amount of time.
10699
10700   This LU sleeps on the master and/or nodes for a specified amount of
10701   time.
10702
10703   """
10704   REQ_BGL = False
10705
10706   def ExpandNames(self):
10707     """Expand names and set required locks.
10708
10709     This expands the node list, if any.
10710
10711     """
10712     self.needed_locks = {}
10713     if self.op.on_nodes:
10714       # _GetWantedNodes can be used here, but is not always appropriate to use
10715       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10716       # more information.
10717       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10718       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10719
10720   def _TestDelay(self):
10721     """Do the actual sleep.
10722
10723     """
10724     if self.op.on_master:
10725       if not utils.TestDelay(self.op.duration):
10726         raise errors.OpExecError("Error during master delay test")
10727     if self.op.on_nodes:
10728       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10729       for node, node_result in result.items():
10730         node_result.Raise("Failure during rpc call to node %s" % node)
10731
10732   def Exec(self, feedback_fn):
10733     """Execute the test delay opcode, with the wanted repetitions.
10734
10735     """
10736     if self.op.repeat == 0:
10737       self._TestDelay()
10738     else:
10739       top_value = self.op.repeat - 1
10740       for i in range(self.op.repeat):
10741         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10742         self._TestDelay()
10743
10744
10745 class LUTestJqueue(NoHooksLU):
10746   """Utility LU to test some aspects of the job queue.
10747
10748   """
10749   REQ_BGL = False
10750
10751   # Must be lower than default timeout for WaitForJobChange to see whether it
10752   # notices changed jobs
10753   _CLIENT_CONNECT_TIMEOUT = 20.0
10754   _CLIENT_CONFIRM_TIMEOUT = 60.0
10755
10756   @classmethod
10757   def _NotifyUsingSocket(cls, cb, errcls):
10758     """Opens a Unix socket and waits for another program to connect.
10759
10760     @type cb: callable
10761     @param cb: Callback to send socket name to client
10762     @type errcls: class
10763     @param errcls: Exception class to use for errors
10764
10765     """
10766     # Using a temporary directory as there's no easy way to create temporary
10767     # sockets without writing a custom loop around tempfile.mktemp and
10768     # socket.bind
10769     tmpdir = tempfile.mkdtemp()
10770     try:
10771       tmpsock = utils.PathJoin(tmpdir, "sock")
10772
10773       logging.debug("Creating temporary socket at %s", tmpsock)
10774       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10775       try:
10776         sock.bind(tmpsock)
10777         sock.listen(1)
10778
10779         # Send details to client
10780         cb(tmpsock)
10781
10782         # Wait for client to connect before continuing
10783         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10784         try:
10785           (conn, _) = sock.accept()
10786         except socket.error, err:
10787           raise errcls("Client didn't connect in time (%s)" % err)
10788       finally:
10789         sock.close()
10790     finally:
10791       # Remove as soon as client is connected
10792       shutil.rmtree(tmpdir)
10793
10794     # Wait for client to close
10795     try:
10796       try:
10797         # pylint: disable-msg=E1101
10798         # Instance of '_socketobject' has no ... member
10799         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10800         conn.recv(1)
10801       except socket.error, err:
10802         raise errcls("Client failed to confirm notification (%s)" % err)
10803     finally:
10804       conn.close()
10805
10806   def _SendNotification(self, test, arg, sockname):
10807     """Sends a notification to the client.
10808
10809     @type test: string
10810     @param test: Test name
10811     @param arg: Test argument (depends on test)
10812     @type sockname: string
10813     @param sockname: Socket path
10814
10815     """
10816     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10817
10818   def _Notify(self, prereq, test, arg):
10819     """Notifies the client of a test.
10820
10821     @type prereq: bool
10822     @param prereq: Whether this is a prereq-phase test
10823     @type test: string
10824     @param test: Test name
10825     @param arg: Test argument (depends on test)
10826
10827     """
10828     if prereq:
10829       errcls = errors.OpPrereqError
10830     else:
10831       errcls = errors.OpExecError
10832
10833     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10834                                                   test, arg),
10835                                    errcls)
10836
10837   def CheckArguments(self):
10838     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10839     self.expandnames_calls = 0
10840
10841   def ExpandNames(self):
10842     checkargs_calls = getattr(self, "checkargs_calls", 0)
10843     if checkargs_calls < 1:
10844       raise errors.ProgrammerError("CheckArguments was not called")
10845
10846     self.expandnames_calls += 1
10847
10848     if self.op.notify_waitlock:
10849       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10850
10851     self.LogInfo("Expanding names")
10852
10853     # Get lock on master node (just to get a lock, not for a particular reason)
10854     self.needed_locks = {
10855       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10856       }
10857
10858   def Exec(self, feedback_fn):
10859     if self.expandnames_calls < 1:
10860       raise errors.ProgrammerError("ExpandNames was not called")
10861
10862     if self.op.notify_exec:
10863       self._Notify(False, constants.JQT_EXEC, None)
10864
10865     self.LogInfo("Executing")
10866
10867     if self.op.log_messages:
10868       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10869       for idx, msg in enumerate(self.op.log_messages):
10870         self.LogInfo("Sending log message %s", idx + 1)
10871         feedback_fn(constants.JQT_MSGPREFIX + msg)
10872         # Report how many test messages have been sent
10873         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10874
10875     if self.op.fail:
10876       raise errors.OpExecError("Opcode failure was requested")
10877
10878     return True
10879
10880
10881 class IAllocator(object):
10882   """IAllocator framework.
10883
10884   An IAllocator instance has three sets of attributes:
10885     - cfg that is needed to query the cluster
10886     - input data (all members of the _KEYS class attribute are required)
10887     - four buffer attributes (in|out_data|text), that represent the
10888       input (to the external script) in text and data structure format,
10889       and the output from it, again in two formats
10890     - the result variables from the script (success, info, nodes) for
10891       easy usage
10892
10893   """
10894   # pylint: disable-msg=R0902
10895   # lots of instance attributes
10896   _ALLO_KEYS = [
10897     "name", "mem_size", "disks", "disk_template",
10898     "os", "tags", "nics", "vcpus", "hypervisor",
10899     ]
10900   _RELO_KEYS = [
10901     "name", "relocate_from",
10902     ]
10903   _EVAC_KEYS = [
10904     "evac_nodes",
10905     ]
10906
10907   def __init__(self, cfg, rpc, mode, **kwargs):
10908     self.cfg = cfg
10909     self.rpc = rpc
10910     # init buffer variables
10911     self.in_text = self.out_text = self.in_data = self.out_data = None
10912     # init all input fields so that pylint is happy
10913     self.mode = mode
10914     self.mem_size = self.disks = self.disk_template = None
10915     self.os = self.tags = self.nics = self.vcpus = None
10916     self.hypervisor = None
10917     self.relocate_from = None
10918     self.name = None
10919     self.evac_nodes = None
10920     # computed fields
10921     self.required_nodes = None
10922     # init result fields
10923     self.success = self.info = self.result = None
10924     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10925       keyset = self._ALLO_KEYS
10926       fn = self._AddNewInstance
10927     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10928       keyset = self._RELO_KEYS
10929       fn = self._AddRelocateInstance
10930     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10931       keyset = self._EVAC_KEYS
10932       fn = self._AddEvacuateNodes
10933     else:
10934       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10935                                    " IAllocator" % self.mode)
10936     for key in kwargs:
10937       if key not in keyset:
10938         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10939                                      " IAllocator" % key)
10940       setattr(self, key, kwargs[key])
10941
10942     for key in keyset:
10943       if key not in kwargs:
10944         raise errors.ProgrammerError("Missing input parameter '%s' to"
10945                                      " IAllocator" % key)
10946     self._BuildInputData(fn)
10947
10948   def _ComputeClusterData(self):
10949     """Compute the generic allocator input data.
10950
10951     This is the data that is independent of the actual operation.
10952
10953     """
10954     cfg = self.cfg
10955     cluster_info = cfg.GetClusterInfo()
10956     # cluster data
10957     data = {
10958       "version": constants.IALLOCATOR_VERSION,
10959       "cluster_name": cfg.GetClusterName(),
10960       "cluster_tags": list(cluster_info.GetTags()),
10961       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10962       # we don't have job IDs
10963       }
10964     ninfo = cfg.GetAllNodesInfo()
10965     iinfo = cfg.GetAllInstancesInfo().values()
10966     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10967
10968     # node data
10969     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10970
10971     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10972       hypervisor_name = self.hypervisor
10973     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10974       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10975     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10976       hypervisor_name = cluster_info.enabled_hypervisors[0]
10977
10978     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10979                                         hypervisor_name)
10980     node_iinfo = \
10981       self.rpc.call_all_instances_info(node_list,
10982                                        cluster_info.enabled_hypervisors)
10983
10984     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10985
10986     config_ndata = self._ComputeBasicNodeData(ninfo)
10987     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10988                                                  i_list, config_ndata)
10989     assert len(data["nodes"]) == len(ninfo), \
10990         "Incomplete node data computed"
10991
10992     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10993
10994     self.in_data = data
10995
10996   @staticmethod
10997   def _ComputeNodeGroupData(cfg):
10998     """Compute node groups data.
10999
11000     """
11001     ng = {}
11002     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11003       ng[guuid] = {
11004         "name": gdata.name,
11005         "alloc_policy": gdata.alloc_policy,
11006         }
11007     return ng
11008
11009   @staticmethod
11010   def _ComputeBasicNodeData(node_cfg):
11011     """Compute global node data.
11012
11013     @rtype: dict
11014     @returns: a dict of name: (node dict, node config)
11015
11016     """
11017     node_results = {}
11018     for ninfo in node_cfg.values():
11019       # fill in static (config-based) values
11020       pnr = {
11021         "tags": list(ninfo.GetTags()),
11022         "primary_ip": ninfo.primary_ip,
11023         "secondary_ip": ninfo.secondary_ip,
11024         "offline": ninfo.offline,
11025         "drained": ninfo.drained,
11026         "master_candidate": ninfo.master_candidate,
11027         "group": ninfo.group,
11028         "master_capable": ninfo.master_capable,
11029         "vm_capable": ninfo.vm_capable,
11030         }
11031
11032       node_results[ninfo.name] = pnr
11033
11034     return node_results
11035
11036   @staticmethod
11037   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11038                               node_results):
11039     """Compute global node data.
11040
11041     @param node_results: the basic node structures as filled from the config
11042
11043     """
11044     # make a copy of the current dict
11045     node_results = dict(node_results)
11046     for nname, nresult in node_data.items():
11047       assert nname in node_results, "Missing basic data for node %s" % nname
11048       ninfo = node_cfg[nname]
11049
11050       if not (ninfo.offline or ninfo.drained):
11051         nresult.Raise("Can't get data for node %s" % nname)
11052         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11053                                 nname)
11054         remote_info = nresult.payload
11055
11056         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11057                      'vg_size', 'vg_free', 'cpu_total']:
11058           if attr not in remote_info:
11059             raise errors.OpExecError("Node '%s' didn't return attribute"
11060                                      " '%s'" % (nname, attr))
11061           if not isinstance(remote_info[attr], int):
11062             raise errors.OpExecError("Node '%s' returned invalid value"
11063                                      " for '%s': %s" %
11064                                      (nname, attr, remote_info[attr]))
11065         # compute memory used by primary instances
11066         i_p_mem = i_p_up_mem = 0
11067         for iinfo, beinfo in i_list:
11068           if iinfo.primary_node == nname:
11069             i_p_mem += beinfo[constants.BE_MEMORY]
11070             if iinfo.name not in node_iinfo[nname].payload:
11071               i_used_mem = 0
11072             else:
11073               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11074             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11075             remote_info['memory_free'] -= max(0, i_mem_diff)
11076
11077             if iinfo.admin_up:
11078               i_p_up_mem += beinfo[constants.BE_MEMORY]
11079
11080         # compute memory used by instances
11081         pnr_dyn = {
11082           "total_memory": remote_info['memory_total'],
11083           "reserved_memory": remote_info['memory_dom0'],
11084           "free_memory": remote_info['memory_free'],
11085           "total_disk": remote_info['vg_size'],
11086           "free_disk": remote_info['vg_free'],
11087           "total_cpus": remote_info['cpu_total'],
11088           "i_pri_memory": i_p_mem,
11089           "i_pri_up_memory": i_p_up_mem,
11090           }
11091         pnr_dyn.update(node_results[nname])
11092         node_results[nname] = pnr_dyn
11093
11094     return node_results
11095
11096   @staticmethod
11097   def _ComputeInstanceData(cluster_info, i_list):
11098     """Compute global instance data.
11099
11100     """
11101     instance_data = {}
11102     for iinfo, beinfo in i_list:
11103       nic_data = []
11104       for nic in iinfo.nics:
11105         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11106         nic_dict = {"mac": nic.mac,
11107                     "ip": nic.ip,
11108                     "mode": filled_params[constants.NIC_MODE],
11109                     "link": filled_params[constants.NIC_LINK],
11110                    }
11111         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11112           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11113         nic_data.append(nic_dict)
11114       pir = {
11115         "tags": list(iinfo.GetTags()),
11116         "admin_up": iinfo.admin_up,
11117         "vcpus": beinfo[constants.BE_VCPUS],
11118         "memory": beinfo[constants.BE_MEMORY],
11119         "os": iinfo.os,
11120         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11121         "nics": nic_data,
11122         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11123         "disk_template": iinfo.disk_template,
11124         "hypervisor": iinfo.hypervisor,
11125         }
11126       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11127                                                  pir["disks"])
11128       instance_data[iinfo.name] = pir
11129
11130     return instance_data
11131
11132   def _AddNewInstance(self):
11133     """Add new instance data to allocator structure.
11134
11135     This in combination with _AllocatorGetClusterData will create the
11136     correct structure needed as input for the allocator.
11137
11138     The checks for the completeness of the opcode must have already been
11139     done.
11140
11141     """
11142     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11143
11144     if self.disk_template in constants.DTS_NET_MIRROR:
11145       self.required_nodes = 2
11146     else:
11147       self.required_nodes = 1
11148     request = {
11149       "name": self.name,
11150       "disk_template": self.disk_template,
11151       "tags": self.tags,
11152       "os": self.os,
11153       "vcpus": self.vcpus,
11154       "memory": self.mem_size,
11155       "disks": self.disks,
11156       "disk_space_total": disk_space,
11157       "nics": self.nics,
11158       "required_nodes": self.required_nodes,
11159       }
11160     return request
11161
11162   def _AddRelocateInstance(self):
11163     """Add relocate instance data to allocator structure.
11164
11165     This in combination with _IAllocatorGetClusterData will create the
11166     correct structure needed as input for the allocator.
11167
11168     The checks for the completeness of the opcode must have already been
11169     done.
11170
11171     """
11172     instance = self.cfg.GetInstanceInfo(self.name)
11173     if instance is None:
11174       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11175                                    " IAllocator" % self.name)
11176
11177     if instance.disk_template not in constants.DTS_NET_MIRROR:
11178       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11179                                  errors.ECODE_INVAL)
11180
11181     if len(instance.secondary_nodes) != 1:
11182       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11183                                  errors.ECODE_STATE)
11184
11185     self.required_nodes = 1
11186     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11187     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11188
11189     request = {
11190       "name": self.name,
11191       "disk_space_total": disk_space,
11192       "required_nodes": self.required_nodes,
11193       "relocate_from": self.relocate_from,
11194       }
11195     return request
11196
11197   def _AddEvacuateNodes(self):
11198     """Add evacuate nodes data to allocator structure.
11199
11200     """
11201     request = {
11202       "evac_nodes": self.evac_nodes
11203       }
11204     return request
11205
11206   def _BuildInputData(self, fn):
11207     """Build input data structures.
11208
11209     """
11210     self._ComputeClusterData()
11211
11212     request = fn()
11213     request["type"] = self.mode
11214     self.in_data["request"] = request
11215
11216     self.in_text = serializer.Dump(self.in_data)
11217
11218   def Run(self, name, validate=True, call_fn=None):
11219     """Run an instance allocator and return the results.
11220
11221     """
11222     if call_fn is None:
11223       call_fn = self.rpc.call_iallocator_runner
11224
11225     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11226     result.Raise("Failure while running the iallocator script")
11227
11228     self.out_text = result.payload
11229     if validate:
11230       self._ValidateResult()
11231
11232   def _ValidateResult(self):
11233     """Process the allocator results.
11234
11235     This will process and if successful save the result in
11236     self.out_data and the other parameters.
11237
11238     """
11239     try:
11240       rdict = serializer.Load(self.out_text)
11241     except Exception, err:
11242       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11243
11244     if not isinstance(rdict, dict):
11245       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11246
11247     # TODO: remove backwards compatiblity in later versions
11248     if "nodes" in rdict and "result" not in rdict:
11249       rdict["result"] = rdict["nodes"]
11250       del rdict["nodes"]
11251
11252     for key in "success", "info", "result":
11253       if key not in rdict:
11254         raise errors.OpExecError("Can't parse iallocator results:"
11255                                  " missing key '%s'" % key)
11256       setattr(self, key, rdict[key])
11257
11258     if not isinstance(rdict["result"], list):
11259       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11260                                " is not a list")
11261     self.out_data = rdict
11262
11263
11264 class LUTestAllocator(NoHooksLU):
11265   """Run allocator tests.
11266
11267   This LU runs the allocator tests
11268
11269   """
11270   def CheckPrereq(self):
11271     """Check prerequisites.
11272
11273     This checks the opcode parameters depending on the director and mode test.
11274
11275     """
11276     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11277       for attr in ["mem_size", "disks", "disk_template",
11278                    "os", "tags", "nics", "vcpus"]:
11279         if not hasattr(self.op, attr):
11280           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11281                                      attr, errors.ECODE_INVAL)
11282       iname = self.cfg.ExpandInstanceName(self.op.name)
11283       if iname is not None:
11284         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11285                                    iname, errors.ECODE_EXISTS)
11286       if not isinstance(self.op.nics, list):
11287         raise errors.OpPrereqError("Invalid parameter 'nics'",
11288                                    errors.ECODE_INVAL)
11289       if not isinstance(self.op.disks, list):
11290         raise errors.OpPrereqError("Invalid parameter 'disks'",
11291                                    errors.ECODE_INVAL)
11292       for row in self.op.disks:
11293         if (not isinstance(row, dict) or
11294             "size" not in row or
11295             not isinstance(row["size"], int) or
11296             "mode" not in row or
11297             row["mode"] not in ['r', 'w']):
11298           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11299                                      " parameter", errors.ECODE_INVAL)
11300       if self.op.hypervisor is None:
11301         self.op.hypervisor = self.cfg.GetHypervisorType()
11302     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11303       fname = _ExpandInstanceName(self.cfg, self.op.name)
11304       self.op.name = fname
11305       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11306     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11307       if not hasattr(self.op, "evac_nodes"):
11308         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11309                                    " opcode input", errors.ECODE_INVAL)
11310     else:
11311       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11312                                  self.op.mode, errors.ECODE_INVAL)
11313
11314     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11315       if self.op.allocator is None:
11316         raise errors.OpPrereqError("Missing allocator name",
11317                                    errors.ECODE_INVAL)
11318     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11319       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11320                                  self.op.direction, errors.ECODE_INVAL)
11321
11322   def Exec(self, feedback_fn):
11323     """Run the allocator test.
11324
11325     """
11326     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11327       ial = IAllocator(self.cfg, self.rpc,
11328                        mode=self.op.mode,
11329                        name=self.op.name,
11330                        mem_size=self.op.mem_size,
11331                        disks=self.op.disks,
11332                        disk_template=self.op.disk_template,
11333                        os=self.op.os,
11334                        tags=self.op.tags,
11335                        nics=self.op.nics,
11336                        vcpus=self.op.vcpus,
11337                        hypervisor=self.op.hypervisor,
11338                        )
11339     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11340       ial = IAllocator(self.cfg, self.rpc,
11341                        mode=self.op.mode,
11342                        name=self.op.name,
11343                        relocate_from=list(self.relocate_from),
11344                        )
11345     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11346       ial = IAllocator(self.cfg, self.rpc,
11347                        mode=self.op.mode,
11348                        evac_nodes=self.op.evac_nodes)
11349     else:
11350       raise errors.ProgrammerError("Uncatched mode %s in"
11351                                    " LUTestAllocator.Exec", self.op.mode)
11352
11353     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11354       result = ial.in_text
11355     else:
11356       ial.Run(self.op.allocator, validate=False)
11357       result = ial.out_text
11358     return result
11359
11360
11361 #: Query type implementations
11362 _QUERY_IMPL = {
11363   constants.QR_INSTANCE: _InstanceQuery,
11364   constants.QR_NODE: _NodeQuery,
11365   constants.QR_GROUP: _GroupQuery,
11366   }
11367
11368
11369 def _GetQueryImplementation(name):
11370   """Returns the implemtnation for a query type.
11371
11372   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11373
11374   """
11375   try:
11376     return _QUERY_IMPL[name]
11377   except KeyError:
11378     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11379                                errors.ECODE_INVAL)