code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
# C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    Besides storing its arguments, the constructor initializes the lock
    bookkeeping dictionaries, normalizes C{op.debug_level} to an integer
    (defaulting to 0), validates the opcode and finally calls
    L{CheckArguments}.

    @param processor: object providing the C{Log}, C{LogWarning}, C{LogInfo}
        and C{LogStep} callables, which are re-exported on the LU
    @param op: the opcode this LU executes; C{op.Validate(True)} is called
        on it
    @param context: object whose C{cfg} attribute is stored as C{self.cfg}
    @param rpc: stored as C{self.rpc}

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    # (needed_locks stays None until ExpandNames or a helper sets it)
    self.needed_locks = None
    self.acquired_locks = {}
    # By default no level is shared (0 for every locking level)
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # The SshRunner is created lazily, see __GetSSH
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    # (a missing or non-integer debug_level is replaced by 0)
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    # Must run last: subclasses may rely on everything set up above
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    The runner is created on first access, using the cluster name from the
    configuration, and cached for later calls.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  # Read-only access to the lazily created SshRunner
  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    The base implementation does nothing.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: in the base implementation, if REQ_BGL is
        false (such LUs must provide their own ExpandNames)

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The base implementation calls CheckPrereq on every tasklet, in order,
    when tasklets are defined; otherwise it does nothing.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        # idx is zero-based, the log message counts from one
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The base implementation executes every tasklet in order, passing
    feedback_fn to each of them.

    @param feedback_fn: passed unchanged to each tasklet's Exec
    @raise NotImplementedError: in the base implementation, if no tasklets
        are defined

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    @raise NotImplementedError: always, in the base implementation

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    If needed_locks was already set, it must not yet contain an entry for
    the instance level (this is enforced with an assertion).

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    # Note: the expanded name itself (not a list) is stored here
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    Depending on the value of self.recalculate_locks[locking.LEVEL_NODE] the
    computed node list either replaces (L{constants.LOCKS_REPLACE}) or is
    appended to (L{constants.LOCKS_APPEND}) the current node-level entry of
    self.needed_locks; for any other value needed_locks is left untouched.
    In all cases the node-level entry of self.recalculate_locks is removed
    afterwards.

    In the future it may grow parameters to just lock some instance's nodes,
    if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      # The primary node is always wanted, secondaries only on request
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    # The recalculation request has been served, so it is dropped
    del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, filter_, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = qlang.ReadSimpleFilter("name", filter_)
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: configuration object, passed to L{_GetNodeInstancesInner}
  @param node_name: the node to look for in each instance's all_nodes

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1013
1014
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: configuration object, passed to L{_GetNodeInstancesInner}
  @param node_name: the node compared against each instance's
      primary_node

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1021
1022
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: configuration object, passed to L{_GetNodeInstancesInner}
  @param node_name: the node to look for in each instance's
      secondary_nodes

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1029
1030
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object providing GetFileStorageDir()
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: a one-element list holding the list of file storage
      directories for file storage, an empty list for any other type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  return [[cfg.GetFileStorageDir()]]
1041
1042
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks reported faulty on a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc: RPC runner used to query the mirror status
  @param instance: the instance whose disks are examined
  @param node_name: the node to query
  @param prereq: passed to the RPC result's Raise() as C{prereq}
  @rtype: list
  @return: indices (into the RPC payload) of entries whose local disk
      status is LDS_FAULTY

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  mirror = rpc.call_blockdev_getmirrorstatus(node_name, disks)
  mirror.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # empty/None status entries are skipped rather than treated as faulty
  return [idx for (idx, bdev_status) in enumerate(mirror.payload)
          if bdev_status and
             bdev_status.ldisk_status == constants.LDS_FAULTY]
1058
1059
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose opcode is checked (and possibly
      updated)
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is
      set and the cluster has no default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      # report an error code here too, as the sibling error above does
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)
1090
1091
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (only OP_TARGET, set to the cluster name),
        an empty list, and a list holding only the master node

    """
    cluster_name = self.cfg.GetClusterName()
    master = self.cfg.GetMasterNode()
    return ({"OP_TARGET": cluster_name}, [], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
1112
1113
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (only OP_TARGET, set to the cluster name)
        and two empty lists

    """
    return ({"OP_TARGET": self.cfg.GetClusterName()}, [], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. that the master is the
    only node left and that no instances are configured.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node, then asks the master node to
    stop its master role.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      # best effort: hook failures only produce a warning
      self.LogWarning("Errors occurred running hooks on %s" % master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  Problems are reported through L{_Error}/L{_ErrorIf} using the error
  code constants defined below; each of them is an (item type, code name)
  pair.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # item types an error code can refer to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item type, code name) pairs: cluster-level ...
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  # ... instance-level ...
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  # ... and node-level
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # ETYPE_FIELD is the keyword argument through which callers of
  # _Error/_ErrorIf select the severity; the two values below are the
  # severities used in this class
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  # matches the start of every line (multi-line mode)
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: dictionary of {primary-node: list of instances} for all
1258         instances for which this node is secondary (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeNetwork(self, ninfo, nresult):
1484     """Check the node time.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489
1490     """
1491     node = ninfo.name
1492     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1493
1494     test = constants.NV_NODELIST not in nresult
1495     _ErrorIf(test, self.ENODESSH, node,
1496              "node hasn't returned node ssh connectivity data")
1497     if not test:
1498       if nresult[constants.NV_NODELIST]:
1499         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1500           _ErrorIf(True, self.ENODESSH, node,
1501                    "ssh communication with node '%s': %s", a_node, a_msg)
1502
1503     test = constants.NV_NODENETTEST not in nresult
1504     _ErrorIf(test, self.ENODENET, node,
1505              "node hasn't returned node tcp connectivity data")
1506     if not test:
1507       if nresult[constants.NV_NODENETTEST]:
1508         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1509         for anode in nlist:
1510           _ErrorIf(True, self.ENODENET, node,
1511                    "tcp communication with node '%s': %s",
1512                    anode, nresult[constants.NV_NODENETTEST][anode])
1513
1514     test = constants.NV_MASTERIP not in nresult
1515     _ErrorIf(test, self.ENODENET, node,
1516              "node hasn't returned node master IP reachability data")
1517     if not test:
1518       if not nresult[constants.NV_MASTERIP]:
1519         if node == self.master_node:
1520           msg = "the master node cannot reach the master IP (not configured?)"
1521         else:
1522           msg = "cannot reach the master IP"
1523         _ErrorIf(True, self.ENODENET, node, msg)
1524
1525   def _VerifyInstance(self, instance, instanceconfig, node_image,
1526                       diskstatus):
1527     """Verify an instance.
1528
1529     This function checks to see if the required block devices are
1530     available on the instance's node.
1531
1532     """
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534     node_current = instanceconfig.primary_node
1535
1536     node_vol_should = {}
1537     instanceconfig.MapLVsByNode(node_vol_should)
1538
1539     for node in node_vol_should:
1540       n_img = node_image[node]
1541       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1542         # ignore missing volumes on offline or broken nodes
1543         continue
1544       for volume in node_vol_should[node]:
1545         test = volume not in n_img.volumes
1546         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1547                  "volume %s missing on node %s", volume, node)
1548
1549     if instanceconfig.admin_up:
1550       pri_img = node_image[node_current]
1551       test = instance not in pri_img.instances and not pri_img.offline
1552       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1553                "instance not running on its primary node %s",
1554                node_current)
1555
1556     for node, n_img in node_image.items():
1557       if node != node_current:
1558         test = instance in n_img.instances
1559         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1560                  "instance should not run on node %s", node)
1561
1562     diskdata = [(nname, success, status, idx)
1563                 for (nname, disks) in diskstatus.items()
1564                 for idx, (success, status) in enumerate(disks)]
1565
1566     for nname, success, bdev_status, idx in diskdata:
1567       # the 'ghost node' construction in Exec() ensures that we have a
1568       # node here
1569       snode = node_image[nname]
1570       bad_snode = snode.ghost or snode.offline
1571       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1572                self.EINSTANCEFAULTYDISK, instance,
1573                "couldn't retrieve status for disk/%s on %s: %s",
1574                idx, nname, bdev_status)
1575       _ErrorIf((instanceconfig.admin_up and success and
1576                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1577                self.EINSTANCEFAULTYDISK, instance,
1578                "disk/%s on %s is faulty", idx, nname)
1579
1580   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1581     """Verify if there are any unknown volumes in the cluster.
1582
1583     The .os, .swap and backup volumes are ignored. All other volumes are
1584     reported as unknown.
1585
1586     @type reserved: L{ganeti.utils.FieldSet}
1587     @param reserved: a FieldSet of reserved volume names
1588
1589     """
1590     for node, n_img in node_image.items():
1591       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1592         # skip non-healthy nodes
1593         continue
1594       for volume in n_img.volumes:
1595         test = ((node not in node_vol_should or
1596                 volume not in node_vol_should[node]) and
1597                 not reserved.Matches(volume))
1598         self._ErrorIf(test, self.ENODEORPHANLV, node,
1599                       "volume %s is unknown", volume)
1600
1601   def _VerifyOrphanInstances(self, instancelist, node_image):
1602     """Verify the list of running instances.
1603
1604     This checks what instances are running but unknown to the cluster.
1605
1606     """
1607     for node, n_img in node_image.items():
1608       for o_inst in n_img.instances:
1609         test = o_inst not in instancelist
1610         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1611                       "instance %s on node %s should not exist", o_inst, node)
1612
1613   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1614     """Verify N+1 Memory Resilience.
1615
1616     Check that if one single node dies we can still start all the
1617     instances it was primary for.
1618
1619     """
1620     cluster_info = self.cfg.GetClusterInfo()
1621     for node, n_img in node_image.items():
1622       # This code checks that every node which is now listed as
1623       # secondary has enough memory to host all instances it is
1624       # supposed to should a single other node in the cluster fail.
1625       # FIXME: not ready for failover to an arbitrary node
1626       # FIXME: does not support file-backed instances
1627       # WARNING: we currently take into account down instances as well
1628       # as up ones, considering that even if they're down someone
1629       # might want to start them even in the event of a node failure.
1630       if n_img.offline:
1631         # we're skipping offline nodes from the N+1 warning, since
1632         # most likely we don't have good memory infromation from them;
1633         # we already list instances living on such nodes, and that's
1634         # enough warning
1635         continue
1636       for prinode, instances in n_img.sbp.items():
1637         needed_mem = 0
1638         for instance in instances:
1639           bep = cluster_info.FillBE(instance_cfg[instance])
1640           if bep[constants.BE_AUTO_BALANCE]:
1641             needed_mem += bep[constants.BE_MEMORY]
1642         test = n_img.mfree < needed_mem
1643         self._ErrorIf(test, self.ENODEN1, node,
1644                       "not enough memory to accomodate instance failovers"
1645                       " should node %s fail", prinode)
1646
1647   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1648                        master_files):
1649     """Verifies and computes the node required file checksums.
1650
1651     @type ninfo: L{objects.Node}
1652     @param ninfo: the node to check
1653     @param nresult: the remote results for the node
1654     @param file_list: required list of files
1655     @param local_cksum: dictionary of local files and their checksums
1656     @param master_files: list of files that only masters should have
1657
1658     """
1659     node = ninfo.name
1660     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661
1662     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1663     test = not isinstance(remote_cksum, dict)
1664     _ErrorIf(test, self.ENODEFILECHECK, node,
1665              "node hasn't returned file checksum data")
1666     if test:
1667       return
1668
1669     for file_name in file_list:
1670       node_is_mc = ninfo.master_candidate
1671       must_have = (file_name not in master_files) or node_is_mc
1672       # missing
1673       test1 = file_name not in remote_cksum
1674       # invalid checksum
1675       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1676       # existing and good
1677       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1678       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1679                "file '%s' missing", file_name)
1680       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1681                "file '%s' has wrong checksum", file_name)
1682       # not candidate and this is not a must-have file
1683       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1684                "file '%s' should not exist on non master"
1685                " candidates (and the file is outdated)", file_name)
1686       # all good, except non-master/non-must have combination
1687       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1688                "file '%s' should not exist"
1689                " on non master candidates", file_name)
1690
1691   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1692                       drbd_map):
1693     """Verifies and the node DRBD status.
1694
1695     @type ninfo: L{objects.Node}
1696     @param ninfo: the node to check
1697     @param nresult: the remote results for the node
1698     @param instanceinfo: the dict of instances
1699     @param drbd_helper: the configured DRBD usermode helper
1700     @param drbd_map: the DRBD map as returned by
1701         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1702
1703     """
1704     node = ninfo.name
1705     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1706
1707     if drbd_helper:
1708       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1709       test = (helper_result == None)
1710       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1711                "no drbd usermode helper returned")
1712       if helper_result:
1713         status, payload = helper_result
1714         test = not status
1715         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1716                  "drbd usermode helper check unsuccessful: %s", payload)
1717         test = status and (payload != drbd_helper)
1718         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1719                  "wrong drbd usermode helper: %s", payload)
1720
1721     # compute the DRBD minors
1722     node_drbd = {}
1723     for minor, instance in drbd_map[node].items():
1724       test = instance not in instanceinfo
1725       _ErrorIf(test, self.ECLUSTERCFG, None,
1726                "ghost instance '%s' in temporary DRBD map", instance)
1727         # ghost instance should not be running, but otherwise we
1728         # don't give double warnings (both ghost instance and
1729         # unallocated minor in use)
1730       if test:
1731         node_drbd[minor] = (instance, False)
1732       else:
1733         instance = instanceinfo[instance]
1734         node_drbd[minor] = (instance.name, instance.admin_up)
1735
1736     # and now check them
1737     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1738     test = not isinstance(used_minors, (tuple, list))
1739     _ErrorIf(test, self.ENODEDRBD, node,
1740              "cannot parse drbd status file: %s", str(used_minors))
1741     if test:
1742       # we cannot check drbd status
1743       return
1744
1745     for minor, (iname, must_exist) in node_drbd.items():
1746       test = minor not in used_minors and must_exist
1747       _ErrorIf(test, self.ENODEDRBD, node,
1748                "drbd minor %d of instance %s is not active", minor, iname)
1749     for minor in used_minors:
1750       test = minor not in node_drbd
1751       _ErrorIf(test, self.ENODEDRBD, node,
1752                "unallocated drbd minor %d is in use", minor)
1753
1754   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1755     """Builds the node OS structures.
1756
1757     @type ninfo: L{objects.Node}
1758     @param ninfo: the node to check
1759     @param nresult: the remote results for the node
1760     @param nimg: the node image object
1761
1762     """
1763     node = ninfo.name
1764     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1765
1766     remote_os = nresult.get(constants.NV_OSLIST, None)
1767     test = (not isinstance(remote_os, list) or
1768             not compat.all(isinstance(v, list) and len(v) == 7
1769                            for v in remote_os))
1770
1771     _ErrorIf(test, self.ENODEOS, node,
1772              "node hasn't returned valid OS data")
1773
1774     nimg.os_fail = test
1775
1776     if test:
1777       return
1778
1779     os_dict = {}
1780
1781     for (name, os_path, status, diagnose,
1782          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1783
1784       if name not in os_dict:
1785         os_dict[name] = []
1786
1787       # parameters is a list of lists instead of list of tuples due to
1788       # JSON lacking a real tuple type, fix it:
1789       parameters = [tuple(v) for v in parameters]
1790       os_dict[name].append((os_path, status, diagnose,
1791                             set(variants), set(parameters), set(api_ver)))
1792
1793     nimg.oslist = os_dict
1794
1795   def _VerifyNodeOS(self, ninfo, nimg, base):
1796     """Verifies the node OS list.
1797
1798     @type ninfo: L{objects.Node}
1799     @param ninfo: the node to check
1800     @param nimg: the node image object
1801     @param base: the 'template' node we match against (e.g. from the master)
1802
1803     """
1804     node = ninfo.name
1805     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1806
1807     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1808
1809     for os_name, os_data in nimg.oslist.items():
1810       assert os_data, "Empty OS status for OS %s?!" % os_name
1811       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1812       _ErrorIf(not f_status, self.ENODEOS, node,
1813                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1814       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1815                "OS '%s' has multiple entries (first one shadows the rest): %s",
1816                os_name, utils.CommaJoin([v[0] for v in os_data]))
1817       # this will catched in backend too
1818       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1819                and not f_var, self.ENODEOS, node,
1820                "OS %s with API at least %d does not declare any variant",
1821                os_name, constants.OS_API_V15)
1822       # comparisons with the 'base' image
1823       test = os_name not in base.oslist
1824       _ErrorIf(test, self.ENODEOS, node,
1825                "Extra OS %s not present on reference node (%s)",
1826                os_name, base.name)
1827       if test:
1828         continue
1829       assert base.oslist[os_name], "Base node has empty OS status?"
1830       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1831       if not b_status:
1832         # base OS is invalid, skipping
1833         continue
1834       for kind, a, b in [("API version", f_api, b_api),
1835                          ("variants list", f_var, b_var),
1836                          ("parameters", f_param, b_param)]:
1837         _ErrorIf(a != b, self.ENODEOS, node,
1838                  "OS %s %s differs from reference node %s: %s vs. %s",
1839                  kind, os_name, base.name,
1840                  utils.CommaJoin(a), utils.CommaJoin(b))
1841
1842     # check any missing OSes
1843     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1844     _ErrorIf(missing, self.ENODEOS, node,
1845              "OSes present on reference node %s but missing on this node: %s",
1846              base.name, utils.CommaJoin(missing))
1847
1848   def _VerifyOob(self, ninfo, nresult):
1849     """Verifies out of band functionality of a node.
1850
1851     @type ninfo: L{objects.Node}
1852     @param ninfo: the node to check
1853     @param nresult: the remote results for the node
1854
1855     """
1856     node = ninfo.name
1857     # We just have to verify the paths on master and/or master candidates
1858     # as the oob helper is invoked on the master
1859     if ((ninfo.master_candidate or ninfo.master_capable) and
1860         constants.NV_OOB_PATHS in nresult):
1861       for path_result in nresult[constants.NV_OOB_PATHS]:
1862         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1863
1864   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1865     """Verifies and updates the node volume data.
1866
1867     This function will update a L{NodeImage}'s internal structures
1868     with data from the remote call.
1869
1870     @type ninfo: L{objects.Node}
1871     @param ninfo: the node to check
1872     @param nresult: the remote results for the node
1873     @param nimg: the node image object
1874     @param vg_name: the configured VG name
1875
1876     """
1877     node = ninfo.name
1878     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1879
1880     nimg.lvm_fail = True
1881     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1882     if vg_name is None:
1883       pass
1884     elif isinstance(lvdata, basestring):
1885       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1886                utils.SafeEncode(lvdata))
1887     elif not isinstance(lvdata, dict):
1888       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1889     else:
1890       nimg.volumes = lvdata
1891       nimg.lvm_fail = False
1892
1893   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1894     """Verifies and updates the node instance list.
1895
1896     If the listing was successful, then updates this node's instance
1897     list. Otherwise, it marks the RPC call as failed for the instance
1898     list key.
1899
1900     @type ninfo: L{objects.Node}
1901     @param ninfo: the node to check
1902     @param nresult: the remote results for the node
1903     @param nimg: the node image object
1904
1905     """
1906     idata = nresult.get(constants.NV_INSTANCELIST, None)
1907     test = not isinstance(idata, list)
1908     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1909                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1910     if test:
1911       nimg.hyp_fail = True
1912     else:
1913       nimg.instances = idata
1914
1915   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1916     """Verifies and computes a node information map
1917
1918     @type ninfo: L{objects.Node}
1919     @param ninfo: the node to check
1920     @param nresult: the remote results for the node
1921     @param nimg: the node image object
1922     @param vg_name: the configured VG name
1923
1924     """
1925     node = ninfo.name
1926     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1927
1928     # try to read free memory (from the hypervisor)
1929     hv_info = nresult.get(constants.NV_HVINFO, None)
1930     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1931     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1932     if not test:
1933       try:
1934         nimg.mfree = int(hv_info["memory_free"])
1935       except (ValueError, TypeError):
1936         _ErrorIf(True, self.ENODERPC, node,
1937                  "node returned invalid nodeinfo, check hypervisor")
1938
1939     # FIXME: devise a free space model for file based instances as well
1940     if vg_name is not None:
1941       test = (constants.NV_VGLIST not in nresult or
1942               vg_name not in nresult[constants.NV_VGLIST])
1943       _ErrorIf(test, self.ENODELVM, node,
1944                "node didn't return data for the volume group '%s'"
1945                " - it is either missing or broken", vg_name)
1946       if not test:
1947         try:
1948           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1949         except (ValueError, TypeError):
1950           _ErrorIf(True, self.ENODERPC, node,
1951                    "node returned invalid LVM info, check LVM status")
1952
1953   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1954     """Gets per-disk status information for all instances.
1955
1956     @type nodelist: list of strings
1957     @param nodelist: Node names
1958     @type node_image: dict of (name, L{objects.Node})
1959     @param node_image: Node objects
1960     @type instanceinfo: dict of (name, L{objects.Instance})
1961     @param instanceinfo: Instance objects
1962     @rtype: {instance: {node: [(succes, payload)]}}
1963     @return: a dictionary of per-instance dictionaries with nodes as
1964         keys and disk information as values; the disk information is a
1965         list of tuples (success, payload)
1966
1967     """
1968     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1969
1970     node_disks = {}
1971     node_disks_devonly = {}
1972     diskless_instances = set()
1973     diskless = constants.DT_DISKLESS
1974
1975     for nname in nodelist:
1976       node_instances = list(itertools.chain(node_image[nname].pinst,
1977                                             node_image[nname].sinst))
1978       diskless_instances.update(inst for inst in node_instances
1979                                 if instanceinfo[inst].disk_template == diskless)
1980       disks = [(inst, disk)
1981                for inst in node_instances
1982                for disk in instanceinfo[inst].disks]
1983
1984       if not disks:
1985         # No need to collect data
1986         continue
1987
1988       node_disks[nname] = disks
1989
1990       # Creating copies as SetDiskID below will modify the objects and that can
1991       # lead to incorrect data returned from nodes
1992       devonly = [dev.Copy() for (_, dev) in disks]
1993
1994       for dev in devonly:
1995         self.cfg.SetDiskID(dev, nname)
1996
1997       node_disks_devonly[nname] = devonly
1998
1999     assert len(node_disks) == len(node_disks_devonly)
2000
2001     # Collect data from all nodes with disks
2002     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2003                                                           node_disks_devonly)
2004
2005     assert len(result) == len(node_disks)
2006
2007     instdisk = {}
2008
2009     for (nname, nres) in result.items():
2010       disks = node_disks[nname]
2011
2012       if nres.offline:
2013         # No data from this node
2014         data = len(disks) * [(False, "node offline")]
2015       else:
2016         msg = nres.fail_msg
2017         _ErrorIf(msg, self.ENODERPC, nname,
2018                  "while getting disk information: %s", msg)
2019         if msg:
2020           # No data from this node
2021           data = len(disks) * [(False, msg)]
2022         else:
2023           data = []
2024           for idx, i in enumerate(nres.payload):
2025             if isinstance(i, (tuple, list)) and len(i) == 2:
2026               data.append(i)
2027             else:
2028               logging.warning("Invalid result from node %s, entry %d: %s",
2029                               nname, idx, i)
2030               data.append((False, "Invalid result from the remote node"))
2031
2032       for ((inst, _), status) in zip(disks, data):
2033         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2034
2035     # Add empty entries for diskless instances.
2036     for inst in diskless_instances:
2037       assert inst not in instdisk
2038       instdisk[inst] = {}
2039
2040     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2041                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2042                       compat.all(isinstance(s, (tuple, list)) and
2043                                  len(s) == 2 for s in statuses)
2044                       for inst, nnames in instdisk.items()
2045                       for nname, statuses in nnames.items())
2046     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2047
2048     return instdisk
2049
2050   def _VerifyHVP(self, hvp_data):
2051     """Verifies locally the syntax of the hypervisor parameters.
2052
2053     """
2054     for item, hv_name, hv_params in hvp_data:
2055       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2056              (item, hv_name))
2057       try:
2058         hv_class = hypervisor.GetHypervisor(hv_name)
2059         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2060         hv_class.CheckParameterSyntax(hv_params)
2061       except errors.GenericError, err:
2062         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2063
2064
2065   def BuildHooksEnv(self):
2066     """Build hooks env.
2067
2068     Cluster-Verify hooks just ran in the post phase and their failure makes
2069     the output be logged in the verify output and the verification to fail.
2070
2071     """
2072     all_nodes = self.cfg.GetNodeList()
2073     env = {
2074       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2075       }
2076     for node in self.cfg.GetAllNodesInfo().values():
2077       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2078
2079     return env, [], all_nodes
2080
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The method checks the global settings and certificates, gathers
    the verification data from all nodes with a single node-verify RPC
    call, then verifies nodes, instances, orphan volumes/instances and
    (unless skipped) N+1 memory redundancy.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the negation of C{self.bad}, which is reset to False at
        the start of this method

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    # only configured nodes are in this map (ghost nodes are not)
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # Compute the set of hypervisor parameters
    # (each entry is a tuple of source description, hypervisor name and
    # parameters)
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # the checks requested from every node via the node-verify RPC call
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS: hvp_data,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    # the LVM and DRBD list checks are requested only if a volume group
    # is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    # Record in the node images which instances each node hosts as
    # primary (pinst) or secondary (sinst), and the secondaries grouped
    # by their primary node (sbp)
    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          # (used by an instance, but not in the configured node list)
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    # image of the node used as reference for the OS comparisons: the
    # first vm_capable node (in nodelist order) which returned valid OS
    # data
    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      # drained nodes are counted only if they are neither the master
      # nor a master candidate
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        # no payload to look at, the remaining node checks are skipped
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      # storage, hypervisor and OS data is looked at only for
      # vm_capable nodes
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node %s", inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_NET_MIRROR:
        # (same value as the pnode assigned above)
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        # NOTE(review): nodeinfo_byname holds only configured nodes, so
        # this lookup looks like it raises KeyError if the instance uses
        # a ghost node - confirm whether that can happen here
        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # the notes below are purely informational
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2390
2391   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2392     """Analyze the post-hooks' result
2393
2394     This method analyses the hook result, handles it, and sends some
2395     nicely-formatted feedback back to the user.
2396
2397     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2398         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2399     @param hooks_results: the results of the multi-node hooks rpc call
2400     @param feedback_fn: function used send feedback back to the caller
2401     @param lu_result: previous Exec result
2402     @return: the new Exec result, based on the previous result
2403         and hook results
2404
2405     """
2406     # We only really run POST phase hooks, and are only interested in
2407     # their results
2408     if phase == constants.HOOKS_PHASE_POST:
2409       # Used to change hooks' output to proper indentation
2410       feedback_fn("* Hooks Results")
2411       assert hooks_results, "invalid result from hooks"
2412
2413       for node_name in hooks_results:
2414         res = hooks_results[node_name]
2415         msg = res.fail_msg
2416         test = msg and not res.offline
2417         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2418                       "Communication failure in hooks execution: %s", msg)
2419         if res.offline or msg:
2420           # No need to investigate payload if node is offline or gave an error.
2421           # override manually lu_result here as _ErrorIf only
2422           # overrides self.bad
2423           lu_result = 1
2424           continue
2425         for script, hkr, output in res.payload:
2426           test = hkr == constants.HKR_FAIL
2427           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2428                         "Script %s failed, output:", script)
2429           if test:
2430             output = self._HOOKS_INDENT_RE.sub('      ', output)
2431             feedback_fn("%s" % output)
2432             lu_result = 0
2433
2434       return lu_result
2435
2436
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, marking every level shared.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances marked as administratively up are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    offline_lv_instances = []
    missing_lvs = {}
    outcome = (node_errors, offline_lv_instances, missing_lvs)

    vm_nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())

    # build a {(node, volume): instance} map for all running instances
    lv_owner = {}
    for instance in self.cfg.GetAllInstancesInfo().values():
      if not instance.admin_up:
        continue
      lvs_by_node = {}
      instance.MapLVsByNode(lvs_by_node)
      for node_name, volumes in lvs_by_node.items():
        for volume in volumes:
          lv_owner[(node_name, volume)] = instance

    if not lv_owner:
      return outcome

    for node_name, reply in self.rpc.call_lv_list(vm_nodes, []).items():
      if reply.offline:
        continue
      err = reply.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node_name, err)
        node_errors[node_name] = err
        continue

      for lv_name, (_, _, lv_online) in reply.payload.items():
        # every volume reported by the node is struck off the map
        owner = lv_owner.pop((node_name, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in offline_lv_instances:
          offline_lv_instances.append(owner.name)

    # whatever is still in the map was not reported by any node, i.e.
    # these are the missing LVs; group them per instance name
    for node_vol, owner in lv_owner.items():
      missing_lvs.setdefault(owner.name, []).append(node_vol)

    return outcome
2503
2504
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The size recorded in the configuration for each disk is compared with
  the size reported by the instance's primary node, and the recorded
  value is corrected when they differ.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the needed locks.

    With an explicit instance list only those instances are locked and
    the node locks are recalculated later; otherwise all nodes and all
    instances are locked. Every lock level is marked as shared.

    """
    requested = self.op.instances
    if requested:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in requested]
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of explicitly requested instances.

    """
    if self.wanted_names is None:
      # everything was already requested in ExpandNames
      return
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    names = self.wanted_names
    if names is None:
      names = self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: True if any child size was changed, False otherwise

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    disks_by_node = {}
    for instance in self.wanted_instances:
      entries = disks_by_node.setdefault(instance.primary_node, [])
      entries.extend((instance, idx, disk)
                     for idx, disk in enumerate(instance.disks))

    changed = []
    for node, entries in disks_by_node.items():
      # work on copies, so that setting the disk ID does not touch the
      # objects held in the configuration
      query_disks = [entry[2].Copy() for entry in entries]
      for qdisk in query_disks:
        self.cfg.SetDiskID(qdisk, node)
      result = self.rpc.call_blockdev_getsize(node, query_disks)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      sizes = result.payload
      if len(sizes) != len(entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(entries), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for (instance, idx, disk), size in zip(entries, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the reported value is scaled down by 2**20 before being
        # compared with the recorded size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2620
2621
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, list holding only the master
        node, list of all nodes)

    """
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_node = self.cfg.GetMasterNode()
    return hooks_env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the request is refused if neither name nor
    IP would change, or if a new IP already answers on the node daemon
    port. On success C{self.ip} holds the resolved IP and C{self.op.name}
    the resolved name.

    """
    resolved = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = target_ip != current_ip

    if not ip_changed and target_name == current_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if ip_changed and netutils.TcpPing(target_ip,
                                       constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 target_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated, and the master role is started again even if the
    update fails.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_node = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master_node, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster_info = self.cfg.GetClusterInfo()
      cluster_info.cluster_name = new_name
      cluster_info.master_ip = new_ip
      self.cfg.Update(cluster_info, feedback_fn)

      # update the known hosts file and push it to all other online nodes
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      targets = self.cfg.GetOnlineNodeList()
      if master_node in targets:
        targets.remove(master_node)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      start_result = self.rpc.call_node_start_master(master_node, False, False)
      start_err = start_result.fail_msg
      if start_err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", start_err)

    return new_name
2698
2699
2700 class LUClusterSetParams(LogicalUnit):
2701   """Change the parameters of the cluster.
2702
2703   """
2704   HPATH = "cluster-modify"
2705   HTYPE = constants.HTYPE_CLUSTER
2706   REQ_BGL = False
2707
2708   def CheckArguments(self):
2709     """Check parameters
2710
2711     """
2712     if self.op.uid_pool:
2713       uidpool.CheckUidPool(self.op.uid_pool)
2714
2715     if self.op.add_uids:
2716       uidpool.CheckUidPool(self.op.add_uids)
2717
2718     if self.op.remove_uids:
2719       uidpool.CheckUidPool(self.op.remove_uids)
2720
2721   def ExpandNames(self):
2722     # FIXME: in the future maybe other cluster params won't require checking on
2723     # all nodes to be modified.
2724     self.needed_locks = {
2725       locking.LEVEL_NODE: locking.ALL_SET,
2726     }
2727     self.share_locks[locking.LEVEL_NODE] = 1
2728
2729   def BuildHooksEnv(self):
2730     """Build hooks env.
2731
2732     """
2733     env = {
2734       "OP_TARGET": self.cfg.GetClusterName(),
2735       "NEW_VG_NAME": self.op.vg_name,
2736       }
2737     mn = self.cfg.GetMasterNode()
2738     return env, [mn], [mn]
2739
2740   def CheckPrereq(self):
2741     """Check prerequisites.
2742
2743     This checks whether the given params don't conflict and
2744     if the given volume group is valid.
2745
2746     """
2747     if self.op.vg_name is not None and not self.op.vg_name:
2748       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2749         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2750                                    " instances exist", errors.ECODE_INVAL)
2751
2752     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2753       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2754         raise errors.OpPrereqError("Cannot disable drbd helper while"
2755                                    " drbd-based instances exist",
2756                                    errors.ECODE_INVAL)
2757
2758     node_list = self.acquired_locks[locking.LEVEL_NODE]
2759
2760     # if vg_name not None, checks given volume group on all nodes
2761     if self.op.vg_name:
2762       vglist = self.rpc.call_vg_list(node_list)
2763       for node in node_list:
2764         msg = vglist[node].fail_msg
2765         if msg:
2766           # ignoring down node
2767           self.LogWarning("Error while gathering data on node %s"
2768                           " (ignoring node): %s", node, msg)
2769           continue
2770         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2771                                               self.op.vg_name,
2772                                               constants.MIN_VG_SIZE)
2773         if vgstatus:
2774           raise errors.OpPrereqError("Error on node '%s': %s" %
2775                                      (node, vgstatus), errors.ECODE_ENVIRON)
2776
2777     if self.op.drbd_helper:
2778       # checks given drbd helper on all nodes
2779       helpers = self.rpc.call_drbd_helper(node_list)
2780       for node in node_list:
2781         ninfo = self.cfg.GetNodeInfo(node)
2782         if ninfo.offline:
2783           self.LogInfo("Not checking drbd helper on offline node %s", node)
2784           continue
2785         msg = helpers[node].fail_msg
2786         if msg:
2787           raise errors.OpPrereqError("Error checking drbd helper on node"
2788                                      " '%s': %s" % (node, msg),
2789                                      errors.ECODE_ENVIRON)
2790         node_helper = helpers[node].payload
2791         if node_helper != self.op.drbd_helper:
2792           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2793                                      (node, node_helper), errors.ECODE_ENVIRON)
2794
2795     self.cluster = cluster = self.cfg.GetClusterInfo()
2796     # validate params changes
2797     if self.op.beparams:
2798       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2799       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2800
2801     if self.op.ndparams:
2802       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2803       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2804
2805     if self.op.nicparams:
2806       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2807       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2808       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2809       nic_errors = []
2810
2811       # check all instances for consistency
2812       for instance in self.cfg.GetAllInstancesInfo().values():
2813         for nic_idx, nic in enumerate(instance.nics):
2814           params_copy = copy.deepcopy(nic.nicparams)
2815           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2816
2817           # check parameter syntax
2818           try:
2819             objects.NIC.CheckParameterSyntax(params_filled)
2820           except errors.ConfigurationError, err:
2821             nic_errors.append("Instance %s, nic/%d: %s" %
2822                               (instance.name, nic_idx, err))
2823
2824           # if we're moving instances to routed, check that they have an ip
2825           target_mode = params_filled[constants.NIC_MODE]
2826           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2827             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2828                               (instance.name, nic_idx))
2829       if nic_errors:
2830         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2831                                    "\n".join(nic_errors))
2832
2833     # hypervisor list/parameters
2834     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2835     if self.op.hvparams:
2836       for hv_name, hv_dict in self.op.hvparams.items():
2837         if hv_name not in self.new_hvparams:
2838           self.new_hvparams[hv_name] = hv_dict
2839         else:
2840           self.new_hvparams[hv_name].update(hv_dict)
2841
2842     # os hypervisor parameters
2843     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2844     if self.op.os_hvp:
2845       for os_name, hvs in self.op.os_hvp.items():
2846         if os_name not in self.new_os_hvp:
2847           self.new_os_hvp[os_name] = hvs
2848         else:
2849           for hv_name, hv_dict in hvs.items():
2850             if hv_name not in self.new_os_hvp[os_name]:
2851               self.new_os_hvp[os_name][hv_name] = hv_dict
2852             else:
2853               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2854
2855     # os parameters
2856     self.new_osp = objects.FillDict(cluster.osparams, {})
2857     if self.op.osparams:
2858       for os_name, osp in self.op.osparams.items():
2859         if os_name not in self.new_osp:
2860           self.new_osp[os_name] = {}
2861
2862         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2863                                                   use_none=True)
2864
2865         if not self.new_osp[os_name]:
2866           # we removed all parameters
2867           del self.new_osp[os_name]
2868         else:
2869           # check the parameter validity (remote check)
2870           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2871                          os_name, self.new_osp[os_name])
2872
2873     # changes to the hypervisor list
2874     if self.op.enabled_hypervisors is not None:
2875       self.hv_list = self.op.enabled_hypervisors
2876       for hv in self.hv_list:
2877         # if the hypervisor doesn't already exist in the cluster
2878         # hvparams, we initialize it to empty, and then (in both
2879         # cases) we make sure to fill the defaults, as we might not
2880         # have a complete defaults list if the hypervisor wasn't
2881         # enabled before
2882         if hv not in new_hvp:
2883           new_hvp[hv] = {}
2884         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2885         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2886     else:
2887       self.hv_list = cluster.enabled_hypervisors
2888
2889     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2890       # either the enabled list has changed, or the parameters have, validate
2891       for hv_name, hv_params in self.new_hvparams.items():
2892         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2893             (self.op.enabled_hypervisors and
2894              hv_name in self.op.enabled_hypervisors)):
2895           # either this is a new hypervisor, or its parameters have changed
2896           hv_class = hypervisor.GetHypervisor(hv_name)
2897           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2898           hv_class.CheckParameterSyntax(hv_params)
2899           _CheckHVParams(self, node_list, hv_name, hv_params)
2900
2901     if self.op.os_hvp:
2902       # no need to check any newly-enabled hypervisors, since the
2903       # defaults have already been checked in the above code-block
2904       for os_name, os_hvp in self.new_os_hvp.items():
2905         for hv_name, hv_params in os_hvp.items():
2906           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2907           # we need to fill in the new os_hvp on top of the actual hv_p
2908           cluster_defaults = self.new_hvparams.get(hv_name, {})
2909           new_osp = objects.FillDict(cluster_defaults, hv_params)
2910           hv_class = hypervisor.GetHypervisor(hv_name)
2911           hv_class.CheckParameterSyntax(new_osp)
2912           _CheckHVParams(self, node_list, hv_name, new_osp)
2913
2914     if self.op.default_iallocator:
2915       alloc_script = utils.FindFile(self.op.default_iallocator,
2916                                     constants.IALLOCATOR_SEARCH_PATH,
2917                                     os.path.isfile)
2918       if alloc_script is None:
2919         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2920                                    " specified" % self.op.default_iallocator,
2921                                    errors.ECODE_INVAL)
2922
2923   def Exec(self, feedback_fn):
2924     """Change the parameters of the cluster.
2925
2926     """
2927     if self.op.vg_name is not None:
2928       new_volume = self.op.vg_name
2929       if not new_volume:
2930         new_volume = None
2931       if new_volume != self.cfg.GetVGName():
2932         self.cfg.SetVGName(new_volume)
2933       else:
2934         feedback_fn("Cluster LVM configuration already in desired"
2935                     " state, not changing")
2936     if self.op.drbd_helper is not None:
2937       new_helper = self.op.drbd_helper
2938       if not new_helper:
2939         new_helper = None
2940       if new_helper != self.cfg.GetDRBDHelper():
2941         self.cfg.SetDRBDHelper(new_helper)
2942       else:
2943         feedback_fn("Cluster DRBD helper already in desired state,"
2944                     " not changing")
2945     if self.op.hvparams:
2946       self.cluster.hvparams = self.new_hvparams
2947     if self.op.os_hvp:
2948       self.cluster.os_hvp = self.new_os_hvp
2949     if self.op.enabled_hypervisors is not None:
2950       self.cluster.hvparams = self.new_hvparams
2951       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2952     if self.op.beparams:
2953       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2954     if self.op.nicparams:
2955       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2956     if self.op.osparams:
2957       self.cluster.osparams = self.new_osp
2958     if self.op.ndparams:
2959       self.cluster.ndparams = self.new_ndparams
2960
2961     if self.op.candidate_pool_size is not None:
2962       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2963       # we need to update the pool size here, otherwise the save will fail
2964       _AdjustCandidatePool(self, [])
2965
2966     if self.op.maintain_node_health is not None:
2967       self.cluster.maintain_node_health = self.op.maintain_node_health
2968
2969     if self.op.prealloc_wipe_disks is not None:
2970       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2971
2972     if self.op.add_uids is not None:
2973       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2974
2975     if self.op.remove_uids is not None:
2976       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2977
2978     if self.op.uid_pool is not None:
2979       self.cluster.uid_pool = self.op.uid_pool
2980
2981     if self.op.default_iallocator is not None:
2982       self.cluster.default_iallocator = self.op.default_iallocator
2983
2984     if self.op.reserved_lvs is not None:
2985       self.cluster.reserved_lvs = self.op.reserved_lvs
2986
2987     def helper_os(aname, mods, desc):
2988       desc += " OS list"
2989       lst = getattr(self.cluster, aname)
2990       for key, val in mods:
2991         if key == constants.DDM_ADD:
2992           if val in lst:
2993             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2994           else:
2995             lst.append(val)
2996         elif key == constants.DDM_REMOVE:
2997           if val in lst:
2998             lst.remove(val)
2999           else:
3000             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3001         else:
3002           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3003
3004     if self.op.hidden_os:
3005       helper_os("hidden_os", self.op.hidden_os, "hidden")
3006
3007     if self.op.blacklisted_os:
3008       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3009
3010     if self.op.master_netdev:
3011       master = self.cfg.GetMasterNode()
3012       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3013                   self.cluster.master_netdev)
3014       result = self.rpc.call_node_stop_master(master, False)
3015       result.Raise("Could not disable the master ip")
3016       feedback_fn("Changing master_netdev from %s to %s" %
3017                   (self.cluster.master_netdev, self.op.master_netdev))
3018       self.cluster.master_netdev = self.op.master_netdev
3019
3020     self.cfg.Update(self.cluster, feedback_fn)
3021
3022     if self.op.master_netdev:
3023       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3024                   self.op.master_netdev)
3025       result = self.rpc.call_node_start_master(master, False, False)
3026       if result.fail_msg:
3027         self.LogWarning("Could not re-enable the master ip on"
3028                         " the master, please restart manually: %s",
3029                         result.fail_msg)
3030
3031
3032 def _UploadHelper(lu, nodes, fname):
3033   """Helper for uploading a file and showing warnings.
3034
3035   """
3036   if os.path.exists(fname):
3037     result = lu.rpc.call_upload_file(nodes, fname)
3038     for to_node, to_result in result.items():
3039       msg = to_result.fail_msg
3040       if msg:
3041         msg = ("Copy of file %s to node %s failed: %s" %
3042                (fname, to_node, msg))
3043         lu.proc.LogWarning(msg)
3044
3045
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  The master node itself is never an upload target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  all_targets = lu.cfg.GetOnlineNodeList()
  non_vm_capable = lu.cfg.GetNonVmCapableNodeList()
  vm_targets = [name for name in all_targets if name not in non_vm_capable]
  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)
  for targets in (all_targets, vm_targets):
    if master_info.name in targets:
      targets.remove(master_info.name)

  # 2. Gather files to distribute
  common_files = set([
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.RAPI_CERT_FILE,
    constants.RAPI_USERS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  # files needed by the enabled hypervisors
  hv_files = set()
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    hv_files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for path in common_files:
    _UploadHelper(lu, all_targets, path)
  for path in hv_files:
    _UploadHelper(lu, vm_targets, path)
3093
3094
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks on all nodes, with the node level marked shared.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The cluster object is saved again unchanged, and then the ancillary
    files are copied to the nodes.

    """
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
3115
3116
3117 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3118   """Sleep and poll for an instance's disk to sync.
3119
3120   """
3121   if not instance.disks or disks is not None and not disks:
3122     return True
3123
3124   disks = _ExpandCheckDisks(instance, disks)
3125
3126   if not oneshot:
3127     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3128
3129   node = instance.primary_node
3130
3131   for dev in disks:
3132     lu.cfg.SetDiskID(dev, node)
3133
3134   # TODO: Convert to utils.Retry
3135
3136   retries = 0
3137   degr_retries = 10 # in seconds, as we sleep 1 second each time
3138   while True:
3139     max_time = 0
3140     done = True
3141     cumul_degraded = False
3142     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3143     msg = rstats.fail_msg
3144     if msg:
3145       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3146       retries += 1
3147       if retries >= 10:
3148         raise errors.RemoteError("Can't contact node %s for mirror data,"
3149                                  " aborting." % node)
3150       time.sleep(6)
3151       continue
3152     rstats = rstats.payload
3153     retries = 0
3154     for i, mstat in enumerate(rstats):
3155       if mstat is None:
3156         lu.LogWarning("Can't compute data for node %s/%s",
3157                            node, disks[i].iv_name)
3158         continue
3159
3160       cumul_degraded = (cumul_degraded or
3161                         (mstat.is_degraded and mstat.sync_percent is None))
3162       if mstat.sync_percent is not None:
3163         done = False
3164         if mstat.estimated_time is not None:
3165           rem_time = ("%s remaining (estimated)" %
3166                       utils.FormatSeconds(mstat.estimated_time))
3167           max_time = mstat.estimated_time
3168         else:
3169           rem_time = "no time estimate"
3170         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3171                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3172
3173     # if we're done but degraded, let's do a few small retries, to
3174     # make sure we see a stable and not transient situation; therefore
3175     # we force restart of the loop
3176     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3177       logging.info("Degraded disks found, %d retries left", degr_retries)
3178       degr_retries -= 1
3179       time.sleep(1)
3180       continue
3181
3182     if done or oneshot:
3183       break
3184
3185     time.sleep(min(60, max_time))
3186
3187   if done:
3188     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3189   return not cumul_degraded
3190
3191
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  The device itself is only queried if we are on the primary node or if
  the device reports that it is assembled on secondaries. Children are
  checked recursively (always with the is_degraded test), stopping at
  the first inconsistency found.

  @return: True if the device and all its checked children are
      consistent, False otherwise

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    err = found.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  if dev.children:
    for child in dev.children:
      if not consistent:
        # once something is inconsistent, the remaining children are
        # not queried any more
        break
      consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
3224
3225
3226 class LUOobCommand(NoHooksLU):
3227   """Logical unit for OOB handling.
3228
3229   """
3230   REG_BGL = False
3231   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3232
3233   def CheckPrereq(self):
3234     """Check prerequisites.
3235
3236     This checks:
3237      - the node exists in the configuration
3238      - OOB is supported
3239
3240     Any errors are signaled by raising errors.OpPrereqError.
3241
3242     """
3243     self.nodes = []
3244     self.master_node = self.cfg.GetMasterNode()
3245
3246     if self.op.node_names:
3247       if self.op.command in self._SKIP_MASTER:
3248         if self.master_node in self.op.node_names:
3249           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3250           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3251
3252           if master_oob_handler:
3253             additional_text = ("Run '%s %s %s' if you want to operate on the"
3254                                " master regardless") % (master_oob_handler,
3255                                                         self.op.command,
3256                                                         self.master_node)
3257           else:
3258             additional_text = "The master node does not support out-of-band"
3259
3260           raise errors.OpPrereqError(("Operating on the master node %s is not"
3261                                       " allowed for %s\n%s") %
3262                                      (self.master_node, self.op.command,
3263                                       additional_text), errors.ECODE_INVAL)
3264     else:
3265       self.op.node_names = self.cfg.GetNodeList()
3266       if self.op.command in self._SKIP_MASTER:
3267         self.op.node_names.remove(self.master_node)
3268
3269     if self.op.command in self._SKIP_MASTER:
3270       assert self.master_node not in self.op.node_names
3271
3272     for node_name in self.op.node_names:
3273       node = self.cfg.GetNodeInfo(node_name)
3274
3275       if node is None:
3276         raise errors.OpPrereqError("Node %s not found" % node_name,
3277                                    errors.ECODE_NOENT)
3278       else:
3279         self.nodes.append(node)
3280
3281       if (not self.op.ignore_status and
3282           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3283         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3284                                     " not marked offline") % node_name,
3285                                    errors.ECODE_STATE)
3286
3287   def ExpandNames(self):
3288     """Gather locks we need.
3289
3290     """
3291     if self.op.node_names:
3292       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3293                             for name in self.op.node_names]
3294       lock_names = self.op.node_names
3295     else:
3296       lock_names = locking.ALL_SET
3297
3298     self.needed_locks = {
3299       locking.LEVEL_NODE: lock_names,
3300       }
3301
3302   def Exec(self, feedback_fn):
3303     """Execute OOB and return result if we expect any.
3304
3305     """
3306     master_node = self.master_node
3307     ret = []
3308
3309     for node in self.nodes:
3310       node_entry = [(constants.RS_NORMAL, node.name)]
3311       ret.append(node_entry)
3312
3313       oob_program = _SupportsOob(self.cfg, node)
3314
3315       if not oob_program:
3316         node_entry.append((constants.RS_UNAVAIL, None))
3317         continue
3318
3319       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3320                    self.op.command, oob_program, node.name)
3321       result = self.rpc.call_run_oob(master_node, oob_program,
3322                                      self.op.command, node.name,
3323                                      self.op.timeout)
3324
3325       if result.fail_msg:
3326         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3327                         node.name, result.fail_msg)
3328         node_entry.append((constants.RS_NODATA, None))
3329       else:
3330         try:
3331           self._CheckPayload(result)
3332         except errors.OpExecError, err:
3333           self.LogWarning("The payload returned by '%s' is not valid: %s",
3334                           node.name, err)
3335           node_entry.append((constants.RS_NODATA, None))
3336         else:
3337           if self.op.command == constants.OOB_HEALTH:
3338             # For health we should log important events
3339             for item, status in result.payload:
3340               if status in [constants.OOB_STATUS_WARNING,
3341                             constants.OOB_STATUS_CRITICAL]:
3342                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3343                                 node.name, item, status)
3344
3345           if self.op.command == constants.OOB_POWER_ON:
3346             node.powered = True
3347           elif self.op.command == constants.OOB_POWER_OFF:
3348             node.powered = False
3349           elif self.op.command == constants.OOB_POWER_STATUS:
3350             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3351             if powered != node.powered:
3352               logging.warning(("Recorded power state (%s) of node '%s' does not"
3353                                " match actual power state (%s)"), node.powered,
3354                               node.name, powered)
3355
3356           # For configuration changing commands we should update the node
3357           if self.op.command in (constants.OOB_POWER_ON,
3358                                  constants.OOB_POWER_OFF):
3359             self.cfg.Update(node, feedback_fn)
3360
3361           node_entry.append((constants.RS_NORMAL, result.payload))
3362
3363     return ret
3364
3365   def _CheckPayload(self, result):
3366     """Checks if the payload is valid.
3367
3368     @param result: RPC result
3369     @raises errors.OpExecError: If payload is not valid
3370
3371     """
3372     errs = []
3373     if self.op.command == constants.OOB_HEALTH:
3374       if not isinstance(result.payload, list):
3375         errs.append("command 'health' is expected to return a list but got %s" %
3376                     type(result.payload))
3377       else:
3378         for item, status in result.payload:
3379           if status not in constants.OOB_STATUSES:
3380             errs.append("health item '%s' has invalid status '%s'" %
3381                         (item, status))
3382
3383     if self.op.command == constants.OOB_POWER_STATUS:
3384       if not isinstance(result.payload, dict):
3385         errs.append("power-status is expected to return a dict but got %s" %
3386                     type(result.payload))
3387
3388     if self.op.command in [
3389         constants.OOB_POWER_ON,
3390         constants.OOB_POWER_OFF,
3391         constants.OOB_POWER_CYCLE,
3392         ]:
3393       if result.payload is not None:
3394         errs.append("%s is expected to not return payload but got '%s'" %
3395                     (self.op.command, result.payload))
3396
3397     if errs:
3398       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3399                                utils.CommaJoin(errs))
3400
3401
3402
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    """Refuse name filters and validate the selected output fields.

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare the needed locks, which currently means none at all.

    """
    # Nodes used to be locked in shared mode here; the locks were
    # removed temporarily and the statements below show what is to be
    # restored.
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Turn per-node RPC results into a per-OS, per-node dictionary.

    @param rlist: a map with node names as keys and RPC results (with
        a list of OS entries as payload) as values

    @rtype: dict
    @return: a dictionary with OS names as keys and as value another
        map, with node names as keys and lists of (path, status,
        diagnose, variants, parameters, api_versions) tuples as values;
        every node whose RPC did not fail gets a (possibly empty) list
        for each OS found

    """
    per_os = {}
    # Only nodes which answered the RPC get an entry for each OS, so
    # that a non-responding node daemon doesn't make all OSes invalid
    responsive = [nname for nname in rlist if not rlist[nname].fail_msg]
    for (node_name, node_result) in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue
      for entry in node_result.payload:
        (name, path, status, diagnose, variants, params, api_versions) = entry
        if name not in per_os:
          # first time this OS is seen: start with an empty list for
          # every responsive node
          per_os[name] = dict([(nname, []) for nname in responsive])
        # parameters arrive as [name, help] and are kept as (name, help)
        param_tuples = [tuple(value) for value in params]
        per_os[name][node_name].append((path, status, diagnose, variants,
                                        param_tuples, api_versions))
    return per_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    Only nodes which are vm_capable and not offline are asked. An OS is
    valid if the first entry on every such node has a true status; its
    variants, parameters and API versions are the ones common to all
    nodes. Hidden, blacklisted or invalid OSes are left out unless the
    corresponding field was requested.

    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields

    """
    queried = []
    for node in self.cfg.GetAllNodesInfo().values():
      if not node.offline and node.vm_capable:
        queried.append(node.name)
    os_map = self._DiagnoseByOS(self.rpc.call_os_diagnose(queried))
    cluster = self.cfg.GetClusterInfo()
    wanted = self.op.output_fields
    rows = []

    for os_name in utils.NiceSort(os_map.keys()):
      per_node = os_map[os_name]
      valid = True
      variants = set()
      params = set()
      api_versions = set()
      for (idx, os_list) in enumerate(per_node.values()):
        # only the first entry found on each node is looked at
        valid = bool(os_list and os_list[0][1])
        if not valid:
          # forget whatever was collected from the previous nodes
          (variants, params, api_versions) = (set(), set(), set())
          break
        (node_variants, node_params, node_api) = os_list[0][3:6]
        if idx > 0:
          # keep only what all nodes seen so far agree upon
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)
        else:
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((is_hid and self._HID not in wanted) or
          (is_blk and self._BLK not in wanted) or
          (not valid and self._VLD not in wanted)):
        continue

      row = []
      for field in wanted:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # a shallow copy, the per-node lists themselves are shared
          val = dict(per_node)
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      rows.append(row)

    return rows
3537
3538
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is left out of both node lists, as a failed
    node would otherwise be impossible to remove.

    @return: the environment and the same node list twice

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, remaining, remaining

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - no instance has it among its nodes

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    all_instances = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Failures of the post hooks and of the node's own cleanup are only
    logged as warnings; a failure to update the hosts file is raised.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    # read before the node is removed from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    leave_result = self.rpc.call_node_leave_cluster(node.name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)

    # Remove node from our /etc/hosts
    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
3633
3634
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted nodes and the locks to request on C{lu}.

    Node locks are only requested when locking was asked for and live
    data is part of the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # live data was asked for, hence the nodes need to be locked
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to declare beyond what ExpandNames requested.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Each kind of data is only gathered when it is part of
    C{self.requested_data}.

    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, only asked from vm_capable nodes
    live_data = None
    if query.NQ_LIVE in self.requested_data:
      vm_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(vm_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = {}
      for (name, nresult) in node_data.items():
        # failed or empty answers are left out altogether
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = nresult.payload

    # Primary and secondary instances of each selected node
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # Out-of-band support, computed for all nodes of the configuration
    oob_support = None
    if query.NQ_OOB in self.requested_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    selected = [all_info[name] for name in nodenames]
    return query.NodeQueryData(selected, live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
3709
3710
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Set up the node query from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Let the node query declare the locks.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.nq.OldStyleQuery(self)
3727
3728
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validate the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of volumes found on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one row per volume, each row holding the requested output
        fields converted to strings

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname)
             for iname in self.cfg.GetInstanceList()]

    # for each instance, the result of its MapLVsByNode()
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    rows = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        fail_msg)
        continue

      # sort a copy, the RPC payload itself is left untouched
      sorted_vols = sorted(nresult.payload, key=lambda vol: vol["dev"])

      for vol in sorted_vols:
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            # first instance which has this volume on this node, if any
            val = "-"
            for inst in ilist:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol["name"] in inst_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
3805
3806
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the selected output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units found on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one row per storage unit, sorted by node and unit name,
        each row holding the requested output fields

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    fields = list(self.op.output_fields)
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in lu_only]

    # position of each requested field within the rows sent by the nodes
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    output = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      fail_msg = nresult.fail_msg
      if fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        fail_msg)
        continue

      units = dict([(unit[name_idx], unit) for unit in nresult.payload])

      for unit_name in utils.NiceSort(units.keys()):
        unit = units[unit_name]

        out_row = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out_row.append(node)
          elif field == constants.SF_TYPE:
            out_row.append(self.op.storage_type)
          elif field in field_idx:
            out_row.append(unit[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        output.append(out_row)

    return output
3888
3889
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted instances and the locks to request on C{lu}.

    Locks are only requested when locking was asked for and live data
    is part of the requested data; the node locks are then filled in
    later, in L{DeclareLocks}.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Lock the nodes of the locked instances, if locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Each kind of data is only gathered when it is part of
    C{self.requested_data}.

    @return: a L{query.InstanceQueryData} object

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst not in all_info:
              # The node reports an instance which the configuration
              # doesn't know; looking it up would raise a KeyError and
              # abort the whole query, so it is only logged
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
              continue
            if all_info[inst].primary_node == name:
              live_data.update(result.payload)
            else:
              wrongnode_inst.add(inst)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
3977
3978
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind is given by C{self.op.what}; all the work is delegated to
  the matching query implementation.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiate the query implementation for the requested kind.

    """
    impl_class = _GetQueryImplementation(self.op.what)
    self.impl = impl_class(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    """Let the query implementation declare the locks.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query implementation declare the locks of a level.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its new-style result.

    """
    return self.impl.NewStyleQuery(self)
3999
4000
class LUQueryFields(NoHooksLU):
  """Query for the fields available for items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Look up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """No locks are needed.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Return the result of the fields query for C{self.op.fields}.

    """
    wanted_fields = self.op.fields
    return self.qcls.FieldsQuery(wanted_fields)
4016
4017
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested changes.

    @raise errors.OpPrereqError: if the storage type can't be modified
        at all or if a field which can't be modified is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    # whatever is to be changed but isn't modifiable for this type
    rejected = set(self.op.changes.keys()) - modifiable
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Apply the requested changes to the storage unit.

    The error raised by the RPC result names the storage unit and the
    node if the modification failed.

    """
    storage_type = self.op.storage_type
    st_args = _GetStorageTypeArgs(self.cfg, storage_type)
    modify_result = self.rpc.call_storage_modify(self.op.node_name,
                                                 storage_type, st_args,
                                                 self.op.name, self.op.changes)
    modify_result.Raise("Failed to modify storage unit '%s' on %s" %
                        (self.op.name, self.op.node_name))
4058
4059
class LUNodeAdd(LogicalUnit):
  """Logical unit that adds (or re-adds) a node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolve the node name and reject inconsistent arguments.

    The resolved hostname object is kept in C{self.hostname} and the
    opcode's node name is replaced by the resolved name.

    @raise errors.OpPrereqError: if a node group is given for a re-add

    """
    ip_family = self.cfg.GetPrimaryIPFamily()
    self.primary_ip_family = ip_family
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-hooks node list is the current node list; the post-hooks
    list is the same list with the new node appended.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    pre_nodes = self.cfg.GetNodeList()
    post_nodes = pre_nodes + [target]
    return (env, pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the secondary IP (defaulting to the primary one) is a valid
       IPv4 address
     - the node is not yet in the configuration (or is, for a re-add)
     - its IP addresses do not clash with those of existing nodes
     - its single/dual-homed setup matches the one of the master
     - it is reachable on the node daemon port

    It also fills in the C{_capable} flags left unset in the opcode,
    decides on master candidate status and prepares C{self.new_node}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    op = self.op
    node = self.hostname.name
    primary_ip = self.hostname.ip
    op.primary_ip = primary_ip

    if op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      op.secondary_ip = primary_ip

    secondary_ip = op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    self.changed_primary_ip = False

    new_ips = (primary_ip, secondary_ip)
    for other_name in node_list:
      other = cfg.GetNodeInfo(other_name)

      if op.readd and node == other_name:
        # The node's own old entry: the secondary IP must be unchanged, a
        # changed primary IP is only recorded
        if other.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if other.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if other.primary_ip in new_ips or other.secondary_ip in new_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this block, None is no longer a valid value for the
    # _capable op attributes
    if op.readd:
      old_node = cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
    for attr in self._NFLAGS:
      if getattr(op, attr) is not None:
        continue
      if op.readd:
        # re-adds inherit the previous value
        setattr(op, attr, getattr(old_node, attr))
      else:
        setattr(op, attr, True)

    if op.readd and not op.vm_capable:
      (pri, sec) = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    master_node = cfg.GetNodeInfo(cfg.GetMasterNode())
    master_singlehomed = master_node.secondary_ip == master_node.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a secondary ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from the master's secondary ip to newbie's
      # secondary ip
      sec_reachable = netutils.TcpPing(secondary_ip,
                                       constants.DEFAULT_NODED_PORT,
                                       source=master_node.secondary_ip)
      if not sec_reachable:
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-add the node itself is passed as an exception to the
    # self-promotion decision
    exceptions = []
    if op.readd:
      exceptions.append(node)

    if op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if op.ndparams:
      utils.ForceDictType(op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    @param feedback_fn: function used to report verification failures
    @raise errors.OpExecError: on protocol version mismatch or failed
        ssh/hostname verification

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = False # pylint: disable-msg=W0201
      new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for flag in self._NFLAGS:
      setattr(new_node, flag, getattr(self.op, flag))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # an empty/unset ndparams in the opcode results in an empty dict
    new_node.ndparams = self.op.ndparams or {}

    # check connectivity
    version_res = self.rpc.call_version([node])[node]
    version_res.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_res.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_res.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_res.payload)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_name = self.cfg.GetMasterNode()
      hosts_res = self.rpc.call_etc_hosts_modify(master_name,
                                                 constants.ETC_HOSTS_ADD,
                                                 self.hostname.name,
                                                 self.hostname.ip)
      hosts_res.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    verifiers = [self.cfg.GetMasterNode()]
    verify_what = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_res = self.rpc.call_node_verify(verifiers, verify_what,
                                           self.cfg.GetClusterName())
    for verifier in verifiers:
      verify_res[verifier].Raise("Cannot communicate with node %s" % verifier)
      failures = verify_res[verifier].payload[constants.NV_NODELIST]
      if failures:
        for failed in failures:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, failures[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_res = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = demote_res.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
4320
4321
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the opcode arguments and decide what needs locking.

    Sets C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances} for use by the locking methods.

    @raise errors.OpPrereqError: if no modification was requested, if
        more than one modification is set to C{True}, or if the new
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE(review): this count also covers the capability flags, not only
    # the three role flags; CheckPrereq asserts that at most one role flag
    # is True after a possible auto-promotion, so do not narrow this check
    # without revisiting that logic
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declare the node lock(s) and, if needed, all instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Compute the affected instances and drop unneeded instance locks.

    An instance is affected if its disk template is one of
    L{constants.DTS_NET_MIRROR} and the node is among its nodes. The
    list is stored in C{self.affected_instances} and is computed whether
    or not all nodes are being locked, as L{CheckPrereq} relies on it
    for its secondary IP checks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
        instance = self.context.cfg.GetInstanceInfo(instance_name)
        i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
        if i_mirrored and self.op.node_name in instance.all_nodes:
          instances_keep.append(instance_name)
          self.affected_instances.append(instance)
        else:
          instances_release.append(instance_name)

      # If we have locked all instances, before waiting to lock nodes,
      # release all the ones living on nodes unrelated to the current
      # operation; as before, nothing is released when all nodes are locked
      single_node = (self.needed_locks[locking.LEVEL_NODE] is not
                     locking.ALL_SET)
      if single_node and instances_release:
        self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
        self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks node list (both pre and post) holds the master node and
    the node being modified.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the requested flag changes against the current node
    state, computes the old and new node role (C{self.old_role},
    C{self.new_role}), and checks a secondary IP change and new node
    parameters, if requested.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # report the instance names, not the instance objects
          inst_names = ", ".join([inst.name
                                  for inst in self.affected_instances])
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % inst_names,
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: feedback function, passed on to the config update
    @return: list of (parameter name, new value) pairs describing the
        changes applied to capability flags, role flags and secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      # report only the role flags whose value actually changes
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4613
4614
class LUNodePowercycle(NoHooksLU):
  """Logical unit that powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and guard against powercycling the master.

    @raise errors.OpPrereqError: if the node is the master node and the
        force parameter is not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = self.op.node_name == self.cfg.GetMasterNode()
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself via RPC.

    @param feedback_fn: feedback function (not used here)
    @return: the payload of the RPC result

    """
    target_node = self.op.node_name
    rpc_result = self.rpc.call_node_powercycle(target_node,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
4645
4646
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed by this LU: none.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Collect the cluster configuration into a dictionary.

    Hypervisor-related entries (C{hvparams} and C{os_hvp}) are limited
    to the enabled hypervisors.

    @param feedback_fn: feedback function (not used here)
    @return: dictionary of cluster-level settings and version constants

    """
    cluster = self.cfg.GetClusterInfo()

    # Per-OS hypervisor parameters, just for enabled hypervisors
    os_hvp = {}
    for (os_name, hv_dict) in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in hv_dict.items()
                              if hv_name in cluster.enabled_hypervisors])

    # Cluster-level hypervisor parameters of the enabled hypervisors
    hvparams = {}
    for hv_name in cluster.enabled_hypervisors:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
4713
4714
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare the locks needed by this LU: none.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Compute the value of each requested field.

    @param feedback_fn: feedback function (not used here)
    @return: list of values, in the order of C{self.op.output_fields}
    @raise errors.ParameterError: for a field name not handled here

    """
    # One zero-argument getter per supported field; getters are only
    # called for the fields actually requested
    getters = {
      "cluster_name": lambda: self.cfg.GetClusterName(),
      "master_node": lambda: self.cfg.GetMasterNode(),
      "drain_flag": lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name": lambda: self.cfg.GetVGName(),
      }

    values = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      values.append(getters[field]())
    return values
4752
4753
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This fetches the instance from the configuration and checks that
    its primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @param feedback_fn: feedback function (not used here)
    @return: the device information returned by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
4791
4792
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first
  every device is assembled with the primary flag unset on all nodes of
  its node tree, then the devices on the primary node are assembled
  again with the primary flag set.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures during the first
      (non-primary mode) pass are logged but do not make the returned
      status false
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a pair C{(disks_ok, device_info)}; C{disks_ok} is False if
      any relevant assembly call failed, and C{device_info} is a list
      with one C{(primary_node, instance_visible_name, device_path)}
      entry per disk, where the device path is None if the assembly
      on the primary node failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy so the size stored in the real object is kept
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        # failures in this pass are only fatal when not explicitly ignored
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None if the primary node assembly fails for this disk
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # a failure on the primary node is always fatal
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
4879
4880
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be started
  @param force: handed to L{_AssembleInstanceDisks} as
      C{ignore_secondaries}; if it is neither None nor true, a hint
      about '--force' is logged when the assembly fails
  @raise errors.OpExecError: if the disks could not be assembled; the
      disks are shut down again before the error is raised

  """
  assembled_ok = _AssembleInstanceDisks(lu, instance,
                                        ignore_secondaries=force)[0]
  if assembled_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
4894
4895
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking; node locks are filled in later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is being declared.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With C{self.op.force} set the disks are shut down directly,
    otherwise L{_SafeShutdownInstanceDisks} is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
4930
4931
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shut down block devices of an instance after a state check.

  L{_CheckInstanceDown} is invoked for the instance first, then the
  work is handed over to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: passed through unchanged to L{_ShutdownInstanceDisks}

  """
  reason = "cannot shutdown disks"
  _CheckInstanceDown(lu, instance, reason)
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4941
4942
4943 def _ExpandCheckDisks(instance, disks):
4944   """Return the instance disks selected by the disks list
4945
4946   @type disks: list of L{objects.Disk} or None
4947   @param disks: selected disks
4948   @rtype: list of L{objects.Disk}
4949   @return: selected instance disks to act on
4950
4951   """
4952   if disks is None:
4953     return instance.disks
4954   else:
4955     if not set(disks).issubset(instance.disks):
4956       raise errors.ProgrammerError("Can only act on disks belonging to the"
4957                                    " target instance")
4958     return disks
4959
4960
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  The shutdown is attempted on every node of each disk's node tree and
  every failure is logged as a warning.

  A failure on the primary node makes the result False unless
  C{ignore_primary} is true; a failure on any other node makes the
  result False unless the RPC result is flagged as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to act on (or all, if None)
  @param ignore_primary: whether failures on the primary node are
      tolerated
  @rtype: boolean
  @return: False if any non-tolerated failure happened, True otherwise

  """
  primary = instance.primary_node
  success = True

  for inst_disk in _ExpandCheckDisks(instance, disks):
    for node, node_disk in inst_disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, node_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    inst_disk.iv_name, node, msg)
      if node == primary:
        fatal = not ignore_primary
      else:
        fatal = not result.offline
      if fatal:
        success = False

  return success
4985
4986
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried through the C{node_info} RPC; both a failed query
  and an insufficient amount of free memory are reported as
  prerequisite errors.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    # the node answered, but without a usable memory figure
    text = ("Can't compute free memory on node %s, result was '%s'" %
            (node, free_mem))
    raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

  if requested > free_mem:
    text = ("Not enough memory on node %s for %s: needed %s MiB,"
            " available %s MiB" % (node, reason, requested, free_mem))
    raise errors.OpPrereqError(text, errors.ECODE_NORES)
5022
5023
5024 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5025   """Checks if nodes have enough free disk space in the all VGs.
5026
5027   This function check if all given nodes have the needed amount of
5028   free disk. In case any node has less disk or we cannot get the
5029   information from the node, this function raise an OpPrereqError
5030   exception.
5031
5032   @type lu: C{LogicalUnit}
5033   @param lu: a logical unit from which we get configuration data
5034   @type nodenames: C{list}
5035   @param nodenames: the list of node names to check
5036   @type req_sizes: C{dict}
5037   @param req_sizes: the hash of vg and corresponding amount of disk in
5038       MiB to check for
5039   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5040       or we cannot check the node
5041
5042   """
5043   for vg, req_size in req_sizes.items():
5044     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5045
5046
5047 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5048   """Checks if nodes have enough free disk space in the specified VG.
5049
5050   This function check if all given nodes have the needed amount of
5051   free disk. In case any node has less disk or we cannot get the
5052   information from the node, this function raise an OpPrereqError
5053   exception.
5054
5055   @type lu: C{LogicalUnit}
5056   @param lu: a logical unit from which we get configuration data
5057   @type nodenames: C{list}
5058   @param nodenames: the list of node names to check
5059   @type vg: C{str}
5060   @param vg: the volume group to check
5061   @type requested: C{int}
5062   @param requested: the amount of disk in MiB to check for
5063   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5064       or we cannot check the node
5065
5066   """
5067   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5068   for node in nodenames:
5069     info = nodeinfo[node]
5070     info.Raise("Cannot get current information from node %s" % node,
5071                prereq=True, ecode=errors.ECODE_ENVIRON)
5072     vg_free = info.payload.get("vg_free", None)
5073     if not isinstance(vg_free, int):
5074       raise errors.OpPrereqError("Can't compute free disk space on node"
5075                                  " %s for vg %s, result was '%s'" %
5076                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5077     if requested > vg_free:
5078       raise errors.OpPrereqError("Not enough disk space on target node %s"
5079                                  " vg %s: required %d MiB, available %d MiB" %
5080                                  (node, vg, requested, vg_free),
5081                                  errors.ECODE_NORES)
5082
5083
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  Hypervisor and backend parameter overrides given in the opcode are
  validated and passed on to the instance start RPC; this class does
  not write them to the configuration.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the declared types on the backend parameter overrides.

    """
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Set up instance locking via C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: tuple
    @return: the hook environment and the node list, the latter used
        both for the pre and the post phase

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and validates the
    hypervisor parameter overrides, if any. If the primary node is
    offline and offline nodes are to be ignored, only warnings are
    logged; otherwise the primary node must be online, the bridges must
    exist and, unless the instance is already running, the primary node
    must have enough free memory for the instance.

    @raise errors.OpPrereqError: if one of the helper checks fails

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      if self.op.beparams:
        # The overrides are handed to the start RPC in Exec, so the
        # free memory check below must be sized on the overridden
        # values as well, not only on the configured ones
        bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first. Unless the
    primary node is offline, its disks are then started and the start
    RPC is sent to the primary node; if the start fails, the disks are
    shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      # CheckPrereq only lets an offline primary through with this flag
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
5183
5184
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  A soft or hard reboot of a running instance is done with a single
  reboot RPC; every other case is handled as a stop (if the instance
  is running) followed by a fresh start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking via C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    """
    env = dict(IGNORE_SECONDARIES=self.op.ignore_secondaries,
               REBOOT_TYPE=self.op.reboot_type,
               SHUTDOWN_TIMEOUT=self.op.shutdown_timeout)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration; its primary node
    and its bridges are checked by the respective helpers.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    On success the instance is marked as up in the configuration.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    instance_running = bool(remote_info.payload)

    in_place = (instance_running and
                reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                constants.INSTANCE_REBOOT_HARD))

    if in_place:
      # the node-side reboot needs the disks configured for the primary
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if not instance_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active for an instance that is not running
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
5268
5269
class LUInstanceShutdown(LogicalUnit):
  """Logical unit stopping an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking via C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration. An offline primary
    node is accepted with a warning if C{self.op.ignore_offline_nodes}
    is set; otherwise the primary node is checked via
    L{_CheckNodeOnline}.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, pnode)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first. Unless
    the primary node is offline, the shutdown RPC is then sent (a
    failure is only logged as a warning) and the disks are shut down.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      # CheckPrereq only lets an offline primary through with this flag
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
5330
5331
class LUInstanceReinstall(LogicalUnit):
  """Logical unit re-running the OS creation for an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking via C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration, all its nodes are
    checked for being online, it must not be diskless and it is checked
    via L{_CheckInstanceDown}. A new OS type and new OS parameters, if
    given, are verified as well.

    @raise errors.OpPrereqError: if the instance is diskless or one of
        the helper checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is None:
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      merged_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, merged_osparams)
      self.os_inst = merged_osparams # the new dict (without defaults)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration first; the disks
    are then started, the OS creation RPC is run on the primary node
    and the disks are shut down again whether or not it succeeded.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
5416
5417
class LUInstanceRecreateDisks(LogicalUnit):
  """Logical unit recreating (some of) the disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking via C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration, must not be
    diskless and is checked via L{_CheckInstanceDown}. An empty disk
    selection is replaced by the indices of all disks; otherwise every
    given index is checked against the number of disks.

    @raise errors.OpPrereqError: if the instance is diskless or a disk
        index is too large

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      self.op.disks = range(len(instance.disks))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    All disks whose index was not selected are passed to
    L{_CreateDisks} as disks to skip.

    """
    # indices of the disks which have not been passed in
    to_skip = [idx for idx in range(len(self.instance.disks))
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
5476
5477
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  Unlike the neighbouring instance LUs, this class does not set
  C{REQ_BGL} itself; the lock handling in L{Exec} relies on the big
  ganeti lock being held (NOTE(review): presumably through the base
  class default - confirm).

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    @raise errors.OpPrereqError: if an IP check is requested without a
        name check

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    If a name check is requested, the new name is resolved and
    C{self.op.new_name} is replaced by the resolved name; with the IP
    check enabled, the resolved IP must not answer on the node daemon
    port. The new name must not belong to another instance.

    @raise errors.OpPrereqError: if the IP is already in use or the new
        name is taken by another instance

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      # from here on the resolved name is the one being used
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    # renaming an instance to its own current name is not a conflict
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration first and its lock is
    replaced; for file-based instances the storage directory is then
    renamed on the primary node. Finally the OS rename script is run
    with the disks started; a failure of the script is only logged as a
    warning.

    @return: the name of the instance as re-read from the configuration
        after the rename

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template == constants.DT_FILE and
        self.op.new_name != inst.name):
      # remember the directory of the first disk before the rename
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      # the re-read instance provides the directory to rename to
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # the configuration is already changed, so only warn here
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      # the disks were only started for running the rename script
      _ShutdownInstanceDisks(self, inst)

    return inst.name
5581
5582
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking; node locks are filled in later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is being declared.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-phase node list contains only the master node; the
    post-phase list holds all nodes of the instance followed by the
    master node.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + master_only
    return env, master_only, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down on its primary node and then handed to
    L{_RemoveInstance}. A failed shutdown is only reported through
    C{feedback_fn} if C{self.op.ignore_failures} is set.

    @raise errors.OpExecError: if the shutdown fails and failures are
        not ignored

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    result = self.rpc.call_instance_shutdown(pnode, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, pnode, msg))
      feedback_fn("Warning: can't shutdown instance: %s" % msg)

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5642
5643
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, its configuration entry and its lock.

  @param lu: the logical unit on whose behalf the removal is done; its
      C{cfg} and C{remove_locks} attributes are used
  @param feedback_fn: callable used for reporting a warning when the disk
      removal fails but failures are ignored
  @param instance: the instance object to remove
  @param ignore_failures: whether a failed disk removal is only a warning
  @raise errors.OpExecError: if the disks cannot be removed and
      C{ignore_failures} is not set

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not (disks_removed or ignore_failures):
    raise errors.OpExecError("Can't remove instance's disks")
  if not disks_removed:
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)
  lu.cfg.RemoveInstance(instance.name)

  # Nobody else may have scheduled an instance lock removal on this LU
  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, \
    "Instance lock removal conflict"

  # Schedule the instance's lock for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5664
5665
class LUInstanceQuery(NoHooksLU):
  """Logical unit answering instance queries.

  All the work is delegated to an L{_InstanceQuery} object created in
  L{CheckArguments}.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Forward the name expansion to the query helper.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward the lock declaration for C{level} to the query helper.

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return the helper's old-style result.

    """
    query_result = self.iq.OldStyleQuery(self)
    return query_result
5685
5686
class LUInstanceFailover(LogicalUnit):
  """Logical unit failing an instance over to its secondary node.

  The instance is shut down on its current primary node and, if it is
  marked as up, started again on its first secondary node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed by this LU.

    The node-level lock list starts out empty, with its recalculation
    mode set to C{LOCKS_REPLACE}; it is filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    Pre-hooks run on the master and on the secondary nodes of the
    instance; post-hooks additionally run on the current primary node.

    @return: tuple of (environment, pre-hooks nodes, post-hooks nodes)

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    hooks_env = {}
    hooks_env["IGNORE_CONSISTENCY"] = self.op.ignore_consistency
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    hooks_env["OLD_PRIMARY"] = old_primary
    hooks_env["OLD_SECONDARY"] = new_primary
    hooks_env["NEW_PRIMARY"] = new_primary
    hooks_env["NEW_SECONDARY"] = old_primary
    hooks_env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return (hooks_env, pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance uses a network-mirrored disk template and
    has a secondary node, and runs the online, not-drained, free memory
    (only if the instance is marked up) and bridge checks against that
    secondary node.

    @raise errors.OpPrereqError: if the disk template is not network
        mirrored
    @raise errors.ProgrammerError: if the instance has no secondary node

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    snodes = inst.secondary_nodes
    if not snodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    failover_target = snodes[0]
    _CheckNodeOnline(self, failover_target)
    _CheckNodeNotDrained(self, failover_target)
    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started there, so it must fit in memory
      _CheckNodeFreeMemory(self, failover_target,
                           "failing over instance %s" % inst.name,
                           be_params[constants.BE_MEMORY],
                           inst.hypervisor)

    # the bridges used by the instance must exist on the new primary
    _CheckInstanceBridgesExist(self, inst, node=failover_target)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The instance is shut down on its present primary node, its disks are
    deactivated, the configuration is updated to make the first secondary
    node the new primary and, if the instance is marked as up, it is
    started there.

    @param feedback_fn: callable used for progress messages
    @raise errors.OpExecError: on degraded disks (unless consistency is
        ignored), shutdown failure (unless consistency is ignored or the
        primary node is offline), or disk/instance activation failures

    """
    inst = self.instance
    pnode_info = self.cfg.GetNodeInfo(inst.primary_node)

    src_node = inst.primary_node
    dst_node = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, dst_node, False)
        if not (consistent or self.op.ignore_consistency):
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, src_node)

    shutdown_res = self.rpc.call_instance_shutdown(src_node, inst,
                                                   self.op.shutdown_timeout)
    err = shutdown_res.fail_msg
    if err:
      if not (self.op.ignore_consistency or pnode_info.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_node, err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, src_node, src_node, err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # record the new primary and distribute the updated configuration
    inst.primary_node = dst_node
    self.cfg.Update(inst, feedback_fn)

    # An instance marked as down is left stopped on its new primary
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, dst_node)

    (disks_ok, _) = _AssembleInstanceDisks(self, inst,
                                           ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(dst_node, inst, None, None)
    err = start_res.fail_msg
    if err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, dst_node, err))
5832
5833
class LUInstanceMigrate(LogicalUnit):
  """Logical unit migrating an instance.

  Unlike failover, which shuts the instance down, migration is done
  without a shutdown. The actual work is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks and create the migration tasklet.

    """
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    Pre-hooks run on the master and on the secondary nodes of the
    instance; post-hooks additionally run on the current primary node.

    @return: tuple of (environment, pre-hooks nodes, post-hooks nodes)

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    hooks_env = _BuildInstanceHookEnvByObject(self, inst)
    hooks_env["MIGRATE_LIVE"] = self._migrater.live
    hooks_env["MIGRATE_CLEANUP"] = self.op.cleanup
    hooks_env["OLD_PRIMARY"] = old_primary
    hooks_env["OLD_SECONDARY"] = new_primary
    hooks_env["NEW_PRIMARY"] = new_primary
    hooks_env["NEW_SECONDARY"] = old_primary

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return (hooks_env, pre_nodes, post_nodes)
5881
5882
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its primary node, new disks are created on
  the target node, the data is copied over and the instance is (if marked
  as up) started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks.

    The target node is locked explicitly; the lock recalculation mode is
    C{LOCKS_APPEND} so that L{DeclareLocks} adds to that list.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is reached.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node, both for the pre and for the post phase.

    @return: tuple of (environment, pre-hooks nodes, post-hooks nodes)

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are plain logical
    volumes or files, and that the target node is able to host it.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only disks that can be copied as-is are supported
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node; the messages used to
      # talk about "failing over" and the "secondary node", which does not
      # describe a move to an arbitrary target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: callable passed on to the configuration update
    @raise errors.OpExecError: if the shutdown fails (unless consistency
        is ignored), if the disks cannot be created or copied, or if the
        instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always release the minors and let the failure propagate
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the copy error is reported even if the disk removal fails too
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, make it the primary
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6056
6057
class LUNodeMigrate(LogicalUnit):
  """Logical unit migrating all primary instances away from a node.

  One L{TLMigrateInstance} tasklet is created per instance.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks and create one tasklet per instance.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One migration tasklet (never in cleanup mode) per primary instance
    inst_names = []
    migraters = []
    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      inst_name = inst.name
      logging.debug("Migrating instance %s", inst_name)
      inst_names.append(inst_name)
      migraters.append(TLMigrateInstance(self, inst_name, False))

    self.tasklets = migraters

    # All instances to be migrated have to be locked
    self.needed_locks[locking.LEVEL_INSTANCE] = inst_names

  def DeclareLocks(self, level):
    """Compute the node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The only node in both the pre-hooks and the post-hooks list is the
    master node.

    @return: tuple of (environment, pre-hooks nodes, post-hooks nodes)

    """
    hooks_env = {}
    hooks_env["NODE_NAME"] = self.op.node_name

    master_only = [self.cfg.GetMasterNode()]

    return (hooks_env, master_only, master_only)
6107
6108
6109 class TLMigrateInstance(Tasklet):
6110   """Tasklet class for instance migration.
6111
6112   @type live: boolean
6113   @ivar live: whether the migration will be done live or non-live;
6114       this variable is initalized only after CheckPrereq has run
6115
6116   """
6117   def __init__(self, lu, instance_name, cleanup):
6118     """Initializes this class.
6119
6120     """
6121     Tasklet.__init__(self, lu)
6122
6123     # Parameters
6124     self.instance_name = instance_name
6125     self.cleanup = cleanup
6126     self.live = False # will be overridden later
6127
6128   def CheckPrereq(self):
6129     """Check prerequisites.
6130
6131     This checks that the instance is in the cluster.
6132
6133     """
6134     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6135     instance = self.cfg.GetInstanceInfo(instance_name)
6136     assert instance is not None
6137
6138     if instance.disk_template != constants.DT_DRBD8:
6139       raise errors.OpPrereqError("Instance's disk layout is not"
6140                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6141
6142     secondary_nodes = instance.secondary_nodes
6143     if not secondary_nodes:
6144       raise errors.ConfigurationError("No secondary node but using"
6145                                       " drbd8 disk template")
6146
6147     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6148
6149     target_node = secondary_nodes[0]
6150     # check memory requirements on the secondary node
6151     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6152                          instance.name, i_be[constants.BE_MEMORY],
6153                          instance.hypervisor)
6154
6155     # check bridge existance
6156     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6157
6158     if not self.cleanup:
6159       _CheckNodeNotDrained(self.lu, target_node)
6160       result = self.rpc.call_instance_migratable(instance.primary_node,
6161                                                  instance)
6162       result.Raise("Can't migrate, please use failover",
6163                    prereq=True, ecode=errors.ECODE_STATE)
6164
6165     self.instance = instance
6166
6167     if self.lu.op.live is not None and self.lu.op.mode is not None:
6168       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6169                                  " parameters are accepted",
6170                                  errors.ECODE_INVAL)
6171     if self.lu.op.live is not None:
6172       if self.lu.op.live:
6173         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6174       else:
6175         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6176       # reset the 'live' parameter to None so that repeated
6177       # invocations of CheckPrereq do not raise an exception
6178       self.lu.op.live = None
6179     elif self.lu.op.mode is None:
6180       # read the default value from the hypervisor
6181       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6182       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6183
6184     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6185
6186   def _WaitUntilSync(self):
6187     """Poll with custom rpc for disk sync.
6188
6189     This uses our own step-based rpc call.
6190
6191     """
6192     self.feedback_fn("* wait until resync is done")
6193     all_done = False
6194     while not all_done:
6195       all_done = True
6196       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6197                                             self.nodes_ip,
6198                                             self.instance.disks)
6199       min_percent = 100
6200       for node, nres in result.items():
6201         nres.Raise("Cannot resync disks on node %s" % node)
6202         node_done, node_percent = nres.payload
6203         all_done = all_done and node_done
6204         if node_percent is not None:
6205           min_percent = min(min_percent, node_percent)
6206       if not all_done:
6207         if min_percent < 100:
6208           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6209         time.sleep(2)
6210
6211   def _EnsureSecondary(self, node):
6212     """Demote a node to secondary.
6213
6214     """
6215     self.feedback_fn("* switching node %s to secondary mode" % node)
6216
6217     for dev in self.instance.disks:
6218       self.cfg.SetDiskID(dev, node)
6219
6220     result = self.rpc.call_blockdev_close(node, self.instance.name,
6221                                           self.instance.disks)
6222     result.Raise("Cannot change disk to secondary on node %s" % node)
6223
6224   def _GoStandalone(self):
6225     """Disconnect from the network.
6226
6227     """
6228     self.feedback_fn("* changing into standalone mode")
6229     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6230                                                self.instance.disks)
6231     for node, nres in result.items():
6232       nres.Raise("Cannot disconnect disks node %s" % node)
6233
6234   def _GoReconnect(self, multimaster):
6235     """Reconnect to the network.
6236
6237     """
6238     if multimaster:
6239       msg = "dual-master"
6240     else:
6241       msg = "single-master"
6242     self.feedback_fn("* changing disks into %s mode" % msg)
6243     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6244                                            self.instance.disks,
6245                                            self.instance.name, multimaster)
6246     for node, nres in result.items():
6247       nres.Raise("Cannot change disks config on node %s" % node)
6248
6249   def _ExecCleanup(self):
6250     """Try to cleanup after a failed migration.
6251
6252     The cleanup is done by:
6253       - check that the instance is running only on one node
6254         (and update the config if needed)
6255       - change disks on its secondary node to secondary
6256       - wait until disks are fully synchronized
6257       - disconnect from the network
6258       - change disks into single-master mode
6259       - wait again until disks are fully synchronized
6260
6261     """
6262     instance = self.instance
6263     target_node = self.target_node
6264     source_node = self.source_node
6265
6266     # check running on only one node
6267     self.feedback_fn("* checking where the instance actually runs"
6268                      " (if this hangs, the hypervisor might be in"
6269                      " a bad state)")
6270     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6271     for node, result in ins_l.items():
6272       result.Raise("Can't contact node %s" % node)
6273
6274     runningon_source = instance.name in ins_l[source_node].payload
6275     runningon_target = instance.name in ins_l[target_node].payload
6276
6277     if runningon_source and runningon_target:
6278       raise errors.OpExecError("Instance seems to be running on two nodes,"
6279                                " or the hypervisor is confused. You will have"
6280                                " to ensure manually that it runs only on one"
6281                                " and restart this operation.")
6282
6283     if not (runningon_source or runningon_target):
6284       raise errors.OpExecError("Instance does not seem to be running at all."
6285                                " In this case, it's safer to repair by"
6286                                " running 'gnt-instance stop' to ensure disk"
6287                                " shutdown, and then restarting it.")
6288
6289     if runningon_target:
6290       # the migration has actually succeeded, we need to update the config
6291       self.feedback_fn("* instance running on secondary node (%s),"
6292                        " updating config" % target_node)
6293       instance.primary_node = target_node
6294       self.cfg.Update(instance, self.feedback_fn)
6295       demoted_node = source_node
6296     else:
6297       self.feedback_fn("* instance confirmed to be running on its"
6298                        " primary node (%s)" % source_node)
6299       demoted_node = target_node
6300
6301     self._EnsureSecondary(demoted_node)
6302     try:
6303       self._WaitUntilSync()
6304     except errors.OpExecError:
6305       # we ignore here errors, since if the device is standalone, it
6306       # won't be able to sync
6307       pass
6308     self._GoStandalone()
6309     self._GoReconnect(False)
6310     self._WaitUntilSync()
6311
6312     self.feedback_fn("* done")
6313
6314   def _RevertDiskStatus(self):
6315     """Try to revert the disk status after a failed migration.
6316
6317     """
6318     target_node = self.target_node
6319     try:
6320       self._EnsureSecondary(target_node)
6321       self._GoStandalone()
6322       self._GoReconnect(False)
6323       self._WaitUntilSync()
6324     except errors.OpExecError, err:
6325       self.lu.LogWarning("Migration failed and I can't reconnect the"
6326                          " drives: error '%s'\n"
6327                          "Please look and recover the instance status" %
6328                          str(err))
6329
6330   def _AbortMigration(self):
6331     """Call the hypervisor code to abort a started migration.
6332
6333     """
6334     instance = self.instance
6335     target_node = self.target_node
6336     migration_info = self.migration_info
6337
6338     abort_result = self.rpc.call_finalize_migration(target_node,
6339                                                     instance,
6340                                                     migration_info,
6341                                                     False)
6342     abort_msg = abort_result.fail_msg
6343     if abort_msg:
6344       logging.error("Aborting migration failed on target node %s: %s",
6345                     target_node, abort_msg)
6346       # Don't raise an exception here, as we stil have to try to revert the
6347       # disk status, even if this step failed.
6348
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - check that the disks are consistent on the target node
      - fetch the migration information from the source node
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - prepare the target node to accept the instance
      - migrate the instance
      - update the configuration with the new primary node and finalize
        the migration on the target node
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    If preparing the target node or the migration itself fails, the
    migration is aborted on the target node and the disk status is
    reverted before the error is raised.

    @raise errors.OpExecError: if a disk is degraded on the target node,
        the migration information cannot be fetched, or the pre-migration,
        migration or finalization step fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # also kept on the object, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      # roll back: abort on the target node, then restore the disk status
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed 10 second pause before the migration call; the
    # reason for it is not visible in this code -- TODO confirm
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      # same rollback as for a failed pre-migration
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    # NOTE(review): second fixed 10 second pause, after a successful
    # migration call; purpose likewise not visible here -- TODO confirm
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # the config already points to the target node at this point, so a
    # finalization failure is raised without any rollback
    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # demote the old primary and go back to single-master mode
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")
6443
6444   def Exec(self, feedback_fn):
6445     """Perform the migration.
6446
6447     """
6448     feedback_fn("Migrating instance %s" % self.instance.name)
6449
6450     self.feedback_fn = feedback_fn
6451
6452     self.source_node = self.instance.primary_node
6453     self.target_node = self.instance.secondary_nodes[0]
6454     self.all_nodes = [self.source_node, self.target_node]
6455     self.nodes_ip = {
6456       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6457       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6458       }
6459
6460     if self.cleanup:
6461       return self._ExecCleanup()
6462     else:
6463       return self._ExecMigration()
6464
6465
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a block device tree on a node, children first.

  The whole tree below C{device} is always walked. A device is only
  created if creation is forced for it: either because the caller asked
  for it, or because the device itself (or one of the devices above it
  in the tree) answers true to C{CreateOnSecondary()}.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the root of the device tree to create
  @type force_create: boolean
  @param force_create: whether to create this device even if its
      C{CreateOnSecondary()} method does not ask for it; once creation
      is forced, this is inherited by all the children
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: passed unchanged to L{_CreateSingleBlockDev} for
      every device which gets created

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # children have to exist before their parent can be created
  for child in device.children or ():
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6506
6507
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on a node.

  Children of the device are not handled here; the caller has to create
  them beforehand.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create; if it has no C{physical_id} yet,
      the payload of the creation RPC is stored there
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: passed as-is to the C{blockdev_create} RPC

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on node %s for instance %s" %
                  (device, node, instance.name))
  creation.Raise(failure_text)

  # keep an already known physical ID, only fill in a missing one
  if device.physical_id is None:
    device.physical_id = creation.payload
6536
6537
6538 def _GenerateUniqueNames(lu, exts):
6539   """Generate a suitable LV name.
6540
6541   This will generate a logical volume name for the given instance.
6542
6543   """
6544   results = []
6545   for val in exts:
6546     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6547     results.append("%s%s" % (new_id, val))
6548   return results
6549
6550
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A new port and a new shared secret are requested from the
  configuration for the device.

  @param lu: the lu on whose behalf we execute
  @param primary: the first node of the DRBD logical ID
  @param secondary: the second node of the DRBD logical ID
  @param size: the size of the DRBD device and of its data volume
  @param vgname: the volume group holding both logical volumes
  @param names: the names of the data volume (index 0) and of the
      metadata volume (index 1)
  @param iv_name: the C{iv_name} of the resulting DRBD disk
  @param p_minor: the DRBD minor stored for the primary node
  @param s_minor: the DRBD minor stored for the secondary node
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk, with the two LVs as children

  """
  port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # the metadata volume has a fixed size of 128
  lv_children = [
    objects.Disk(dev_type=constants.LD_LV, size=size,
                 logical_id=(vgname, names[0])),
    objects.Disk(dev_type=constants.LD_LV, size=128,
                 logical_id=(vgname, names[1])),
    ]
  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)

  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=lv_children,
                      iv_name=iv_name)
6569
6570
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Build the disk objects of an instance for a given disk template.

  Nothing is created on the nodes here; only the L{objects.Disk} objects
  describing the layout are generated (which includes reserving names
  and, for DRBD8, minors, ports and secrets in the configuration).

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template (one of C{constants.DT_*})
  @param instance_name: the name of the instance; used when allocating
      DRBD minors
  @param primary_node: the primary node of the instance
  @type secondary_nodes: list
  @param secondary_nodes: the secondary nodes; must hold exactly one
      node for DRBD8 and be empty for the plain and file templates
  @type disk_info: list of dicts
  @param disk_info: the disk definitions; the C{"size"} and C{"mode"}
      keys are required, C{"vg"} is optional and defaults to the
      cluster volume group
  @param file_storage_dir: the directory used for file-based disks
  @param file_driver: the driver used for file-based disks
  @type base_index: int
  @param base_index: the index of the first generated disk
  @param feedback_fn: callable invoked with progress messages
  @rtype: list of L{objects.Disk}
  @return: the generated disks (empty for the diskless template)
  @raise errors.ProgrammerError: for an unknown template or a number of
      secondary nodes not matching the template

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)
  layout = []

  if template_name == constants.DT_DISKLESS:
    return layout

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    for idx, disk in enumerate(disk_info):
      vg = disk.get("vg", default_vg)
      lv_name = lv_names[idx]
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, lv_name))
      layout.append(objects.Disk(dev_type=constants.LD_LV,
                                 size=disk["size"],
                                 logical_id=(vg, lv_name),
                                 iv_name="disk/%d" % (idx + base_index),
                                 mode=disk["mode"]))
    return layout

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk, one for each of the two nodes
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    # flat list: data and metadata volume names alternate
    lv_names = []
    for prefix in _GenerateUniqueNames(lu, suffixes):
      lv_names.extend([prefix + "_data", prefix + "_meta"])

    for idx, disk in enumerate(disk_info):
      first = idx * 2
      vg = disk.get("vg", default_vg)
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], vg,
                                      lv_names[first:first + 2],
                                      "disk/%d" % (idx + base_index),
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk["mode"]
      layout.append(drbd_dev)
    return layout

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      file_path = "%s/disk%d" % (file_storage_dir, disk_index)
      layout.append(objects.Disk(dev_type=constants.LD_FILE,
                                 size=disk["size"],
                                 iv_name="disk/%d" % disk_index,
                                 logical_id=(file_driver, file_path),
                                 mode=disk["mode"]))
    return layout

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6640
6641
6642 def _GetInstanceInfoText(instance):
6643   """Compute that text that should be added to the disk's metadata.
6644
6645   """
6646   return "originstname+%s" % instance.name
6647
6648
6649 def _CalcEta(time_taken, written, total_size):
6650   """Calculates the ETA based on size written and total size.
6651
6652   @param time_taken: The time taken so far
6653   @param written: amount written so far
6654   @param total_size: The total size of data to be written
6655   @return: The remaining time in seconds
6656
6657   """
6658   avg_time = time_taken / float(written)
6659   return (total_size - written) * avg_time
6660
6661
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Disk synchronisation is paused on the instance's primary node, every
  disk is wiped chunk by chunk, and synchronisation is resumed again,
  whether or not the wipe succeeded. Problems while pausing or resuming
  the synchronisation are only logged; a failed wipe request is reported
  through the C{Raise} method of its RPC result.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe

  """
  node = instance.primary_node
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  # The per-disk results are only looked at if the RPC itself worked;
  # pausing is best-effort, so a failure is logged and wiping goes on
  if result.fail_msg:
    logging.warn("pause-sync of instance %s disks failed on node %s: %s",
                 instance.name, node, result.fail_msg)
  else:
    for idx, success in enumerate(result.payload):
      if not success:
        logging.warn("pause-sync of instance %s for disks %d failed",
                     instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s", idx, instance.name)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)

      offset = 0
      size = device.size
      # zero makes the very first chunk report progress right away
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks,
                                                    False)

    if result.fail_msg:
      # Nothing may be raised from here: it would replace the error (if
      # any) which interrupted the wipe itself
      lu.LogWarning("Warning: Resume sync of the disks failed on node %s:"
                    " %s. Please have a look at the status and"
                    " troubleshoot the issue.", node, result.fail_msg)
      logging.warn("resume-sync of instance %s disks failed on node %s: %s",
                   instance.name, node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Warning: Resume sync of disk %d failed. Please"
                        " have a look at the status and troubleshoot the"
                        " issue.", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
6719
6720
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create the disks of an instance on its nodes.

  For the file disk template the storage directory is created first.
  Every disk is then created on all involved nodes; creation is only
  forced (and the device opened) on the primary node. Nothing is
  returned: failures surface as exceptions from the RPC result checks
  and from L{_CreateBlockDev}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of disk indices to skip
  @type target_node: string
  @param target_node: if passed, the disks are created on this node
      only, and it is treated as the primary node

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    nodes = [target_node]
  else:
    pnode = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # the directory is derived from the path of the first disk
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)
    dir_result.Raise("Failed to create directory '%s' on node %s" %
                     (storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in nodes:
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
6764
6765
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`). For the file disk template, the storage
  directory (derived from the path of the first disk) is removed as
  well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the
      disks; without it, each disk is removed from all the nodes of its
      device tree (computed relative to the primary node)
  @rtype: boolean
  @return: the success of the removal; False if any of the devices or
      the storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: remember the failure but go on with the rest
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the request was actually sent to, which is not
      # the primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6813
6814
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template (one of C{constants.DT_*})
  @type disks: list of dicts
  @param disks: the disk definitions; each of them must have the
      C{"vg"} and C{"size"} keys
  @rtype: dict
  @return: the space required in each volume group, indexed by volume
      group name; empty for the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Sum up the disk sizes of each volume group.

    @param disks: the disk definitions
    @param payload: fixed overhead added for every disk
    @return: dict of volume group name to total required size

    """
    vgs = {}
    for disk in disks:
      vg = disk["vg"]
      # the running total must be looked up under the disk's own volume
      # group, otherwise only the last disk of each group is counted
      vgs[vg] = vgs.get(vg, 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
6843
6844
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk space needed for a disk template.

  @param disk_template: the disk template (one of C{constants.DT_*})
  @type disks: list of dicts
  @param disks: the disk definitions; each of them must have the
      C{"size"} key
  @return: the sum of the disk sizes (plus 128 per disk for DRBD8), or
      None for the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(disk["size"] for disk in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(disk["size"] + 128 for disk in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  try:
    return required[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
6863
6864
6865 def _FilterVmNodes(lu, nodenames):
6866   """Filters out non-vm_capable nodes from a list.
6867
6868   @type lu: L{LogicalUnit}
6869   @param lu: the logical unit for which we check
6870   @type nodenames: list
6871   @param nodenames: the list of nodes on which we should check
6872   @rtype: list
6873   @return: the list of vm-capable nodes
6874
6875   """
6876   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6877   return [name for name in nodenames if name not in vm_nodes]
6878
6879
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  Shared by instance creation and instance modification. Nodes which
  are not vm_capable are not asked at all, and the answers of offline
  nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  answers = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                   hvparams)
  for node in vm_nodes:
    answer = answers[node]
    if not answer.offline:
      answer.Raise("Hypervisor parameter validation failed on node %s" %
                   node)
6906
6907
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  Nodes which are not vm_capable are not asked. A node answering with
  an empty payload is only reported via an informational message.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are validated
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  answers = lu.rpc.call_os_validate(required, vm_nodes, osname, checks,
                                    osparams)
  for node, answer in answers.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    answer.Raise("OS Parameters validation failed on node %s" % node)
    if answer.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
6936
6937
6938 class LUInstanceCreate(LogicalUnit):
6939   """Create an instance.
6940
6941   """
6942   HPATH = "instance-add"
6943   HTYPE = constants.HTYPE_INSTANCE
6944   REQ_BGL = False
6945
6946   def CheckArguments(self):
6947     """Check arguments.
6948
6949     """
6950     # do not require name_check to ease forward/backward compatibility
6951     # for tools
6952     if self.op.no_install and self.op.start:
6953       self.LogInfo("No-installation mode selected, disabling startup")
6954       self.op.start = False
6955     # validate/normalize the instance name
6956     self.op.instance_name = \
6957       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6958
6959     if self.op.ip_check and not self.op.name_check:
6960       # TODO: make the ip check more flexible and not depend on the name check
6961       raise errors.OpPrereqError("Cannot do ip check without a name check",
6962                                  errors.ECODE_INVAL)
6963
6964     # check nics' parameter names
6965     for nic in self.op.nics:
6966       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6967
6968     # check disks. parameter names and consistent adopt/no-adopt strategy
6969     has_adopt = has_no_adopt = False
6970     for disk in self.op.disks:
6971       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6972       if "adopt" in disk:
6973         has_adopt = True
6974       else:
6975         has_no_adopt = True
6976     if has_adopt and has_no_adopt:
6977       raise errors.OpPrereqError("Either all disks are adopted or none is",
6978                                  errors.ECODE_INVAL)
6979     if has_adopt:
6980       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6981         raise errors.OpPrereqError("Disk adoption is not supported for the"
6982                                    " '%s' disk template" %
6983                                    self.op.disk_template,
6984                                    errors.ECODE_INVAL)
6985       if self.op.iallocator is not None:
6986         raise errors.OpPrereqError("Disk adoption not allowed with an"
6987                                    " iallocator script", errors.ECODE_INVAL)
6988       if self.op.mode == constants.INSTANCE_IMPORT:
6989         raise errors.OpPrereqError("Disk adoption not allowed for"
6990                                    " instance import", errors.ECODE_INVAL)
6991
6992     self.adopt_disks = has_adopt
6993
6994     # instance name verification
6995     if self.op.name_check:
6996       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6997       self.op.instance_name = self.hostname1.name
6998       # used in CheckPrereq for ip ping check
6999       self.check_ip = self.hostname1.ip
7000     else:
7001       self.check_ip = None
7002
7003     # file storage checks
7004     if (self.op.file_driver and
7005         not self.op.file_driver in constants.FILE_DRIVER):
7006       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7007                                  self.op.file_driver, errors.ECODE_INVAL)
7008
7009     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7010       raise errors.OpPrereqError("File storage directory path not absolute",
7011                                  errors.ECODE_INVAL)
7012
7013     ### Node/iallocator related checks
7014     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7015
7016     if self.op.pnode is not None:
7017       if self.op.disk_template in constants.DTS_NET_MIRROR:
7018         if self.op.snode is None:
7019           raise errors.OpPrereqError("The networked disk templates need"
7020                                      " a mirror node", errors.ECODE_INVAL)
7021       elif self.op.snode:
7022         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7023                         " template")
7024         self.op.snode = None
7025
7026     self._cds = _GetClusterDomainSecret()
7027
7028     if self.op.mode == constants.INSTANCE_IMPORT:
7029       # On import force_variant must be True, because if we forced it at
7030       # initial install, our only chance when importing it back is that it
7031       # works again!
7032       self.op.force_variant = True
7033
7034       if self.op.no_install:
7035         self.LogInfo("No-installation mode has no effect during import")
7036
7037     elif self.op.mode == constants.INSTANCE_CREATE:
7038       if self.op.os_type is None:
7039         raise errors.OpPrereqError("No guest OS specified",
7040                                    errors.ECODE_INVAL)
7041       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7042         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7043                                    " installation" % self.op.os_type,
7044                                    errors.ECODE_STATE)
7045       if self.op.disk_template is None:
7046         raise errors.OpPrereqError("No disk template specified",
7047                                    errors.ECODE_INVAL)
7048
7049     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7050       # Check handshake to ensure both clusters have the same domain secret
7051       src_handshake = self.op.source_handshake
7052       if not src_handshake:
7053         raise errors.OpPrereqError("Missing source handshake",
7054                                    errors.ECODE_INVAL)
7055
7056       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7057                                                            src_handshake)
7058       if errmsg:
7059         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7060                                    errors.ECODE_INVAL)
7061
7062       # Load and check source CA
7063       self.source_x509_ca_pem = self.op.source_x509_ca
7064       if not self.source_x509_ca_pem:
7065         raise errors.OpPrereqError("Missing source X509 CA",
7066                                    errors.ECODE_INVAL)
7067
7068       try:
7069         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7070                                                     self._cds)
7071       except OpenSSL.crypto.Error, err:
7072         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7073                                    (err, ), errors.ECODE_INVAL)
7074
7075       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7076       if errcode is not None:
7077         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7078                                    errors.ECODE_INVAL)
7079
7080       self.source_x509_ca = cert
7081
7082       src_instance_name = self.op.source_instance_name
7083       if not src_instance_name:
7084         raise errors.OpPrereqError("Missing source instance name",
7085                                    errors.ECODE_INVAL)
7086
7087       self.source_instance_name = \
7088           netutils.GetHostname(name=src_instance_name).name
7089
7090     else:
7091       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7092                                  self.op.mode, errors.ECODE_INVAL)
7093
7094   def ExpandNames(self):
7095     """ExpandNames for CreateInstance.
7096
7097     Figure out the right locks for instance creation.
7098
7099     """
7100     self.needed_locks = {}
7101
7102     instance_name = self.op.instance_name
7103     # this is just a preventive check, but someone might still add this
7104     # instance in the meantime, and creation will fail at lock-add time
7105     if instance_name in self.cfg.GetInstanceList():
7106       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7107                                  instance_name, errors.ECODE_EXISTS)
7108
7109     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7110
7111     if self.op.iallocator:
7112       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7113     else:
7114       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7115       nodelist = [self.op.pnode]
7116       if self.op.snode is not None:
7117         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7118         nodelist.append(self.op.snode)
7119       self.needed_locks[locking.LEVEL_NODE] = nodelist
7120
7121     # in case of import lock the source node too
7122     if self.op.mode == constants.INSTANCE_IMPORT:
7123       src_node = self.op.src_node
7124       src_path = self.op.src_path
7125
7126       if src_path is None:
7127         self.op.src_path = src_path = self.op.instance_name
7128
7129       if src_node is None:
7130         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7131         self.op.src_node = None
7132         if os.path.isabs(src_path):
7133           raise errors.OpPrereqError("Importing an instance from an absolute"
7134                                      " path requires a source node option.",
7135                                      errors.ECODE_INVAL)
7136       else:
7137         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7138         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7139           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7140         if not os.path.isabs(src_path):
7141           self.op.src_path = src_path = \
7142             utils.PathJoin(constants.EXPORT_DIR, src_path)
7143
7144   def _RunAllocator(self):
7145     """Run the allocator based on input opcode.
7146
7147     """
7148     nics = [n.ToDict() for n in self.nics]
7149     ial = IAllocator(self.cfg, self.rpc,
7150                      mode=constants.IALLOCATOR_MODE_ALLOC,
7151                      name=self.op.instance_name,
7152                      disk_template=self.op.disk_template,
7153                      tags=[],
7154                      os=self.op.os_type,
7155                      vcpus=self.be_full[constants.BE_VCPUS],
7156                      mem_size=self.be_full[constants.BE_MEMORY],
7157                      disks=self.disks,
7158                      nics=nics,
7159                      hypervisor=self.op.hypervisor,
7160                      )
7161
7162     ial.Run(self.op.iallocator)
7163
7164     if not ial.success:
7165       raise errors.OpPrereqError("Can't compute nodes using"
7166                                  " iallocator '%s': %s" %
7167                                  (self.op.iallocator, ial.info),
7168                                  errors.ECODE_NORES)
7169     if len(ial.result) != ial.required_nodes:
7170       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7171                                  " of nodes (%s), required %s" %
7172                                  (self.op.iallocator, len(ial.result),
7173                                   ial.required_nodes), errors.ECODE_FAULT)
7174     self.op.pnode = ial.result[0]
7175     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7176                  self.op.instance_name, self.op.iallocator,
7177                  utils.CommaJoin(ial.result))
7178     if ial.required_nodes == 2:
7179       self.op.snode = ial.result[1]
7180
7181   def BuildHooksEnv(self):
7182     """Build hooks env.
7183
7184     This runs on master, primary and secondary nodes of the instance.
7185
7186     """
7187     env = {
7188       "ADD_MODE": self.op.mode,
7189       }
7190     if self.op.mode == constants.INSTANCE_IMPORT:
7191       env["SRC_NODE"] = self.op.src_node
7192       env["SRC_PATH"] = self.op.src_path
7193       env["SRC_IMAGES"] = self.src_images
7194
7195     env.update(_BuildInstanceHookEnv(
7196       name=self.op.instance_name,
7197       primary_node=self.op.pnode,
7198       secondary_nodes=self.secondaries,
7199       status=self.op.start,
7200       os_type=self.op.os_type,
7201       memory=self.be_full[constants.BE_MEMORY],
7202       vcpus=self.be_full[constants.BE_VCPUS],
7203       nics=_NICListToTuple(self, self.nics),
7204       disk_template=self.op.disk_template,
7205       disks=[(d["size"], d["mode"]) for d in self.disks],
7206       bep=self.be_full,
7207       hvp=self.hv_full,
7208       hypervisor_name=self.op.hypervisor,
7209     ))
7210
7211     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7212           self.secondaries)
7213     return env, nl, nl
7214
7215   def _ReadExportInfo(self):
7216     """Reads the export information from disk.
7217
7218     It will override the opcode source node and path with the actual
7219     information, if these two were not specified before.
7220
7221     @return: the export information
7222
7223     """
7224     assert self.op.mode == constants.INSTANCE_IMPORT
7225
7226     src_node = self.op.src_node
7227     src_path = self.op.src_path
7228
7229     if src_node is None:
7230       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7231       exp_list = self.rpc.call_export_list(locked_nodes)
7232       found = False
7233       for node in exp_list:
7234         if exp_list[node].fail_msg:
7235           continue
7236         if src_path in exp_list[node].payload:
7237           found = True
7238           self.op.src_node = src_node = node
7239           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7240                                                        src_path)
7241           break
7242       if not found:
7243         raise errors.OpPrereqError("No export found for relative path %s" %
7244                                     src_path, errors.ECODE_INVAL)
7245
7246     _CheckNodeOnline(self, src_node)
7247     result = self.rpc.call_export_info(src_node, src_path)
7248     result.Raise("No export or invalid export found in dir %s" % src_path)
7249
7250     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7251     if not export_info.has_section(constants.INISECT_EXP):
7252       raise errors.ProgrammerError("Corrupted export config",
7253                                    errors.ECODE_ENVIRON)
7254
7255     ei_version = export_info.get(constants.INISECT_EXP, "version")
7256     if (int(ei_version) != constants.EXPORT_VERSION):
7257       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7258                                  (ei_version, constants.EXPORT_VERSION),
7259                                  errors.ECODE_ENVIRON)
7260     return export_info
7261
7262   def _ReadExportParams(self, einfo):
7263     """Use export parameters as defaults.
7264
7265     In case the opcode doesn't specify (as in override) some instance
7266     parameters, then try to use them from the export information, if
7267     that declares them.
7268
7269     """
7270     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7271
7272     if self.op.disk_template is None:
7273       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7274         self.op.disk_template = einfo.get(constants.INISECT_INS,
7275                                           "disk_template")
7276       else:
7277         raise errors.OpPrereqError("No disk template specified and the export"
7278                                    " is missing the disk_template information",
7279                                    errors.ECODE_INVAL)
7280
7281     if not self.op.disks:
7282       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7283         disks = []
7284         # TODO: import the disk iv_name too
7285         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7286           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7287           disks.append({"size": disk_sz})
7288         self.op.disks = disks
7289       else:
7290         raise errors.OpPrereqError("No disk info specified and the export"
7291                                    " is missing the disk information",
7292                                    errors.ECODE_INVAL)
7293
7294     if (not self.op.nics and
7295         einfo.has_option(constants.INISECT_INS, "nic_count")):
7296       nics = []
7297       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7298         ndict = {}
7299         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7300           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7301           ndict[name] = v
7302         nics.append(ndict)
7303       self.op.nics = nics
7304
7305     if (self.op.hypervisor is None and
7306         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7307       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7308     if einfo.has_section(constants.INISECT_HYP):
7309       # use the export parameters but do not override the ones
7310       # specified by the user
7311       for name, value in einfo.items(constants.INISECT_HYP):
7312         if name not in self.op.hvparams:
7313           self.op.hvparams[name] = value
7314
7315     if einfo.has_section(constants.INISECT_BEP):
7316       # use the parameters, without overriding
7317       for name, value in einfo.items(constants.INISECT_BEP):
7318         if name not in self.op.beparams:
7319           self.op.beparams[name] = value
7320     else:
7321       # try to read the parameters old style, from the main section
7322       for name in constants.BES_PARAMETERS:
7323         if (name not in self.op.beparams and
7324             einfo.has_option(constants.INISECT_INS, name)):
7325           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7326
7327     if einfo.has_section(constants.INISECT_OSP):
7328       # use the parameters, without overriding
7329       for name, value in einfo.items(constants.INISECT_OSP):
7330         if name not in self.op.osparams:
7331           self.op.osparams[name] = value
7332
7333   def _RevertToDefaults(self, cluster):
7334     """Revert the instance parameters to the default values.
7335
7336     """
7337     # hvparams
7338     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7339     for name in self.op.hvparams.keys():
7340       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7341         del self.op.hvparams[name]
7342     # beparams
7343     be_defs = cluster.SimpleFillBE({})
7344     for name in self.op.beparams.keys():
7345       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7346         del self.op.beparams[name]
7347     # nic params
7348     nic_defs = cluster.SimpleFillNIC({})
7349     for nic in self.op.nics:
7350       for name in constants.NICS_PARAMETERS:
7351         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7352           del nic[name]
7353     # osparams
7354     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7355     for name in self.op.osparams.keys():
7356       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7357         del self.op.osparams[name]
7358
7359   def CheckPrereq(self):
7360     """Check prerequisites.
7361
7362     """
7363     if self.op.mode == constants.INSTANCE_IMPORT:
7364       export_info = self._ReadExportInfo()
7365       self._ReadExportParams(export_info)
7366
7367     if (not self.cfg.GetVGName() and
7368         self.op.disk_template not in constants.DTS_NOT_LVM):
7369       raise errors.OpPrereqError("Cluster does not support lvm-based"
7370                                  " instances", errors.ECODE_STATE)
7371
7372     if self.op.hypervisor is None:
7373       self.op.hypervisor = self.cfg.GetHypervisorType()
7374
7375     cluster = self.cfg.GetClusterInfo()
7376     enabled_hvs = cluster.enabled_hypervisors
7377     if self.op.hypervisor not in enabled_hvs:
7378       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7379                                  " cluster (%s)" % (self.op.hypervisor,
7380                                   ",".join(enabled_hvs)),
7381                                  errors.ECODE_STATE)
7382
7383     # check hypervisor parameter syntax (locally)
7384     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7385     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7386                                       self.op.hvparams)
7387     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7388     hv_type.CheckParameterSyntax(filled_hvp)
7389     self.hv_full = filled_hvp
7390     # check that we don't specify global parameters on an instance
7391     _CheckGlobalHvParams(self.op.hvparams)
7392
7393     # fill and remember the beparams dict
7394     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7395     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7396
7397     # build os parameters
7398     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7399
7400     # now that hvp/bep are in final format, let's reset to defaults,
7401     # if told to do so
7402     if self.op.identify_defaults:
7403       self._RevertToDefaults(cluster)
7404
7405     # NIC buildup
7406     self.nics = []
7407     for idx, nic in enumerate(self.op.nics):
7408       nic_mode_req = nic.get("mode", None)
7409       nic_mode = nic_mode_req
7410       if nic_mode is None:
7411         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7412
7413       # in routed mode, for the first nic, the default ip is 'auto'
7414       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7415         default_ip_mode = constants.VALUE_AUTO
7416       else:
7417         default_ip_mode = constants.VALUE_NONE
7418
7419       # ip validity checks
7420       ip = nic.get("ip", default_ip_mode)
7421       if ip is None or ip.lower() == constants.VALUE_NONE:
7422         nic_ip = None
7423       elif ip.lower() == constants.VALUE_AUTO:
7424         if not self.op.name_check:
7425           raise errors.OpPrereqError("IP address set to auto but name checks"
7426                                      " have been skipped",
7427                                      errors.ECODE_INVAL)
7428         nic_ip = self.hostname1.ip
7429       else:
7430         if not netutils.IPAddress.IsValid(ip):
7431           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7432                                      errors.ECODE_INVAL)
7433         nic_ip = ip
7434
7435       # TODO: check the ip address for uniqueness
7436       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7437         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7438                                    errors.ECODE_INVAL)
7439
7440       # MAC address verification
7441       mac = nic.get("mac", constants.VALUE_AUTO)
7442       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7443         mac = utils.NormalizeAndValidateMac(mac)
7444
7445         try:
7446           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7447         except errors.ReservationError:
7448           raise errors.OpPrereqError("MAC address %s already in use"
7449                                      " in cluster" % mac,
7450                                      errors.ECODE_NOTUNIQUE)
7451
7452       # bridge verification
7453       bridge = nic.get("bridge", None)
7454       link = nic.get("link", None)
7455       if bridge and link:
7456         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7457                                    " at the same time", errors.ECODE_INVAL)
7458       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7459         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7460                                    errors.ECODE_INVAL)
7461       elif bridge:
7462         link = bridge
7463
7464       nicparams = {}
7465       if nic_mode_req:
7466         nicparams[constants.NIC_MODE] = nic_mode_req
7467       if link:
7468         nicparams[constants.NIC_LINK] = link
7469
7470       check_params = cluster.SimpleFillNIC(nicparams)
7471       objects.NIC.CheckParameterSyntax(check_params)
7472       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7473
7474     # disk checks/pre-build
7475     self.disks = []
7476     for disk in self.op.disks:
7477       mode = disk.get("mode", constants.DISK_RDWR)
7478       if mode not in constants.DISK_ACCESS_SET:
7479         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7480                                    mode, errors.ECODE_INVAL)
7481       size = disk.get("size", None)
7482       if size is None:
7483         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7484       try:
7485         size = int(size)
7486       except (TypeError, ValueError):
7487         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7488                                    errors.ECODE_INVAL)
7489       vg = disk.get("vg", self.cfg.GetVGName())
7490       new_disk = {"size": size, "mode": mode, "vg": vg}
7491       if "adopt" in disk:
7492         new_disk["adopt"] = disk["adopt"]
7493       self.disks.append(new_disk)
7494
7495     if self.op.mode == constants.INSTANCE_IMPORT:
7496
7497       # Check that the new instance doesn't have less disks than the export
7498       instance_disks = len(self.disks)
7499       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7500       if instance_disks < export_disks:
7501         raise errors.OpPrereqError("Not enough disks to import."
7502                                    " (instance: %d, export: %d)" %
7503                                    (instance_disks, export_disks),
7504                                    errors.ECODE_INVAL)
7505
7506       disk_images = []
7507       for idx in range(export_disks):
7508         option = 'disk%d_dump' % idx
7509         if export_info.has_option(constants.INISECT_INS, option):
7510           # FIXME: are the old os-es, disk sizes, etc. useful?
7511           export_name = export_info.get(constants.INISECT_INS, option)
7512           image = utils.PathJoin(self.op.src_path, export_name)
7513           disk_images.append(image)
7514         else:
7515           disk_images.append(False)
7516
7517       self.src_images = disk_images
7518
7519       old_name = export_info.get(constants.INISECT_INS, 'name')
7520       try:
7521         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7522       except (TypeError, ValueError), err:
7523         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7524                                    " an integer: %s" % str(err),
7525                                    errors.ECODE_STATE)
7526       if self.op.instance_name == old_name:
7527         for idx, nic in enumerate(self.nics):
7528           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7529             nic_mac_ini = 'nic%d_mac' % idx
7530             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7531
7532     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7533
7534     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7535     if self.op.ip_check:
7536       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7537         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7538                                    (self.check_ip, self.op.instance_name),
7539                                    errors.ECODE_NOTUNIQUE)
7540
7541     #### mac address generation
7542     # By generating here the mac address both the allocator and the hooks get
7543     # the real final mac address rather than the 'auto' or 'generate' value.
7544     # There is a race condition between the generation and the instance object
7545     # creation, which means that we know the mac is valid now, but we're not
7546     # sure it will be when we actually add the instance. If things go bad
7547     # adding the instance will abort because of a duplicate mac, and the
7548     # creation job will fail.
7549     for nic in self.nics:
7550       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7551         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7552
7553     #### allocator run
7554
7555     if self.op.iallocator is not None:
7556       self._RunAllocator()
7557
7558     #### node related checks
7559
7560     # check primary node
7561     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7562     assert self.pnode is not None, \
7563       "Cannot retrieve locked node %s" % self.op.pnode
7564     if pnode.offline:
7565       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7566                                  pnode.name, errors.ECODE_STATE)
7567     if pnode.drained:
7568       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7569                                  pnode.name, errors.ECODE_STATE)
7570     if not pnode.vm_capable:
7571       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7572                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7573
7574     self.secondaries = []
7575
7576     # mirror node verification
7577     if self.op.disk_template in constants.DTS_NET_MIRROR:
7578       if self.op.snode == pnode.name:
7579         raise errors.OpPrereqError("The secondary node cannot be the"
7580                                    " primary node.", errors.ECODE_INVAL)
7581       _CheckNodeOnline(self, self.op.snode)
7582       _CheckNodeNotDrained(self, self.op.snode)
7583       _CheckNodeVmCapable(self, self.op.snode)
7584       self.secondaries.append(self.op.snode)
7585
7586     nodenames = [pnode.name] + self.secondaries
7587
7588     if not self.adopt_disks:
7589       # Check lv size requirements, if not adopting
7590       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7591       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7592
7593     else: # instead, we must check the adoption data
7594       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7595       if len(all_lvs) != len(self.disks):
7596         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7597                                    errors.ECODE_INVAL)
7598       for lv_name in all_lvs:
7599         try:
7600           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7601           # to ReserveLV uses the same syntax
7602           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7603         except errors.ReservationError:
7604           raise errors.OpPrereqError("LV named %s used by another instance" %
7605                                      lv_name, errors.ECODE_NOTUNIQUE)
7606
7607       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7608       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7609
7610       node_lvs = self.rpc.call_lv_list([pnode.name],
7611                                        vg_names.payload.keys())[pnode.name]
7612       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7613       node_lvs = node_lvs.payload
7614
7615       delta = all_lvs.difference(node_lvs.keys())
7616       if delta:
7617         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7618                                    utils.CommaJoin(delta),
7619                                    errors.ECODE_INVAL)
7620       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7621       if online_lvs:
7622         raise errors.OpPrereqError("Online logical volumes found, cannot"
7623                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7624                                    errors.ECODE_STATE)
7625       # update the size of disk based on what is found
7626       for dsk in self.disks:
7627         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7628
7629     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7630
7631     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7632     # check OS parameters (remotely)
7633     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7634
7635     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7636
7637     # memory check on primary node
7638     if self.op.start:
7639       _CheckNodeFreeMemory(self, self.pnode.name,
7640                            "creating instance %s" % self.op.instance_name,
7641                            self.be_full[constants.BE_MEMORY],
7642                            self.op.hypervisor)
7643
7644     self.dry_run_result = list(nodenames)
7645
7646   def Exec(self, feedback_fn):
7647     """Create and add the instance to the cluster.
7648
7649     """
7650     instance = self.op.instance_name
7651     pnode_name = self.pnode.name
7652
7653     ht_kind = self.op.hypervisor
7654     if ht_kind in constants.HTS_REQ_PORT:
7655       network_port = self.cfg.AllocatePort()
7656     else:
7657       network_port = None
7658
7659     if constants.ENABLE_FILE_STORAGE:
7660       # this is needed because os.path.join does not accept None arguments
7661       if self.op.file_storage_dir is None:
7662         string_file_storage_dir = ""
7663       else:
7664         string_file_storage_dir = self.op.file_storage_dir
7665
7666       # build the full file storage dir path
7667       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7668                                         string_file_storage_dir, instance)
7669     else:
7670       file_storage_dir = ""
7671
7672     disks = _GenerateDiskTemplate(self,
7673                                   self.op.disk_template,
7674                                   instance, pnode_name,
7675                                   self.secondaries,
7676                                   self.disks,
7677                                   file_storage_dir,
7678                                   self.op.file_driver,
7679                                   0,
7680                                   feedback_fn)
7681
7682     iobj = objects.Instance(name=instance, os=self.op.os_type,
7683                             primary_node=pnode_name,
7684                             nics=self.nics, disks=disks,
7685                             disk_template=self.op.disk_template,
7686                             admin_up=False,
7687                             network_port=network_port,
7688                             beparams=self.op.beparams,
7689                             hvparams=self.op.hvparams,
7690                             hypervisor=self.op.hypervisor,
7691                             osparams=self.op.osparams,
7692                             )
7693
7694     if self.adopt_disks:
7695       # rename LVs to the newly-generated names; we need to construct
7696       # 'fake' LV disks with the old data, plus the new unique_id
7697       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7698       rename_to = []
7699       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7700         rename_to.append(t_dsk.logical_id)
7701         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7702         self.cfg.SetDiskID(t_dsk, pnode_name)
7703       result = self.rpc.call_blockdev_rename(pnode_name,
7704                                              zip(tmp_disks, rename_to))
7705       result.Raise("Failed to rename adoped LVs")
7706     else:
7707       feedback_fn("* creating instance disks...")
7708       try:
7709         _CreateDisks(self, iobj)
7710       except errors.OpExecError:
7711         self.LogWarning("Device creation failed, reverting...")
7712         try:
7713           _RemoveDisks(self, iobj)
7714         finally:
7715           self.cfg.ReleaseDRBDMinors(instance)
7716           raise
7717
7718       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7719         feedback_fn("* wiping instance disks...")
7720         try:
7721           _WipeDisks(self, iobj)
7722         except errors.OpExecError:
7723           self.LogWarning("Device wiping failed, reverting...")
7724           try:
7725             _RemoveDisks(self, iobj)
7726           finally:
7727             self.cfg.ReleaseDRBDMinors(instance)
7728             raise
7729
7730     feedback_fn("adding instance %s to cluster config" % instance)
7731
7732     self.cfg.AddInstance(iobj, self.proc.GetECId())
7733
7734     # Declare that we don't want to remove the instance lock anymore, as we've
7735     # added the instance to the config
7736     del self.remove_locks[locking.LEVEL_INSTANCE]
7737     # Unlock all the nodes
7738     if self.op.mode == constants.INSTANCE_IMPORT:
7739       nodes_keep = [self.op.src_node]
7740       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7741                        if node != self.op.src_node]
7742       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7743       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7744     else:
7745       self.context.glm.release(locking.LEVEL_NODE)
7746       del self.acquired_locks[locking.LEVEL_NODE]
7747
7748     if self.op.wait_for_sync:
7749       disk_abort = not _WaitForSync(self, iobj)
7750     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7751       # make sure the disks are not degraded (still sync-ing is ok)
7752       time.sleep(15)
7753       feedback_fn("* checking mirrors status")
7754       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7755     else:
7756       disk_abort = False
7757
7758     if disk_abort:
7759       _RemoveDisks(self, iobj)
7760       self.cfg.RemoveInstance(iobj.name)
7761       # Make sure the instance lock gets removed
7762       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7763       raise errors.OpExecError("There are some degraded disks for"
7764                                " this instance")
7765
7766     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7767       if self.op.mode == constants.INSTANCE_CREATE:
7768         if not self.op.no_install:
7769           feedback_fn("* running the instance OS create scripts...")
7770           # FIXME: pass debug option from opcode to backend
7771           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7772                                                  self.op.debug_level)
7773           result.Raise("Could not add os for instance %s"
7774                        " on node %s" % (instance, pnode_name))
7775
7776       elif self.op.mode == constants.INSTANCE_IMPORT:
7777         feedback_fn("* running the instance OS import scripts...")
7778
7779         transfers = []
7780
7781         for idx, image in enumerate(self.src_images):
7782           if not image:
7783             continue
7784
7785           # FIXME: pass debug option from opcode to backend
7786           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7787                                              constants.IEIO_FILE, (image, ),
7788                                              constants.IEIO_SCRIPT,
7789                                              (iobj.disks[idx], idx),
7790                                              None)
7791           transfers.append(dt)
7792
7793         import_result = \
7794           masterd.instance.TransferInstanceData(self, feedback_fn,
7795                                                 self.op.src_node, pnode_name,
7796                                                 self.pnode.secondary_ip,
7797                                                 iobj, transfers)
7798         if not compat.all(import_result):
7799           self.LogWarning("Some disks for instance %s on node %s were not"
7800                           " imported successfully" % (instance, pnode_name))
7801
7802       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7803         feedback_fn("* preparing remote import...")
7804         # The source cluster will stop the instance before attempting to make a
7805         # connection. In some cases stopping an instance can take a long time,
7806         # hence the shutdown timeout is added to the connection timeout.
7807         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7808                            self.op.source_shutdown_timeout)
7809         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7810
7811         assert iobj.primary_node == self.pnode.name
7812         disk_results = \
7813           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7814                                         self.source_x509_ca,
7815                                         self._cds, timeouts)
7816         if not compat.all(disk_results):
7817           # TODO: Should the instance still be started, even if some disks
7818           # failed to import (valid for local imports, too)?
7819           self.LogWarning("Some disks for instance %s on node %s were not"
7820                           " imported successfully" % (instance, pnode_name))
7821
7822         # Run rename script on newly imported instance
7823         assert iobj.name == instance
7824         feedback_fn("Running rename script for %s" % instance)
7825         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7826                                                    self.source_instance_name,
7827                                                    self.op.debug_level)
7828         if result.fail_msg:
7829           self.LogWarning("Failed to run rename script for %s on node"
7830                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7831
7832       else:
7833         # also checked in the prereq part
7834         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7835                                      % self.op.mode)
7836
7837     if self.op.start:
7838       iobj.admin_up = True
7839       self.cfg.Update(iobj, feedback_fn)
7840       logging.info("Starting instance %s on node %s", instance, pnode_name)
7841       feedback_fn("* starting instance...")
7842       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7843       result.Raise("Could not start instance")
7844
7845     return list(iobj.all_nodes)
7846
7847
class LUInstanceConsole(NoHooksLU):
  """Provide the console information of an instance.

  This LU does not open the console itself: after verifying that the
  instance is running on its primary node, it returns whatever
  L{_GetInstanceConsole} computes for the instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance in the configuration and checks that its
    primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information for a running instance.

    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # not running: report the state derived from the admin flag
    if inst.admin_up:
      state = constants.INSTST_ERRORDOWN
    else:
      state = constants.INSTST_ADMINDOWN
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
7894
7895
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  The console object is requested from the hypervisor class matching
  the instance's hypervisor and returned in dictionary form.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in parameter defaults
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is wanted
  @rtype: dict
  @return: the result of the console object's C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled hypervisor/backend parameters are handed over separately
  # instead of being written into the instance object, so that defaults
  # can't end up being saved in the instance itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  console = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  # sanity checks on what the hypervisor code returned
  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
7915
7916
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet; this
  class only deals with argument checking, locking and hooks.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    """
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replace tasklet.

    """
    self._ExpandAndLockInstance()

    op = self.op
    node_level = locking.LEVEL_NODE

    if op.iallocator is not None:
      # new secondary not known yet: all node locks are requested
      self.needed_locks[node_level] = locking.ALL_SET

    elif op.remote_node is not None:
      expanded = _ExpandNodeName(self.cfg, op.remote_node)
      op.remote_node = expanded

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [expanded]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    else:
      # only the instance's own nodes are needed; they are filled in by
      # DeclareLocks
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks at the node level.

    @param level: the locking level currently being processed

    """
    if level != locking.LEVEL_NODE:
      return

    # If all nodes are already being locked there is nothing to add;
    # otherwise the instance's primary/secondary nodes must be declared.
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return

    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice, as the second and third element)
    holds the master node, the instance's primary node and, if one was
    given, the new secondary node.

    @return: tuple of (environment dict, node list, node list)

    """
    instance = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = new_secondary
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    nl = [self.cfg.GetMasterNode(), instance.primary_node]
    if new_secondary is not None:
      nl.append(new_secondary)

    return env, nl, nl
7983
7984
7985 class TLReplaceDisks(Tasklet):
7986   """Replaces disks for an instance.
7987
7988   Note: Locking is not within the scope of this class.
7989
7990   """
7991   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7992                disks, delay_iallocator, early_release):
7993     """Initializes this class.
7994
7995     """
7996     Tasklet.__init__(self, lu)
7997
7998     # Parameters
7999     self.instance_name = instance_name
8000     self.mode = mode
8001     self.iallocator_name = iallocator_name
8002     self.remote_node = remote_node
8003     self.disks = disks
8004     self.delay_iallocator = delay_iallocator
8005     self.early_release = early_release
8006
8007     # Runtime data
8008     self.instance = None
8009     self.new_node = None
8010     self.target_node = None
8011     self.other_node = None
8012     self.remote_node_info = None
8013     self.node_secondary_ip = None
8014
8015   @staticmethod
8016   def CheckArguments(mode, remote_node, iallocator):
8017     """Helper function for users of this class.
8018
8019     """
8020     # check for valid parameter combination
8021     if mode == constants.REPLACE_DISK_CHG:
8022       if remote_node is None and iallocator is None:
8023         raise errors.OpPrereqError("When changing the secondary either an"
8024                                    " iallocator script must be used or the"
8025                                    " new node given", errors.ECODE_INVAL)
8026
8027       if remote_node is not None and iallocator is not None:
8028         raise errors.OpPrereqError("Give either the iallocator or the new"
8029                                    " secondary, not both", errors.ECODE_INVAL)
8030
8031     elif remote_node is not None or iallocator is not None:
8032       # Not replacing the secondary
8033       raise errors.OpPrereqError("The iallocator and new node options can"
8034                                  " only be used when changing the"
8035                                  " secondary node", errors.ECODE_INVAL)
8036
8037   @staticmethod
8038   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8039     """Compute a new secondary node using an IAllocator.
8040
8041     """
8042     ial = IAllocator(lu.cfg, lu.rpc,
8043                      mode=constants.IALLOCATOR_MODE_RELOC,
8044                      name=instance_name,
8045                      relocate_from=relocate_from)
8046
8047     ial.Run(iallocator_name)
8048
8049     if not ial.success:
8050       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8051                                  " %s" % (iallocator_name, ial.info),
8052                                  errors.ECODE_NORES)
8053
8054     if len(ial.result) != ial.required_nodes:
8055       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8056                                  " of nodes (%s), required %s" %
8057                                  (iallocator_name,
8058                                   len(ial.result), ial.required_nodes),
8059                                  errors.ECODE_FAULT)
8060
8061     remote_node_name = ial.result[0]
8062
8063     lu.LogInfo("Selected new secondary for instance '%s': %s",
8064                instance_name, remote_node_name)
8065
8066     return remote_node_name
8067
8068   def _FindFaultyDisks(self, node_name):
8069     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8070                                     node_name, True)
8071
8072   def CheckPrereq(self):
8073     """Check prerequisites.
8074
8075     This checks that the instance is in the cluster.
8076
8077     """
8078     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8079     assert instance is not None, \
8080       "Cannot retrieve locked instance %s" % self.instance_name
8081
8082     if instance.disk_template != constants.DT_DRBD8:
8083       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8084                                  " instances", errors.ECODE_INVAL)
8085
8086     if len(instance.secondary_nodes) != 1:
8087       raise errors.OpPrereqError("The instance has a strange layout,"
8088                                  " expected one secondary but found %d" %
8089                                  len(instance.secondary_nodes),
8090                                  errors.ECODE_FAULT)
8091
8092     if not self.delay_iallocator:
8093       self._CheckPrereq2()
8094
8095   def _CheckPrereq2(self):
8096     """Check prerequisites, second part.
8097
8098     This function should always be part of CheckPrereq. It was separated and is
8099     now called from Exec because during node evacuation iallocator was only
8100     called with an unmodified cluster model, not taking planned changes into
8101     account.
8102
8103     """
8104     instance = self.instance
8105     secondary_node = instance.secondary_nodes[0]
8106
8107     if self.iallocator_name is None:
8108       remote_node = self.remote_node
8109     else:
8110       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8111                                        instance.name, instance.secondary_nodes)
8112
8113     if remote_node is not None:
8114       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8115       assert self.remote_node_info is not None, \
8116         "Cannot retrieve locked node %s" % remote_node
8117     else:
8118       self.remote_node_info = None
8119
8120     if remote_node == self.instance.primary_node:
8121       raise errors.OpPrereqError("The specified node is the primary node of"
8122                                  " the instance.", errors.ECODE_INVAL)
8123
8124     if remote_node == secondary_node:
8125       raise errors.OpPrereqError("The specified node is already the"
8126                                  " secondary node of the instance.",
8127                                  errors.ECODE_INVAL)
8128
8129     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8130                                     constants.REPLACE_DISK_CHG):
8131       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8132                                  errors.ECODE_INVAL)
8133
8134     if self.mode == constants.REPLACE_DISK_AUTO:
8135       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8136       faulty_secondary = self._FindFaultyDisks(secondary_node)
8137
8138       if faulty_primary and faulty_secondary:
8139         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8140                                    " one node and can not be repaired"
8141                                    " automatically" % self.instance_name,
8142                                    errors.ECODE_STATE)
8143
8144       if faulty_primary:
8145         self.disks = faulty_primary
8146         self.target_node = instance.primary_node
8147         self.other_node = secondary_node
8148         check_nodes = [self.target_node, self.other_node]
8149       elif faulty_secondary:
8150         self.disks = faulty_secondary
8151         self.target_node = secondary_node
8152         self.other_node = instance.primary_node
8153         check_nodes = [self.target_node, self.other_node]
8154       else:
8155         self.disks = []
8156         check_nodes = []
8157
8158     else:
8159       # Non-automatic modes
8160       if self.mode == constants.REPLACE_DISK_PRI:
8161         self.target_node = instance.primary_node
8162         self.other_node = secondary_node
8163         check_nodes = [self.target_node, self.other_node]
8164
8165       elif self.mode == constants.REPLACE_DISK_SEC:
8166         self.target_node = secondary_node
8167         self.other_node = instance.primary_node
8168         check_nodes = [self.target_node, self.other_node]
8169
8170       elif self.mode == constants.REPLACE_DISK_CHG:
8171         self.new_node = remote_node
8172         self.other_node = instance.primary_node
8173         self.target_node = secondary_node
8174         check_nodes = [self.new_node, self.other_node]
8175
8176         _CheckNodeNotDrained(self.lu, remote_node)
8177         _CheckNodeVmCapable(self.lu, remote_node)
8178
8179         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8180         assert old_node_info is not None
8181         if old_node_info.offline and not self.early_release:
8182           # doesn't make sense to delay the release
8183           self.early_release = True
8184           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8185                           " early-release mode", secondary_node)
8186
8187       else:
8188         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8189                                      self.mode)
8190
8191       # If not specified all disks should be replaced
8192       if not self.disks:
8193         self.disks = range(len(self.instance.disks))
8194
8195     for node in check_nodes:
8196       _CheckNodeOnline(self.lu, node)
8197
8198     # Check whether disks are valid
8199     for disk_idx in self.disks:
8200       instance.FindDisk(disk_idx)
8201
8202     # Get secondary node IP addresses
8203     node_2nd_ip = {}
8204
8205     for node_name in [self.target_node, self.other_node, self.new_node]:
8206       if node_name is not None:
8207         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8208
8209     self.node_secondary_ip = node_2nd_ip
8210
8211   def Exec(self, feedback_fn):
8212     """Execute disk replacement.
8213
8214     This dispatches the disk replacement to the appropriate handler.
8215
8216     """
8217     if self.delay_iallocator:
8218       self._CheckPrereq2()
8219
8220     if not self.disks:
8221       feedback_fn("No disks need replacement")
8222       return
8223
8224     feedback_fn("Replacing disk(s) %s for %s" %
8225                 (utils.CommaJoin(self.disks), self.instance.name))
8226
8227     activate_disks = (not self.instance.admin_up)
8228
8229     # Activate the instance disks if we're replacing them on a down instance
8230     if activate_disks:
8231       _StartInstanceDisks(self.lu, self.instance, True)
8232
8233     try:
8234       # Should we replace the secondary node?
8235       if self.new_node is not None:
8236         fn = self._ExecDrbd8Secondary
8237       else:
8238         fn = self._ExecDrbd8DiskOnly
8239
8240       return fn(feedback_fn)
8241
8242     finally:
8243       # Deactivate the instance disks if we're replacing them on a
8244       # down instance
8245       if activate_disks:
8246         _SafeShutdownInstanceDisks(self.lu, self.instance)
8247
8248   def _CheckVolumeGroup(self, nodes):
8249     self.lu.LogInfo("Checking volume groups")
8250
8251     vgname = self.cfg.GetVGName()
8252
8253     # Make sure volume group exists on all involved nodes
8254     results = self.rpc.call_vg_list(nodes)
8255     if not results:
8256       raise errors.OpExecError("Can't list volume groups on the nodes")
8257
8258     for node in nodes:
8259       res = results[node]
8260       res.Raise("Error checking node %s" % node)
8261       if vgname not in res.payload:
8262         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8263                                  (vgname, node))
8264
8265   def _CheckDisksExistence(self, nodes):
8266     # Check disk existence
8267     for idx, dev in enumerate(self.instance.disks):
8268       if idx not in self.disks:
8269         continue
8270
8271       for node in nodes:
8272         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8273         self.cfg.SetDiskID(dev, node)
8274
8275         result = self.rpc.call_blockdev_find(node, dev)
8276
8277         msg = result.fail_msg
8278         if msg or not result.payload:
8279           if not msg:
8280             msg = "disk not found"
8281           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8282                                    (idx, node, msg))
8283
8284   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8285     for idx, dev in enumerate(self.instance.disks):
8286       if idx not in self.disks:
8287         continue
8288
8289       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8290                       (idx, node_name))
8291
8292       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8293                                    ldisk=ldisk):
8294         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8295                                  " replace disks for instance %s" %
8296                                  (node_name, self.instance.name))
8297
8298   def _CreateNewStorage(self, node_name):
8299     vgname = self.cfg.GetVGName()
8300     iv_names = {}
8301
8302     for idx, dev in enumerate(self.instance.disks):
8303       if idx not in self.disks:
8304         continue
8305
8306       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8307
8308       self.cfg.SetDiskID(dev, node_name)
8309
8310       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8311       names = _GenerateUniqueNames(self.lu, lv_names)
8312
8313       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8314                              logical_id=(vgname, names[0]))
8315       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8316                              logical_id=(vgname, names[1]))
8317
8318       new_lvs = [lv_data, lv_meta]
8319       old_lvs = dev.children
8320       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8321
8322       # we pass force_create=True to force the LVM creation
8323       for new_lv in new_lvs:
8324         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8325                         _GetInstanceInfoText(self.instance), False)
8326
8327     return iv_names
8328
8329   def _CheckDevices(self, node_name, iv_names):
8330     for name, (dev, _, _) in iv_names.iteritems():
8331       self.cfg.SetDiskID(dev, node_name)
8332
8333       result = self.rpc.call_blockdev_find(node_name, dev)
8334
8335       msg = result.fail_msg
8336       if msg or not result.payload:
8337         if not msg:
8338           msg = "disk not found"
8339         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8340                                  (name, msg))
8341
8342       if result.payload.is_degraded:
8343         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8344
8345   def _RemoveOldStorage(self, node_name, iv_names):
8346     for name, (_, old_lvs, _) in iv_names.iteritems():
8347       self.lu.LogInfo("Remove logical volumes for %s" % name)
8348
8349       for lv in old_lvs:
8350         self.cfg.SetDiskID(lv, node_name)
8351
8352         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8353         if msg:
8354           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8355                              hint="remove unused LVs manually")
8356
8357   def _ReleaseNodeLock(self, node_name):
8358     """Releases the lock for a given node."""
8359     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8360
8361   def _ExecDrbd8DiskOnly(self, feedback_fn):
8362     """Replace a disk on the primary or secondary for DRBD 8.
8363
8364     The algorithm for replace is quite complicated:
8365
8366       1. for each disk to be replaced:
8367
8368         1. create new LVs on the target node with unique names
8369         1. detach old LVs from the drbd device
8370         1. rename old LVs to name_replaced.<time_t>
8371         1. rename new LVs to old LVs
8372         1. attach the new LVs (with the old names now) to the drbd device
8373
8374       1. wait for sync across all devices
8375
8376       1. for each modified disk:
8377
8378         1. remove old LVs (which have the name name_replaces.<time_t>)
8379
8380     Failures are not very well handled.
8381
8382     """
8383     steps_total = 6
8384
8385     # Step: check device activation
8386     self.lu.LogStep(1, steps_total, "Check device existence")
8387     self._CheckDisksExistence([self.other_node, self.target_node])
8388     self._CheckVolumeGroup([self.target_node, self.other_node])
8389
8390     # Step: check other node consistency
8391     self.lu.LogStep(2, steps_total, "Check peer consistency")
8392     self._CheckDisksConsistency(self.other_node,
8393                                 self.other_node == self.instance.primary_node,
8394                                 False)
8395
8396     # Step: create new storage
8397     self.lu.LogStep(3, steps_total, "Allocate new storage")
8398     iv_names = self._CreateNewStorage(self.target_node)
8399
8400     # Step: for each lv, detach+rename*2+attach
8401     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8402     for dev, old_lvs, new_lvs in iv_names.itervalues():
8403       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8404
8405       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8406                                                      old_lvs)
8407       result.Raise("Can't detach drbd from local storage on node"
8408                    " %s for device %s" % (self.target_node, dev.iv_name))
8409       #dev.children = []
8410       #cfg.Update(instance)
8411
8412       # ok, we created the new LVs, so now we know we have the needed
8413       # storage; as such, we proceed on the target node to rename
8414       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8415       # using the assumption that logical_id == physical_id (which in
8416       # turn is the unique_id on that node)
8417
8418       # FIXME(iustin): use a better name for the replaced LVs
8419       temp_suffix = int(time.time())
8420       ren_fn = lambda d, suff: (d.physical_id[0],
8421                                 d.physical_id[1] + "_replaced-%s" % suff)
8422
8423       # Build the rename list based on what LVs exist on the node
8424       rename_old_to_new = []
8425       for to_ren in old_lvs:
8426         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8427         if not result.fail_msg and result.payload:
8428           # device exists
8429           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8430
8431       self.lu.LogInfo("Renaming the old LVs on the target node")
8432       result = self.rpc.call_blockdev_rename(self.target_node,
8433                                              rename_old_to_new)
8434       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8435
8436       # Now we rename the new LVs to the old LVs
8437       self.lu.LogInfo("Renaming the new LVs on the target node")
8438       rename_new_to_old = [(new, old.physical_id)
8439                            for old, new in zip(old_lvs, new_lvs)]
8440       result = self.rpc.call_blockdev_rename(self.target_node,
8441                                              rename_new_to_old)
8442       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8443
8444       for old, new in zip(old_lvs, new_lvs):
8445         new.logical_id = old.logical_id
8446         self.cfg.SetDiskID(new, self.target_node)
8447
8448       for disk in old_lvs:
8449         disk.logical_id = ren_fn(disk, temp_suffix)
8450         self.cfg.SetDiskID(disk, self.target_node)
8451
8452       # Now that the new lvs have the old name, we can add them to the device
8453       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8454       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8455                                                   new_lvs)
8456       msg = result.fail_msg
8457       if msg:
8458         for new_lv in new_lvs:
8459           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8460                                                new_lv).fail_msg
8461           if msg2:
8462             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8463                                hint=("cleanup manually the unused logical"
8464                                      "volumes"))
8465         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8466
8467       dev.children = new_lvs
8468
8469       self.cfg.Update(self.instance, feedback_fn)
8470
8471     cstep = 5
8472     if self.early_release:
8473       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8474       cstep += 1
8475       self._RemoveOldStorage(self.target_node, iv_names)
8476       # WARNING: we release both node locks here, do not do other RPCs
8477       # than WaitForSync to the primary node
8478       self._ReleaseNodeLock([self.target_node, self.other_node])
8479
8480     # Wait for sync
8481     # This can fail as the old devices are degraded and _WaitForSync
8482     # does a combined result over all disks, so we don't check its return value
8483     self.lu.LogStep(cstep, steps_total, "Sync devices")
8484     cstep += 1
8485     _WaitForSync(self.lu, self.instance)
8486
8487     # Check all devices manually
8488     self._CheckDevices(self.instance.primary_node, iv_names)
8489
8490     # Step: remove old storage
8491     if not self.early_release:
8492       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8493       cstep += 1
8494       self._RemoveOldStorage(self.target_node, iv_names)
8495
8496   def _ExecDrbd8Secondary(self, feedback_fn):
8497     """Replace the secondary node for DRBD 8.
8498
8499     The algorithm for replace is quite complicated:
8500       - for all disks of the instance:
8501         - create new LVs on the new node with same names
8502         - shutdown the drbd device on the old secondary
8503         - disconnect the drbd network on the primary
8504         - create the drbd device on the new secondary
8505         - network attach the drbd on the primary, using an artifice:
8506           the drbd code for Attach() will connect to the network if it
8507           finds a device which is connected to the good local disks but
8508           not network enabled
8509       - wait for sync across all devices
8510       - remove all disks from the old secondary
8511
8512     Failures are not very well handled.
8513
8514     """
8515     steps_total = 6
8516
8517     # Step: check device activation
8518     self.lu.LogStep(1, steps_total, "Check device existence")
8519     self._CheckDisksExistence([self.instance.primary_node])
8520     self._CheckVolumeGroup([self.instance.primary_node])
8521
8522     # Step: check other node consistency
8523     self.lu.LogStep(2, steps_total, "Check peer consistency")
8524     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8525
8526     # Step: create new storage
8527     self.lu.LogStep(3, steps_total, "Allocate new storage")
8528     for idx, dev in enumerate(self.instance.disks):
8529       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8530                       (self.new_node, idx))
8531       # we pass force_create=True to force LVM creation
8532       for new_lv in dev.children:
8533         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8534                         _GetInstanceInfoText(self.instance), False)
8535
8536     # Step 4: dbrd minors and drbd setups changes
8537     # after this, we must manually remove the drbd minors on both the
8538     # error and the success paths
8539     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8540     minors = self.cfg.AllocateDRBDMinor([self.new_node
8541                                          for dev in self.instance.disks],
8542                                         self.instance.name)
8543     logging.debug("Allocated minors %r", minors)
8544
8545     iv_names = {}
8546     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8547       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8548                       (self.new_node, idx))
8549       # create new devices on new_node; note that we create two IDs:
8550       # one without port, so the drbd will be activated without
8551       # networking information on the new node at this stage, and one
8552       # with network, for the latter activation in step 4
8553       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8554       if self.instance.primary_node == o_node1:
8555         p_minor = o_minor1
8556       else:
8557         assert self.instance.primary_node == o_node2, "Three-node instance?"
8558         p_minor = o_minor2
8559
8560       new_alone_id = (self.instance.primary_node, self.new_node, None,
8561                       p_minor, new_minor, o_secret)
8562       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8563                     p_minor, new_minor, o_secret)
8564
8565       iv_names[idx] = (dev, dev.children, new_net_id)
8566       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8567                     new_net_id)
8568       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8569                               logical_id=new_alone_id,
8570                               children=dev.children,
8571                               size=dev.size)
8572       try:
8573         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8574                               _GetInstanceInfoText(self.instance), False)
8575       except errors.GenericError:
8576         self.cfg.ReleaseDRBDMinors(self.instance.name)
8577         raise
8578
8579     # We have new devices, shutdown the drbd on the old secondary
8580     for idx, dev in enumerate(self.instance.disks):
8581       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8582       self.cfg.SetDiskID(dev, self.target_node)
8583       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8584       if msg:
8585         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8586                            "node: %s" % (idx, msg),
8587                            hint=("Please cleanup this device manually as"
8588                                  " soon as possible"))
8589
8590     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8591     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8592                                                self.node_secondary_ip,
8593                                                self.instance.disks)\
8594                                               [self.instance.primary_node]
8595
8596     msg = result.fail_msg
8597     if msg:
8598       # detaches didn't succeed (unlikely)
8599       self.cfg.ReleaseDRBDMinors(self.instance.name)
8600       raise errors.OpExecError("Can't detach the disks from the network on"
8601                                " old node: %s" % (msg,))
8602
8603     # if we managed to detach at least one, we update all the disks of
8604     # the instance to point to the new secondary
8605     self.lu.LogInfo("Updating instance configuration")
8606     for dev, _, new_logical_id in iv_names.itervalues():
8607       dev.logical_id = new_logical_id
8608       self.cfg.SetDiskID(dev, self.instance.primary_node)
8609
8610     self.cfg.Update(self.instance, feedback_fn)
8611
8612     # and now perform the drbd attach
8613     self.lu.LogInfo("Attaching primary drbds to new secondary"
8614                     " (standalone => connected)")
8615     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8616                                             self.new_node],
8617                                            self.node_secondary_ip,
8618                                            self.instance.disks,
8619                                            self.instance.name,
8620                                            False)
8621     for to_node, to_result in result.items():
8622       msg = to_result.fail_msg
8623       if msg:
8624         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8625                            to_node, msg,
8626                            hint=("please do a gnt-instance info to see the"
8627                                  " status of disks"))
8628     cstep = 5
8629     if self.early_release:
8630       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8631       cstep += 1
8632       self._RemoveOldStorage(self.target_node, iv_names)
8633       # WARNING: we release all node locks here, do not do other RPCs
8634       # than WaitForSync to the primary node
8635       self._ReleaseNodeLock([self.instance.primary_node,
8636                              self.target_node,
8637                              self.new_node])
8638
8639     # Wait for sync
8640     # This can fail as the old devices are degraded and _WaitForSync
8641     # does a combined result over all disks, so we don't check its return value
8642     self.lu.LogStep(cstep, steps_total, "Sync devices")
8643     cstep += 1
8644     _WaitForSync(self.lu, self.instance)
8645
8646     # Check all devices manually
8647     self._CheckDevices(self.instance.primary_node, iv_names)
8648
8649     # Step: remove old storage
8650     if not self.early_release:
8651       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8652       self._RemoveOldStorage(self.target_node, iv_names)
8653
8654
8655 class LURepairNodeStorage(NoHooksLU):
8656   """Repairs the volume group on a node.
8657
8658   """
8659   REQ_BGL = False
8660
8661   def CheckArguments(self):
8662     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8663
8664     storage_type = self.op.storage_type
8665
8666     if (constants.SO_FIX_CONSISTENCY not in
8667         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8668       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8669                                  " repaired" % storage_type,
8670                                  errors.ECODE_INVAL)
8671
8672   def ExpandNames(self):
8673     self.needed_locks = {
8674       locking.LEVEL_NODE: [self.op.node_name],
8675       }
8676
8677   def _CheckFaultyDisks(self, instance, node_name):
8678     """Ensure faulty disks abort the opcode or at least warn."""
8679     try:
8680       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8681                                   node_name, True):
8682         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8683                                    " node '%s'" % (instance.name, node_name),
8684                                    errors.ECODE_STATE)
8685     except errors.OpPrereqError, err:
8686       if self.op.ignore_consistency:
8687         self.proc.LogWarning(str(err.args[0]))
8688       else:
8689         raise
8690
8691   def CheckPrereq(self):
8692     """Check prerequisites.
8693
8694     """
8695     # Check whether any instance on this node has faulty disks
8696     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8697       if not inst.admin_up:
8698         continue
8699       check_nodes = set(inst.all_nodes)
8700       check_nodes.discard(self.op.node_name)
8701       for inst_node_name in check_nodes:
8702         self._CheckFaultyDisks(inst, inst_node_name)
8703
8704   def Exec(self, feedback_fn):
8705     feedback_fn("Repairing storage unit '%s' on %s ..." %
8706                 (self.op.name, self.op.node_name))
8707
8708     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8709     result = self.rpc.call_storage_execute(self.op.node_name,
8710                                            self.op.storage_type, st_args,
8711                                            self.op.name,
8712                                            constants.SO_FIX_CONSISTENCY)
8713     result.Raise("Failed to repair storage unit '%s' on %s" %
8714                  (self.op.name, self.op.node_name))
8715
8716
class LUNodeEvacStrategy(NoHooksLU):
  """Logical unit computing where to move instances off a set of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the iallocator/remote_node argument pair.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node names and compute the needed node locks.

    Without a remote node all nodes are locked; otherwise only the
    evacuated nodes plus the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    With a remote node given, the result is a list of
    [instance name, remote node] pairs, one for every instance returned by
    the secondary-instances lookup on the evacuated nodes; otherwise the
    result of the iallocator run is returned.

    """
    new_secondary = self.op.remote_node

    if new_secondary is None:
      # no fixed target node: let the iallocator compute the solution
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # collect all affected instances first, then validate each of them
    affected = []
    for node_name in self.op.nodes:
      affected.extend(_GetNodeSecondaryInstances(self.cfg, node_name))

    moves = []
    for inst in affected:
      if inst.primary_node == new_secondary:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (new_secondary, inst.name),
                                   errors.ECODE_INVAL)
      moves.append([inst.name, new_secondary])
    return moves
8759
8760
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit enlarging one disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Run the instance expand-and-lock helper and prepare the node locks.

    The node lock list starts empty and is flagged for recalculation.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The hooks node list consists of the master node followed by all
    nodes of the instance; the same list is returned twice.

    """
    hook_env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes = hook_nodes + list(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that all nodes of the instance are online, that the disk
    template is growable and that the requested disk exists; for
    templates other than file, the free disk space on the nodes is
    checked as well.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template == constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature
      # will be supported
      return

    needed_space = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, inst_nodes, needed_space)

  def Exec(self, feedback_fn):
    """Grow the disk on all nodes and record the new size.

    """
    inst = self.instance
    target_disk = self.disk

    (activated, _) = _AssembleInstanceDisks(self, self.instance,
                                            disks=[target_disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(target_disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, target_disk,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # After a "resize" DRBD is in sync mode for a short while, and DRBD
      # 8.x before 8.0.13 has a bug making "resize" fail while in sync
      # mode. As a work-around we sleep for a short amount of time after
      # each grow request.
      time.sleep(5)

    target_disk.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)

    if not self.op.wait_for_sync:
      if not inst.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested.")
      return

    sync_ok = _WaitForSync(self, inst, disks=[target_disk])
    if not sync_ok:
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
    if not inst.admin_up:
      _SafeShutdownInstanceDisks(self, inst, disks=[target_disk])
8855
8856
class LUInstanceQueryData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the (shared) locks.

    If no instances were given, all instance locks are requested and the
    list of wanted names is filled in later by CheckPrereq.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in self.op.instances:
        expanded.append(_ExpandInstanceName(self.cfg, short_name))
      self.needed_locks[locking.LEVEL_INSTANCE] = expanded

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted instance names (defaulting to the acquired
    instance locks) into their configuration objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    instances = []
    for inst_name in self.wanted_names:
      instances.append(self.cfg.GetInstanceInfo(inst_name))
    self.wanted_instances = instances

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Query a node for the status of a block device.

    Returns None for static queries, when no node is given, when the node
    is offline or when the node reports no status; otherwise a tuple of
    (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
    ldisk_status).

    """
    if self.op.static:
      return None
    if not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is not None:
      return (bdev.dev_path, bdev.major, bdev.minor,
              bdev.sync_percent, bdev.estimated_time,
              bdev.is_degraded, bdev.ldisk_status)

    return None

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Build the status dictionary of a disk, recursing into its children.

    For DRBD devices the secondary node is taken from the logical id of
    the device instead of the value passed in.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # the secondary is whichever end of the device is not the primary
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = [self._ComputeDiskStatus(instance, snode, child)
                       for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather the data of all wanted instances.

    Returns a dictionary mapping each instance name to a dictionary
    with its configuration and, unless a static query was requested, its
    run state.

    """
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(inst.primary_node,
                                                  inst.name,
                                                  inst.hypervisor)
        info_result.Raise("Error checking node %s" % inst.primary_node)
        runtime = info_result.payload
        if runtime and "state" in runtime:
          run_state = "up"
        else:
          run_state = "down"

      if inst.admin_up:
        admin_state = "up"
      else:
        admin_state = "down"

      disk_data = []
      for disk in inst.disks:
        disk_data.append(self._ComputeDiskStatus(inst, None, disk))

      all_data[inst.name] = {
        "name": inst.name,
        "config_state": admin_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disk_template": inst.disk_template,
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "os_instance": inst.osparams,
        "os_actual": cluster.SimpleFillOS(inst.os, inst.osparams),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return all_data
9009
9010
9011 class LUInstanceSetParams(LogicalUnit):
9012   """Modifies an instances's parameters.
9013
9014   """
9015   HPATH = "instance-modify"
9016   HTYPE = constants.HTYPE_INSTANCE
9017   REQ_BGL = False
9018
9019   def CheckArguments(self):
9020     if not (self.op.nics or self.op.disks or self.op.disk_template or
9021             self.op.hvparams or self.op.beparams or self.op.os_name):
9022       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9023
9024     if self.op.hvparams:
9025       _CheckGlobalHvParams(self.op.hvparams)
9026
9027     # Disk validation
9028     disk_addremove = 0
9029     for disk_op, disk_dict in self.op.disks:
9030       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9031       if disk_op == constants.DDM_REMOVE:
9032         disk_addremove += 1
9033         continue
9034       elif disk_op == constants.DDM_ADD:
9035         disk_addremove += 1
9036       else:
9037         if not isinstance(disk_op, int):
9038           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9039         if not isinstance(disk_dict, dict):
9040           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9041           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9042
9043       if disk_op == constants.DDM_ADD:
9044         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9045         if mode not in constants.DISK_ACCESS_SET:
9046           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9047                                      errors.ECODE_INVAL)
9048         size = disk_dict.get('size', None)
9049         if size is None:
9050           raise errors.OpPrereqError("Required disk parameter size missing",
9051                                      errors.ECODE_INVAL)
9052         try:
9053           size = int(size)
9054         except (TypeError, ValueError), err:
9055           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9056                                      str(err), errors.ECODE_INVAL)
9057         disk_dict['size'] = size
9058       else:
9059         # modification of disk
9060         if 'size' in disk_dict:
9061           raise errors.OpPrereqError("Disk size change not possible, use"
9062                                      " grow-disk", errors.ECODE_INVAL)
9063
9064     if disk_addremove > 1:
9065       raise errors.OpPrereqError("Only one disk add or remove operation"
9066                                  " supported at a time", errors.ECODE_INVAL)
9067
9068     if self.op.disks and self.op.disk_template is not None:
9069       raise errors.OpPrereqError("Disk template conversion and other disk"
9070                                  " changes not supported at the same time",
9071                                  errors.ECODE_INVAL)
9072
9073     if (self.op.disk_template and
9074         self.op.disk_template in constants.DTS_NET_MIRROR and
9075         self.op.remote_node is None):
9076       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9077                                  " one requires specifying a secondary node",
9078                                  errors.ECODE_INVAL)
9079
9080     # NIC validation
9081     nic_addremove = 0
9082     for nic_op, nic_dict in self.op.nics:
9083       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9084       if nic_op == constants.DDM_REMOVE:
9085         nic_addremove += 1
9086         continue
9087       elif nic_op == constants.DDM_ADD:
9088         nic_addremove += 1
9089       else:
9090         if not isinstance(nic_op, int):
9091           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9092         if not isinstance(nic_dict, dict):
9093           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9094           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9095
9096       # nic_dict should be a dict
9097       nic_ip = nic_dict.get('ip', None)
9098       if nic_ip is not None:
9099         if nic_ip.lower() == constants.VALUE_NONE:
9100           nic_dict['ip'] = None
9101         else:
9102           if not netutils.IPAddress.IsValid(nic_ip):
9103             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9104                                        errors.ECODE_INVAL)
9105
9106       nic_bridge = nic_dict.get('bridge', None)
9107       nic_link = nic_dict.get('link', None)
9108       if nic_bridge and nic_link:
9109         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9110                                    " at the same time", errors.ECODE_INVAL)
9111       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9112         nic_dict['bridge'] = None
9113       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9114         nic_dict['link'] = None
9115
9116       if nic_op == constants.DDM_ADD:
9117         nic_mac = nic_dict.get('mac', None)
9118         if nic_mac is None:
9119           nic_dict['mac'] = constants.VALUE_AUTO
9120
9121       if 'mac' in nic_dict:
9122         nic_mac = nic_dict['mac']
9123         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9124           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9125
9126         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9127           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9128                                      " modifying an existing nic",
9129                                      errors.ECODE_INVAL)
9130
9131     if nic_addremove > 1:
9132       raise errors.OpPrereqError("Only one NIC add or remove operation"
9133                                  " supported at a time", errors.ECODE_INVAL)
9134
9135   def ExpandNames(self):
9136     self._ExpandAndLockInstance()
9137     self.needed_locks[locking.LEVEL_NODE] = []
9138     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9139
9140   def DeclareLocks(self, level):
9141     if level == locking.LEVEL_NODE:
9142       self._LockInstancesNodes()
9143       if self.op.disk_template and self.op.remote_node:
9144         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9145         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9146
9147   def BuildHooksEnv(self):
9148     """Build hooks env.
9149
9150     This runs on the master, primary and secondaries.
9151
9152     """
9153     args = dict()
9154     if constants.BE_MEMORY in self.be_new:
9155       args['memory'] = self.be_new[constants.BE_MEMORY]
9156     if constants.BE_VCPUS in self.be_new:
9157       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9158     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9159     # information at all.
9160     if self.op.nics:
9161       args['nics'] = []
9162       nic_override = dict(self.op.nics)
9163       for idx, nic in enumerate(self.instance.nics):
9164         if idx in nic_override:
9165           this_nic_override = nic_override[idx]
9166         else:
9167           this_nic_override = {}
9168         if 'ip' in this_nic_override:
9169           ip = this_nic_override['ip']
9170         else:
9171           ip = nic.ip
9172         if 'mac' in this_nic_override:
9173           mac = this_nic_override['mac']
9174         else:
9175           mac = nic.mac
9176         if idx in self.nic_pnew:
9177           nicparams = self.nic_pnew[idx]
9178         else:
9179           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9180         mode = nicparams[constants.NIC_MODE]
9181         link = nicparams[constants.NIC_LINK]
9182         args['nics'].append((ip, mac, mode, link))
9183       if constants.DDM_ADD in nic_override:
9184         ip = nic_override[constants.DDM_ADD].get('ip', None)
9185         mac = nic_override[constants.DDM_ADD]['mac']
9186         nicparams = self.nic_pnew[constants.DDM_ADD]
9187         mode = nicparams[constants.NIC_MODE]
9188         link = nicparams[constants.NIC_LINK]
9189         args['nics'].append((ip, mac, mode, link))
9190       elif constants.DDM_REMOVE in nic_override:
9191         del args['nics'][-1]
9192
9193     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9194     if self.op.disk_template:
9195       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9196     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9197     return env, nl, nl
9198
9199   def CheckPrereq(self):
9200     """Check prerequisites.
9201
9202     This only checks the instance list against the existing names.
9203
9204     """
9205     # checking the new params on the primary/secondary nodes
9206
9207     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9208     cluster = self.cluster = self.cfg.GetClusterInfo()
9209     assert self.instance is not None, \
9210       "Cannot retrieve locked instance %s" % self.op.instance_name
9211     pnode = instance.primary_node
9212     nodelist = list(instance.all_nodes)
9213
9214     # OS change
9215     if self.op.os_name and not self.op.force:
9216       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9217                       self.op.force_variant)
9218       instance_os = self.op.os_name
9219     else:
9220       instance_os = instance.os
9221
9222     if self.op.disk_template:
9223       if instance.disk_template == self.op.disk_template:
9224         raise errors.OpPrereqError("Instance already has disk template %s" %
9225                                    instance.disk_template, errors.ECODE_INVAL)
9226
9227       if (instance.disk_template,
9228           self.op.disk_template) not in self._DISK_CONVERSIONS:
9229         raise errors.OpPrereqError("Unsupported disk template conversion from"
9230                                    " %s to %s" % (instance.disk_template,
9231                                                   self.op.disk_template),
9232                                    errors.ECODE_INVAL)
9233       _CheckInstanceDown(self, instance, "cannot change disk template")
9234       if self.op.disk_template in constants.DTS_NET_MIRROR:
9235         if self.op.remote_node == pnode:
9236           raise errors.OpPrereqError("Given new secondary node %s is the same"
9237                                      " as the primary node of the instance" %
9238                                      self.op.remote_node, errors.ECODE_STATE)
9239         _CheckNodeOnline(self, self.op.remote_node)
9240         _CheckNodeNotDrained(self, self.op.remote_node)
9241         # FIXME: here we assume that the old instance type is DT_PLAIN
9242         assert instance.disk_template == constants.DT_PLAIN
9243         disks = [{"size": d.size, "vg": d.logical_id[0]}
9244                  for d in instance.disks]
9245         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9246         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9247
9248     # hvparams processing
9249     if self.op.hvparams:
9250       hv_type = instance.hypervisor
9251       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9252       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9253       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9254
9255       # local check
9256       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9257       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9258       self.hv_new = hv_new # the new actual values
9259       self.hv_inst = i_hvdict # the new dict (without defaults)
9260     else:
9261       self.hv_new = self.hv_inst = {}
9262
9263     # beparams processing
9264     if self.op.beparams:
9265       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9266                                    use_none=True)
9267       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9268       be_new = cluster.SimpleFillBE(i_bedict)
9269       self.be_new = be_new # the new actual values
9270       self.be_inst = i_bedict # the new dict (without defaults)
9271     else:
9272       self.be_new = self.be_inst = {}
9273
9274     # osparams processing
9275     if self.op.osparams:
9276       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9277       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9278       self.os_inst = i_osdict # the new dict (without defaults)
9279     else:
9280       self.os_inst = {}
9281
9282     self.warn = []
9283
9284     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9285       mem_check_list = [pnode]
9286       if be_new[constants.BE_AUTO_BALANCE]:
9287         # either we changed auto_balance to yes or it was from before
9288         mem_check_list.extend(instance.secondary_nodes)
9289       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9290                                                   instance.hypervisor)
9291       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9292                                          instance.hypervisor)
9293       pninfo = nodeinfo[pnode]
9294       msg = pninfo.fail_msg
9295       if msg:
9296         # Assume the primary node is unreachable and go ahead
9297         self.warn.append("Can't get info from primary node %s: %s" %
9298                          (pnode,  msg))
9299       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9300         self.warn.append("Node data from primary node %s doesn't contain"
9301                          " free memory information" % pnode)
9302       elif instance_info.fail_msg:
9303         self.warn.append("Can't get instance runtime information: %s" %
9304                         instance_info.fail_msg)
9305       else:
9306         if instance_info.payload:
9307           current_mem = int(instance_info.payload['memory'])
9308         else:
9309           # Assume instance not running
9310           # (there is a slight race condition here, but it's not very probable,
9311           # and we have no other way to check)
9312           current_mem = 0
9313         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9314                     pninfo.payload['memory_free'])
9315         if miss_mem > 0:
9316           raise errors.OpPrereqError("This change will prevent the instance"
9317                                      " from starting, due to %d MB of memory"
9318                                      " missing on its primary node" % miss_mem,
9319                                      errors.ECODE_NORES)
9320
9321       if be_new[constants.BE_AUTO_BALANCE]:
9322         for node, nres in nodeinfo.items():
9323           if node not in instance.secondary_nodes:
9324             continue
9325           msg = nres.fail_msg
9326           if msg:
9327             self.warn.append("Can't get info from secondary node %s: %s" %
9328                              (node, msg))
9329           elif not isinstance(nres.payload.get('memory_free', None), int):
9330             self.warn.append("Secondary node %s didn't return free"
9331                              " memory information" % node)
9332           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9333             self.warn.append("Not enough memory to failover instance to"
9334                              " secondary node %s" % node)
9335
9336     # NIC processing
9337     self.nic_pnew = {}
9338     self.nic_pinst = {}
9339     for nic_op, nic_dict in self.op.nics:
9340       if nic_op == constants.DDM_REMOVE:
9341         if not instance.nics:
9342           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9343                                      errors.ECODE_INVAL)
9344         continue
9345       if nic_op != constants.DDM_ADD:
9346         # an existing nic
9347         if not instance.nics:
9348           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9349                                      " no NICs" % nic_op,
9350                                      errors.ECODE_INVAL)
9351         if nic_op < 0 or nic_op >= len(instance.nics):
9352           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9353                                      " are 0 to %d" %
9354                                      (nic_op, len(instance.nics) - 1),
9355                                      errors.ECODE_INVAL)
9356         old_nic_params = instance.nics[nic_op].nicparams
9357         old_nic_ip = instance.nics[nic_op].ip
9358       else:
9359         old_nic_params = {}
9360         old_nic_ip = None
9361
9362       update_params_dict = dict([(key, nic_dict[key])
9363                                  for key in constants.NICS_PARAMETERS
9364                                  if key in nic_dict])
9365
9366       if 'bridge' in nic_dict:
9367         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9368
9369       new_nic_params = _GetUpdatedParams(old_nic_params,
9370                                          update_params_dict)
9371       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9372       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9373       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9374       self.nic_pinst[nic_op] = new_nic_params
9375       self.nic_pnew[nic_op] = new_filled_nic_params
9376       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9377
9378       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9379         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9380         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9381         if msg:
9382           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9383           if self.op.force:
9384             self.warn.append(msg)
9385           else:
9386             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9387       if new_nic_mode == constants.NIC_MODE_ROUTED:
9388         if 'ip' in nic_dict:
9389           nic_ip = nic_dict['ip']
9390         else:
9391           nic_ip = old_nic_ip
9392         if nic_ip is None:
9393           raise errors.OpPrereqError('Cannot set the nic ip to None'
9394                                      ' on a routed nic', errors.ECODE_INVAL)
9395       if 'mac' in nic_dict:
9396         nic_mac = nic_dict['mac']
9397         if nic_mac is None:
9398           raise errors.OpPrereqError('Cannot set the nic mac to None',
9399                                      errors.ECODE_INVAL)
9400         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9401           # otherwise generate the mac
9402           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9403         else:
9404           # or validate/reserve the current one
9405           try:
9406             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9407           except errors.ReservationError:
9408             raise errors.OpPrereqError("MAC address %s already in use"
9409                                        " in cluster" % nic_mac,
9410                                        errors.ECODE_NOTUNIQUE)
9411
9412     # DISK processing
9413     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9414       raise errors.OpPrereqError("Disk operations not supported for"
9415                                  " diskless instances",
9416                                  errors.ECODE_INVAL)
9417     for disk_op, _ in self.op.disks:
9418       if disk_op == constants.DDM_REMOVE:
9419         if len(instance.disks) == 1:
9420           raise errors.OpPrereqError("Cannot remove the last disk of"
9421                                      " an instance", errors.ECODE_INVAL)
9422         _CheckInstanceDown(self, instance, "cannot remove disks")
9423
9424       if (disk_op == constants.DDM_ADD and
9425           len(instance.disks) >= constants.MAX_DISKS):
9426         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9427                                    " add more" % constants.MAX_DISKS,
9428                                    errors.ECODE_STATE)
9429       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9430         # an existing disk
9431         if disk_op < 0 or disk_op >= len(instance.disks):
9432           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9433                                      " are 0 to %d" %
9434                                      (disk_op, len(instance.disks)),
9435                                      errors.ECODE_INVAL)
9436
9437     return
9438
9439   def _ConvertPlainToDrbd(self, feedback_fn):
9440     """Converts an instance from plain to drbd.
9441
9442     """
9443     feedback_fn("Converting template to drbd")
9444     instance = self.instance
9445     pnode = instance.primary_node
9446     snode = self.op.remote_node
9447
9448     # create a fake disk info for _GenerateDiskTemplate
9449     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9450     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9451                                       instance.name, pnode, [snode],
9452                                       disk_info, None, None, 0, feedback_fn)
9453     info = _GetInstanceInfoText(instance)
9454     feedback_fn("Creating aditional volumes...")
9455     # first, create the missing data and meta devices
9456     for disk in new_disks:
9457       # unfortunately this is... not too nice
9458       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9459                             info, True)
9460       for child in disk.children:
9461         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9462     # at this stage, all new LVs have been created, we can rename the
9463     # old ones
9464     feedback_fn("Renaming original volumes...")
9465     rename_list = [(o, n.children[0].logical_id)
9466                    for (o, n) in zip(instance.disks, new_disks)]
9467     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9468     result.Raise("Failed to rename original LVs")
9469
9470     feedback_fn("Initializing DRBD devices...")
9471     # all child devices are in place, we can now create the DRBD devices
9472     for disk in new_disks:
9473       for node in [pnode, snode]:
9474         f_create = node == pnode
9475         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9476
9477     # at this point, the instance has been modified
9478     instance.disk_template = constants.DT_DRBD8
9479     instance.disks = new_disks
9480     self.cfg.Update(instance, feedback_fn)
9481
9482     # disks are created, waiting for sync
9483     disk_abort = not _WaitForSync(self, instance)
9484     if disk_abort:
9485       raise errors.OpExecError("There are some degraded disks for"
9486                                " this instance, please cleanup manually")
9487
9488   def _ConvertDrbdToPlain(self, feedback_fn):
9489     """Converts an instance from drbd to plain.
9490
9491     """
9492     instance = self.instance
9493     assert len(instance.secondary_nodes) == 1
9494     pnode = instance.primary_node
9495     snode = instance.secondary_nodes[0]
9496     feedback_fn("Converting template to plain")
9497
9498     old_disks = instance.disks
9499     new_disks = [d.children[0] for d in old_disks]
9500
9501     # copy over size and mode
9502     for parent, child in zip(old_disks, new_disks):
9503       child.size = parent.size
9504       child.mode = parent.mode
9505
9506     # update instance structure
9507     instance.disks = new_disks
9508     instance.disk_template = constants.DT_PLAIN
9509     self.cfg.Update(instance, feedback_fn)
9510
9511     feedback_fn("Removing volumes on the secondary node...")
9512     for disk in old_disks:
9513       self.cfg.SetDiskID(disk, snode)
9514       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9515       if msg:
9516         self.LogWarning("Could not remove block device %s on node %s,"
9517                         " continuing anyway: %s", disk.iv_name, snode, msg)
9518
9519     feedback_fn("Removing unneeded volumes on the primary node...")
9520     for idx, disk in enumerate(old_disks):
9521       meta = disk.children[1]
9522       self.cfg.SetDiskID(meta, pnode)
9523       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9524       if msg:
9525         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9526                         " continuing anyway: %s", idx, pnode, msg)
9527
9528   def Exec(self, feedback_fn):
9529     """Modifies an instance.
9530
9531     All parameters take effect only at the next restart of the instance.
9532
9533     """
9534     # Process here the warnings from CheckPrereq, as we don't have a
9535     # feedback_fn there.
9536     for warn in self.warn:
9537       feedback_fn("WARNING: %s" % warn)
9538
9539     result = []
9540     instance = self.instance
9541     # disk changes
9542     for disk_op, disk_dict in self.op.disks:
9543       if disk_op == constants.DDM_REMOVE:
9544         # remove the last disk
9545         device = instance.disks.pop()
9546         device_idx = len(instance.disks)
9547         for node, disk in device.ComputeNodeTree(instance.primary_node):
9548           self.cfg.SetDiskID(disk, node)
9549           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9550           if msg:
9551             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9552                             " continuing anyway", device_idx, node, msg)
9553         result.append(("disk/%d" % device_idx, "remove"))
9554       elif disk_op == constants.DDM_ADD:
9555         # add a new disk
9556         if instance.disk_template == constants.DT_FILE:
9557           file_driver, file_path = instance.disks[0].logical_id
9558           file_path = os.path.dirname(file_path)
9559         else:
9560           file_driver = file_path = None
9561         disk_idx_base = len(instance.disks)
9562         new_disk = _GenerateDiskTemplate(self,
9563                                          instance.disk_template,
9564                                          instance.name, instance.primary_node,
9565                                          instance.secondary_nodes,
9566                                          [disk_dict],
9567                                          file_path,
9568                                          file_driver,
9569                                          disk_idx_base, feedback_fn)[0]
9570         instance.disks.append(new_disk)
9571         info = _GetInstanceInfoText(instance)
9572
9573         logging.info("Creating volume %s for instance %s",
9574                      new_disk.iv_name, instance.name)
9575         # Note: this needs to be kept in sync with _CreateDisks
9576         #HARDCODE
9577         for node in instance.all_nodes:
9578           f_create = node == instance.primary_node
9579           try:
9580             _CreateBlockDev(self, node, instance, new_disk,
9581                             f_create, info, f_create)
9582           except errors.OpExecError, err:
9583             self.LogWarning("Failed to create volume %s (%s) on"
9584                             " node %s: %s",
9585                             new_disk.iv_name, new_disk, node, err)
9586         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9587                        (new_disk.size, new_disk.mode)))
9588       else:
9589         # change a given disk
9590         instance.disks[disk_op].mode = disk_dict['mode']
9591         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9592
9593     if self.op.disk_template:
9594       r_shut = _ShutdownInstanceDisks(self, instance)
9595       if not r_shut:
9596         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9597                                  " proceed with disk template conversion")
9598       mode = (instance.disk_template, self.op.disk_template)
9599       try:
9600         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9601       except:
9602         self.cfg.ReleaseDRBDMinors(instance.name)
9603         raise
9604       result.append(("disk_template", self.op.disk_template))
9605
9606     # NIC changes
9607     for nic_op, nic_dict in self.op.nics:
9608       if nic_op == constants.DDM_REMOVE:
9609         # remove the last nic
9610         del instance.nics[-1]
9611         result.append(("nic.%d" % len(instance.nics), "remove"))
9612       elif nic_op == constants.DDM_ADD:
9613         # mac and bridge should be set, by now
9614         mac = nic_dict['mac']
9615         ip = nic_dict.get('ip', None)
9616         nicparams = self.nic_pinst[constants.DDM_ADD]
9617         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9618         instance.nics.append(new_nic)
9619         result.append(("nic.%d" % (len(instance.nics) - 1),
9620                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9621                        (new_nic.mac, new_nic.ip,
9622                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9623                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9624                        )))
9625       else:
9626         for key in 'mac', 'ip':
9627           if key in nic_dict:
9628             setattr(instance.nics[nic_op], key, nic_dict[key])
9629         if nic_op in self.nic_pinst:
9630           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9631         for key, val in nic_dict.iteritems():
9632           result.append(("nic.%s/%d" % (key, nic_op), val))
9633
9634     # hvparams changes
9635     if self.op.hvparams:
9636       instance.hvparams = self.hv_inst
9637       for key, val in self.op.hvparams.iteritems():
9638         result.append(("hv/%s" % key, val))
9639
9640     # beparams changes
9641     if self.op.beparams:
9642       instance.beparams = self.be_inst
9643       for key, val in self.op.beparams.iteritems():
9644         result.append(("be/%s" % key, val))
9645
9646     # OS change
9647     if self.op.os_name:
9648       instance.os = self.op.os_name
9649
9650     # osparams changes
9651     if self.op.osparams:
9652       instance.osparams = self.os_inst
9653       for key, val in self.op.osparams.iteritems():
9654         result.append(("os/%s" % key, val))
9655
9656     self.cfg.Update(instance, feedback_fn)
9657
9658     return result
9659
9660   _DISK_CONVERSIONS = {
9661     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9662     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9663     }
9664
9665
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks.

    The nodes named in the opcode are locked; if none were given, all
    nodes are.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose query failed are mapped to C{False}
        instead

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    per_node = self.rpc.call_export_list(self.nodes)
    exports = {}
    for node_name in per_node:
      node_result = per_node[node_name]
      if not node_result.fail_msg:
        exports[node_name] = node_result.payload
      else:
        exports[node_name] = False

    return exports
9700
9701
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance which is going to be exported.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance, verifies that its primary node is
    online and fetches the cluster domain secret.

    """
    name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to report progress
    @rtype: dict or None
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} (a tuple of key name, HMAC of the name and salt)
        and C{x509_ca} (the signed certificate); C{None} for any other
        mode

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # only remote exports need any preparation
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    cert_result = self.rpc.call_x509_cert_create(pnode,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, pem_data) = cert_result.payload
    x509_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                                pem_data)

    # everything handed out is authenticated with the cluster domain
    # secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(x509_cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
9751
9752
9753 class LUBackupExport(LogicalUnit):
9754   """Export an instance to an image in the cluster.
9755
9756   """
9757   HPATH = "instance-export"
9758   HTYPE = constants.HTYPE_INSTANCE
9759   REQ_BGL = False
9760
9761   def CheckArguments(self):
9762     """Check the arguments.
9763
9764     """
9765     self.x509_key_name = self.op.x509_key_name
9766     self.dest_x509_ca_pem = self.op.destination_x509_ca
9767
9768     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9769       if not self.x509_key_name:
9770         raise errors.OpPrereqError("Missing X509 key name for encryption",
9771                                    errors.ECODE_INVAL)
9772
9773       if not self.dest_x509_ca_pem:
9774         raise errors.OpPrereqError("Missing destination X509 CA",
9775                                    errors.ECODE_INVAL)
9776
9777   def ExpandNames(self):
9778     self._ExpandAndLockInstance()
9779
9780     # Lock all nodes for local exports
9781     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9782       # FIXME: lock only instance primary and destination node
9783       #
9784       # Sad but true, for now we have do lock all nodes, as we don't know where
9785       # the previous export might be, and in this LU we search for it and
9786       # remove it from its current node. In the future we could fix this by:
9787       #  - making a tasklet to search (share-lock all), then create the
9788       #    new one, then one to remove, after
9789       #  - removing the removal operation altogether
9790       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9791
9792   def DeclareLocks(self, level):
9793     """Last minute lock declaration."""
9794     # All nodes are locked anyway, so nothing to do here.
9795
9796   def BuildHooksEnv(self):
9797     """Build hooks env.
9798
9799     This will run on the master, primary node and target node.
9800
9801     """
9802     env = {
9803       "EXPORT_MODE": self.op.mode,
9804       "EXPORT_NODE": self.op.target_node,
9805       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9806       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9807       # TODO: Generic function for boolean env variables
9808       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9809       }
9810
9811     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9812
9813     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9814
9815     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9816       nl.append(self.op.target_node)
9817
9818     return env, nl, nl
9819
9820   def CheckPrereq(self):
9821     """Check prerequisites.
9822
9823     This checks that the instance and node names are valid.
9824
9825     """
9826     instance_name = self.op.instance_name
9827
9828     self.instance = self.cfg.GetInstanceInfo(instance_name)
9829     assert self.instance is not None, \
9830           "Cannot retrieve locked instance %s" % self.op.instance_name
9831     _CheckNodeOnline(self, self.instance.primary_node)
9832
9833     if (self.op.remove_instance and self.instance.admin_up and
9834         not self.op.shutdown):
9835       raise errors.OpPrereqError("Can not remove instance without shutting it"
9836                                  " down before")
9837
9838     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9839       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9840       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9841       assert self.dst_node is not None
9842
9843       _CheckNodeOnline(self, self.dst_node.name)
9844       _CheckNodeNotDrained(self, self.dst_node.name)
9845
9846       self._cds = None
9847       self.dest_disk_info = None
9848       self.dest_x509_ca = None
9849
9850     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9851       self.dst_node = None
9852
9853       if len(self.op.target_node) != len(self.instance.disks):
9854         raise errors.OpPrereqError(("Received destination information for %s"
9855                                     " disks, but instance %s has %s disks") %
9856                                    (len(self.op.target_node), instance_name,
9857                                     len(self.instance.disks)),
9858                                    errors.ECODE_INVAL)
9859
9860       cds = _GetClusterDomainSecret()
9861
9862       # Check X509 key name
9863       try:
9864         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9865       except (TypeError, ValueError), err:
9866         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9867
9868       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9869         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9870                                    errors.ECODE_INVAL)
9871
9872       # Load and verify CA
9873       try:
9874         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9875       except OpenSSL.crypto.Error, err:
9876         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9877                                    (err, ), errors.ECODE_INVAL)
9878
9879       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9880       if errcode is not None:
9881         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9882                                    (msg, ), errors.ECODE_INVAL)
9883
9884       self.dest_x509_ca = cert
9885
9886       # Verify target information
9887       disk_info = []
9888       for idx, disk_data in enumerate(self.op.target_node):
9889         try:
9890           (host, port, magic) = \
9891             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9892         except errors.GenericError, err:
9893           raise errors.OpPrereqError("Target info for disk %s: %s" %
9894                                      (idx, err), errors.ECODE_INVAL)
9895
9896         disk_info.append((host, port, magic))
9897
9898       assert len(disk_info) == len(self.op.target_node)
9899       self.dest_disk_info = disk_info
9900
9901     else:
9902       raise errors.ProgrammerError("Unhandled export mode %r" %
9903                                    self.op.mode)
9904
9905     # instance disk type verification
9906     # TODO: Implement export support for file-based disks
9907     for disk in self.instance.disks:
9908       if disk.dev_type == constants.LD_FILE:
9909         raise errors.OpPrereqError("Export not supported for instances with"
9910                                    " file-based disks", errors.ECODE_INVAL)
9911
9912   def _CleanupExports(self, feedback_fn):
9913     """Removes exports of current instance from all other nodes.
9914
9915     If an instance in a cluster with nodes A..D was exported to node C, its
9916     exports will be removed from the nodes A, B and D.
9917
9918     """
9919     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9920
9921     nodelist = self.cfg.GetNodeList()
9922     nodelist.remove(self.dst_node.name)
9923
9924     # on one-node clusters nodelist will be empty after the removal
9925     # if we proceed the backup would be removed because OpBackupQuery
9926     # substitutes an empty list with the full cluster node list.
9927     iname = self.instance.name
9928     if nodelist:
9929       feedback_fn("Removing old exports for instance %s" % iname)
9930       exportlist = self.rpc.call_export_list(nodelist)
9931       for node in exportlist:
9932         if exportlist[node].fail_msg:
9933           continue
9934         if iname in exportlist[node].payload:
9935           msg = self.rpc.call_export_remove(node, iname).fail_msg
9936           if msg:
9937             self.LogWarning("Could not remove older export for instance %s"
9938                             " on node %s: %s", iname, node, msg)
9939
9940   def Exec(self, feedback_fn):
9941     """Export an instance to an image in the cluster.
9942
9943     """
9944     assert self.op.mode in constants.EXPORT_MODES
9945
9946     instance = self.instance
9947     src_node = instance.primary_node
9948
9949     if self.op.shutdown:
9950       # shutdown the instance, but not the disks
9951       feedback_fn("Shutting down instance %s" % instance.name)
9952       result = self.rpc.call_instance_shutdown(src_node, instance,
9953                                                self.op.shutdown_timeout)
9954       # TODO: Maybe ignore failures if ignore_remove_failures is set
9955       result.Raise("Could not shutdown instance %s on"
9956                    " node %s" % (instance.name, src_node))
9957
9958     # set the disks ID correctly since call_instance_start needs the
9959     # correct drbd minor to create the symlinks
9960     for disk in instance.disks:
9961       self.cfg.SetDiskID(disk, src_node)
9962
9963     activate_disks = (not instance.admin_up)
9964
9965     if activate_disks:
9966       # Activate the instance disks if we'exporting a stopped instance
9967       feedback_fn("Activating disks for %s" % instance.name)
9968       _StartInstanceDisks(self, instance, None)
9969
9970     try:
9971       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9972                                                      instance)
9973
9974       helper.CreateSnapshots()
9975       try:
9976         if (self.op.shutdown and instance.admin_up and
9977             not self.op.remove_instance):
9978           assert not activate_disks
9979           feedback_fn("Starting instance %s" % instance.name)
9980           result = self.rpc.call_instance_start(src_node, instance, None, None)
9981           msg = result.fail_msg
9982           if msg:
9983             feedback_fn("Failed to start instance: %s" % msg)
9984             _ShutdownInstanceDisks(self, instance)
9985             raise errors.OpExecError("Could not start instance: %s" % msg)
9986
9987         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9988           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9989         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9990           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9991           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9992
9993           (key_name, _, _) = self.x509_key_name
9994
9995           dest_ca_pem = \
9996             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9997                                             self.dest_x509_ca)
9998
9999           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10000                                                      key_name, dest_ca_pem,
10001                                                      timeouts)
10002       finally:
10003         helper.Cleanup()
10004
10005       # Check for backwards compatibility
10006       assert len(dresults) == len(instance.disks)
10007       assert compat.all(isinstance(i, bool) for i in dresults), \
10008              "Not all results are boolean: %r" % dresults
10009
10010     finally:
10011       if activate_disks:
10012         feedback_fn("Deactivating disks for %s" % instance.name)
10013         _ShutdownInstanceDisks(self, instance)
10014
10015     if not (compat.all(dresults) and fin_resu):
10016       failures = []
10017       if not fin_resu:
10018         failures.append("export finalization")
10019       if not compat.all(dresults):
10020         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10021                                if not dsk)
10022         failures.append("disk export: disk(s) %s" % fdsk)
10023
10024       raise errors.OpExecError("Export failed, errors in %s" %
10025                                utils.CommaJoin(failures))
10026
10027     # At this point, the export was successful, we can cleanup/finish
10028
10029     # Remove instance if requested
10030     if self.op.remove_instance:
10031       feedback_fn("Removing instance %s" % instance.name)
10032       _RemoveInstance(self, feedback_fn, instance,
10033                       self.op.ignore_remove_failures)
10034
10035     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10036       self._CleanupExports(feedback_fn)
10037
10038     return fin_resu, dresults
10039
10040
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    All locked nodes are asked for their exports and every export of the
    instance is removed. Nodes which cannot be queried are skipped with a
    warning; failed removals are logged as errors.

    @param feedback_fn: function used to tell the user when nothing was
        found for a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    unknown_instance = not expanded_name
    if unknown_instance:
      export_name = self.op.instance_name
    else:
      export_name = expanded_name

    nodes = self.acquired_locks[locking.LEVEL_NODE]
    per_node = self.rpc.call_export_list(nodes)
    removed_any = False
    for node_name in per_node:
      query_error = per_node[node_name].fail_msg
      if query_error:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node_name, query_error)
        continue
      if export_name not in per_node[node_name].payload:
        continue

      removed_any = True
      remove_error = self.rpc.call_export_remove(node_name,
                                                 export_name).fail_msg
      if remove_error:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node_name,
                      remove_error)

    if unknown_instance and not removed_any:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10086
10087
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generate the UUID of the new group and request a lock for it.

    """
    # The UUID has to be known this early because the lock to be created
    # and acquired is named after it. Exec() later tells cfg.AddNodeGroup
    # not to check whether the UUID already exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already. If node parameters were given, their types are enforced as
    well.

    @raise errors.OpPrereqError: if a group with that name exists

    """
    wanted_name = self.op.group_name

    # a failing lookup is the good case here: the name is still free
    name_taken = True
    try:
      taken_by = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      name_taken = False

    if name_taken:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, taken_by),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (just the group name) and two node lists,
        both containing only the master node

    """
    master = self.cfg.GetMasterNode()
    hooks_env = {
      "GROUP_NAME": self.op.group_name,
      }
    return hooks_env, [master], [master]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    new_group = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    # the UUID was generated in ExpandNames, hence no existence check
    ec_id = self.proc.GetECId()
    self.cfg.AddNodeGroup(new_group, ec_id, check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10145
10146
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the target group and the nodes, and declare the locks.

    Locks are requested on the given nodes, on the destination group and on
    every group one of the nodes currently belongs to.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
    affected_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(affected_groups),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Retrieves the destination group and computes which instances would be
    split across groups by the assignment. Newly split instances are only
    accepted if the opcode's force flag is set, in which case warnings are
    logged instead.

    @raise errors.OpExecError: if the group cannot be retrieved, or if
      instances would be newly split and force was not given

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        # Already-split instances are only mentioned together with new splits
        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    # The node objects fetched in ExpandNames are modified directly
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Assignments to the group a node already belongs to are no-ops
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split in the current configuration: nodes in more than one group
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split once the changes are applied on top of the current groups
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
10259
10260
class _GroupQuery(_QueryBase):
  """Query helper for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups; no locks are requested.

    Without requested names all groups are wanted, ordered by name. Otherwise
    each requested name may be either a group UUID or a group name.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested name matches neither a UUID
      nor a group name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        # A match on the UUID takes precedence over a match on the name
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        # Format the names for humans instead of embedding a list repr
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(utils.NiceSort(missing)),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} built from the wanted groups, the
      group to node names mapping and the group to instance names mapping;
      a mapping is None if the corresponding data was not requested

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      # Only nodes belonging to a wanted group are recorded
      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        # An instance is listed under the group of its primary node
        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
10335
10336
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} helper.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the requested names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query helper expand the names.

    """
    helper = self.gq
    helper.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    helper = self.gq
    return helper.OldStyleQuery(self)
10352
10353
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Make sure at least one modification was requested.

    @raise errors.OpPrereqError: if every modifiable parameter is None

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Retrieves the group and, if node parameters were passed, computes and
    type-checks the parameter set the group will have after the change.

    @raise errors.OpExecError: if the group cannot be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce the types on the merged result and not on the raw request, as
      # the merged dict is what Exec() stores on the group
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment dict and two single-element lists, each holding
      the master node

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @rtype: list
    @return: (parameter name, new value) pairs; only a change of the node
      parameters is reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
10421
10422
10423
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing node groups.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # The lookup raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid
    self.needed_locks = {locking.LEVEL_NODEGROUP: [group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The group may only be removed if it contains no nodes and if it is not
    the last group of the cluster.

    """
    target_uuid = self.group_uuid

    # Collect the nodes still assigned to the group
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == target_uuid:
        members.append(node.name)

    if members:
      member_list = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, member_list),
                                 errors.ECODE_STATE)

    # The cluster must not be left without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment dict and two single-element lists, each holding
      the master node

    """
    master = self.cfg.GetMasterNode()
    hooks_env = {"GROUP_NAME": self.op.group_name}
    return hooks_env, [master], [master]

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    group_uuid = self.group_uuid
    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    # NOTE(review): presumably this schedules the lock of the removed group
    # for removal -- confirm against the processor's handling of remove_locks
    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
10484
10485
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming node groups.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the current group name and request the lock of the group.

    """
    # The lookup raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid
    self.needed_locks = {locking.LEVEL_NODEGROUP: [group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure that no node group carries the requested new name yet.

    """
    wanted_name = self.op.new_name
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      # A failed lookup means the new name is still available
      pass
    else:
      message = ("Desired new name '%s' clashes with existing"
                 " node group (UUID: %s)" % (wanted_name, clashing_uuid))
      raise errors.OpPrereqError(message, errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment dict followed, twice, by the list of nodes: the
      master node first, then all other nodes belonging to the group

    """
    hooks_env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

    master = self.cfg.GetMasterNode()
    other_nodes = self.cfg.GetAllNodesInfo()
    # The master heads the list, so do not consider it a second time
    other_nodes.pop(master, None)

    hook_nodes = [master]
    hook_nodes.extend([node.name for node in other_nodes.values()
                       if node.group == self.group_uuid])

    return hooks_env, hook_nodes, hook_nodes

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group
    @raise errors.OpExecError: if the group cannot be retrieved

    """
    renamed_group = self.cfg.GetNodeGroup(self.group_uuid)
    if renamed_group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    new_name = self.op.new_name
    renamed_group.name = new_name
    self.cfg.Update(renamed_group, feedback_fn)

    return new_name
10549
10550
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  Abstract base class shared by the tags LUs; it expands the object name,
  requests the matching lock and looks up the tagged object.

  """

  def ExpandNames(self):
    """Expand the object name and request a lock on the object.

    Only node and instance tags lead to a lock request.

    """
    self.needed_locks = {}
    tag_kind = self.op.kind
    if tag_kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif tag_kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object the tags belong to in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not cluster, node or
      instance

    """
    tag_kind = self.op.kind
    config = self.cfg
    if tag_kind == constants.TAG_CLUSTER:
      self.target = config.GetClusterInfo()
    elif tag_kind == constants.TAG_NODE:
      self.target = config.GetNodeInfo(self.op.name)
    elif tag_kind == constants.TAG_INSTANCE:
      self.target = config.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(tag_kind), errors.ECODE_INVAL)
10583
10584
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as in the base class and mark all lock levels shared.

    """
    TagsLU.ExpandNames(self)

    # Reading tags modifies nothing, hence shared locks are sufficient
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
10602
10603
10604 class LUTagsSearch(NoHooksLU):
10605   """Searches the tags for a given pattern.
10606
10607   """
10608   REQ_BGL = False
10609
10610   def ExpandNames(self):
10611     self.needed_locks = {}
10612
10613   def CheckPrereq(self):
10614     """Check prerequisites.
10615
10616     This checks the pattern passed for validity by compiling it.
10617
10618     """
10619     try:
10620       self.re = re.compile(self.op.pattern)
10621     except re.error, err:
10622       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10623                                  (self.op.pattern, err), errors.ECODE_INVAL)
10624
10625   def Exec(self, feedback_fn):
10626     """Returns the tag list.
10627
10628     """
10629     cfg = self.cfg
10630     tgts = [("/cluster", cfg.GetClusterInfo())]
10631     ilist = cfg.GetAllInstancesInfo().values()
10632     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10633     nlist = cfg.GetAllNodesInfo().values()
10634     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10635     results = []
10636     for path, target in tgts:
10637       for tag in target.GetTags():
10638         if self.re.search(tag):
10639           results.append((path, tag))
10640     return results
10641
10642
10643 class LUTagsSet(TagsLU):
10644   """Sets a tag on a given object.
10645
10646   """
10647   REQ_BGL = False
10648
10649   def CheckPrereq(self):
10650     """Check prerequisites.
10651
10652     This checks the type and length of the tag name and value.
10653
10654     """
10655     TagsLU.CheckPrereq(self)
10656     for tag in self.op.tags:
10657       objects.TaggableObject.ValidateTag(tag)
10658
10659   def Exec(self, feedback_fn):
10660     """Sets the tag.
10661
10662     """
10663     try:
10664       for tag in self.op.tags:
10665         self.target.AddTag(tag)
10666     except errors.TagError, err:
10667       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10668     self.cfg.Update(self.target, feedback_fn)
10669
10670
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the given tags and verifies that the target object currently
    has every one of them.

    @raise errors.OpPrereqError: if some of the tags are not set on the
      object

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    requested = frozenset(self.op.tags)
    absent = requested - self.target.GetTags()
    if not absent:
      return

    quoted = ["'%s'" % name for name in sorted(absent)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object and save it.

    """
    tagged = self.target
    for tag in self.op.tags:
      tagged.RemoveTag(tag)
    self.cfg.Update(tagged, feedback_fn)
10703
10704
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The sleep takes place on the master, on a list of nodes, or on both,
  depending on the opcode.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and locks on them are
    requested.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted_nodes
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes,
                                              self.op.duration)
      for node_name, outcome in node_results.items():
        outcome.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging the
    iterations.

    """
    repetitions = self.op.repeat
    if repetitions == 0:
      self._TestDelay()
      return

    last_iteration = repetitions - 1
    for iteration in range(repetitions):
      self.LogInfo("Test delay iteration %d/%d" % (iteration, last_iteration))
      self._TestDelay()
10751
10752
10753 class LUTestJqueue(NoHooksLU):
10754   """Utility LU to test some aspects of the job queue.
10755
10756   """
10757   REQ_BGL = False
10758
10759   # Must be lower than default timeout for WaitForJobChange to see whether it
10760   # notices changed jobs
10761   _CLIENT_CONNECT_TIMEOUT = 20.0
10762   _CLIENT_CONFIRM_TIMEOUT = 60.0
10763
10764   @classmethod
10765   def _NotifyUsingSocket(cls, cb, errcls):
10766     """Opens a Unix socket and waits for another program to connect.
10767
10768     @type cb: callable
10769     @param cb: Callback to send socket name to client
10770     @type errcls: class
10771     @param errcls: Exception class to use for errors
10772
10773     """
10774     # Using a temporary directory as there's no easy way to create temporary
10775     # sockets without writing a custom loop around tempfile.mktemp and
10776     # socket.bind
10777     tmpdir = tempfile.mkdtemp()
10778     try:
10779       tmpsock = utils.PathJoin(tmpdir, "sock")
10780
10781       logging.debug("Creating temporary socket at %s", tmpsock)
10782       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10783       try:
10784         sock.bind(tmpsock)
10785         sock.listen(1)
10786
10787         # Send details to client
10788         cb(tmpsock)
10789
10790         # Wait for client to connect before continuing
10791         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10792         try:
10793           (conn, _) = sock.accept()
10794         except socket.error, err:
10795           raise errcls("Client didn't connect in time (%s)" % err)
10796       finally:
10797         sock.close()
10798     finally:
10799       # Remove as soon as client is connected
10800       shutil.rmtree(tmpdir)
10801
10802     # Wait for client to close
10803     try:
10804       try:
10805         # pylint: disable-msg=E1101
10806         # Instance of '_socketobject' has no ... member
10807         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10808         conn.recv(1)
10809       except socket.error, err:
10810         raise errcls("Client failed to confirm notification (%s)" % err)
10811     finally:
10812       conn.close()
10813
10814   def _SendNotification(self, test, arg, sockname):
10815     """Sends a notification to the client.
10816
10817     @type test: string
10818     @param test: Test name
10819     @param arg: Test argument (depends on test)
10820     @type sockname: string
10821     @param sockname: Socket path
10822
10823     """
10824     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10825
10826   def _Notify(self, prereq, test, arg):
10827     """Notifies the client of a test.
10828
10829     @type prereq: bool
10830     @param prereq: Whether this is a prereq-phase test
10831     @type test: string
10832     @param test: Test name
10833     @param arg: Test argument (depends on test)
10834
10835     """
10836     if prereq:
10837       errcls = errors.OpPrereqError
10838     else:
10839       errcls = errors.OpExecError
10840
10841     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10842                                                   test, arg),
10843                                    errcls)
10844
10845   def CheckArguments(self):
10846     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10847     self.expandnames_calls = 0
10848
10849   def ExpandNames(self):
10850     checkargs_calls = getattr(self, "checkargs_calls", 0)
10851     if checkargs_calls < 1:
10852       raise errors.ProgrammerError("CheckArguments was not called")
10853
10854     self.expandnames_calls += 1
10855
10856     if self.op.notify_waitlock:
10857       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10858
10859     self.LogInfo("Expanding names")
10860
10861     # Get lock on master node (just to get a lock, not for a particular reason)
10862     self.needed_locks = {
10863       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10864       }
10865
10866   def Exec(self, feedback_fn):
10867     if self.expandnames_calls < 1:
10868       raise errors.ProgrammerError("ExpandNames was not called")
10869
10870     if self.op.notify_exec:
10871       self._Notify(False, constants.JQT_EXEC, None)
10872
10873     self.LogInfo("Executing")
10874
10875     if self.op.log_messages:
10876       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10877       for idx, msg in enumerate(self.op.log_messages):
10878         self.LogInfo("Sending log message %s", idx + 1)
10879         feedback_fn(constants.JQT_MSGPREFIX + msg)
10880         # Report how many test messages have been sent
10881         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10882
10883     if self.op.fail:
10884       raise errors.OpExecError("Opcode failure was requested")
10885
10886     return True
10887
10888
10889 class IAllocator(object):
10890   """IAllocator framework.
10891
10892   An IAllocator instance has three sets of attributes:
10893     - cfg that is needed to query the cluster
10894     - input data (all members of the _KEYS class attribute are required)
10895     - four buffer attributes (in|out_data|text), that represent the
10896       input (to the external script) in text and data structure format,
10897       and the output from it, again in two formats
10898     - the result variables from the script (success, info, nodes) for
10899       easy usage
10900
10901   """
10902   # pylint: disable-msg=R0902
10903   # lots of instance attributes
10904   _ALLO_KEYS = [
10905     "name", "mem_size", "disks", "disk_template",
10906     "os", "tags", "nics", "vcpus", "hypervisor",
10907     ]
10908   _RELO_KEYS = [
10909     "name", "relocate_from",
10910     ]
10911   _EVAC_KEYS = [
10912     "evac_nodes",
10913     ]
10914
10915   def __init__(self, cfg, rpc, mode, **kwargs):
10916     self.cfg = cfg
10917     self.rpc = rpc
10918     # init buffer variables
10919     self.in_text = self.out_text = self.in_data = self.out_data = None
10920     # init all input fields so that pylint is happy
10921     self.mode = mode
10922     self.mem_size = self.disks = self.disk_template = None
10923     self.os = self.tags = self.nics = self.vcpus = None
10924     self.hypervisor = None
10925     self.relocate_from = None
10926     self.name = None
10927     self.evac_nodes = None
10928     # computed fields
10929     self.required_nodes = None
10930     # init result fields
10931     self.success = self.info = self.result = None
10932     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10933       keyset = self._ALLO_KEYS
10934       fn = self._AddNewInstance
10935     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10936       keyset = self._RELO_KEYS
10937       fn = self._AddRelocateInstance
10938     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10939       keyset = self._EVAC_KEYS
10940       fn = self._AddEvacuateNodes
10941     else:
10942       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10943                                    " IAllocator" % self.mode)
10944     for key in kwargs:
10945       if key not in keyset:
10946         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10947                                      " IAllocator" % key)
10948       setattr(self, key, kwargs[key])
10949
10950     for key in keyset:
10951       if key not in kwargs:
10952         raise errors.ProgrammerError("Missing input parameter '%s' to"
10953                                      " IAllocator" % key)
10954     self._BuildInputData(fn)
10955
10956   def _ComputeClusterData(self):
10957     """Compute the generic allocator input data.
10958
10959     This is the data that is independent of the actual operation.
10960
10961     """
10962     cfg = self.cfg
10963     cluster_info = cfg.GetClusterInfo()
10964     # cluster data
10965     data = {
10966       "version": constants.IALLOCATOR_VERSION,
10967       "cluster_name": cfg.GetClusterName(),
10968       "cluster_tags": list(cluster_info.GetTags()),
10969       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10970       # we don't have job IDs
10971       }
10972     ninfo = cfg.GetAllNodesInfo()
10973     iinfo = cfg.GetAllInstancesInfo().values()
10974     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10975
10976     # node data
10977     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10978
10979     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10980       hypervisor_name = self.hypervisor
10981     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10982       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10983     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10984       hypervisor_name = cluster_info.enabled_hypervisors[0]
10985
10986     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10987                                         hypervisor_name)
10988     node_iinfo = \
10989       self.rpc.call_all_instances_info(node_list,
10990                                        cluster_info.enabled_hypervisors)
10991
10992     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10993
10994     config_ndata = self._ComputeBasicNodeData(ninfo)
10995     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10996                                                  i_list, config_ndata)
10997     assert len(data["nodes"]) == len(ninfo), \
10998         "Incomplete node data computed"
10999
11000     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11001
11002     self.in_data = data
11003
11004   @staticmethod
11005   def _ComputeNodeGroupData(cfg):
11006     """Compute node groups data.
11007
11008     """
11009     ng = {}
11010     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11011       ng[guuid] = {
11012         "name": gdata.name,
11013         "alloc_policy": gdata.alloc_policy,
11014         }
11015     return ng
11016
11017   @staticmethod
11018   def _ComputeBasicNodeData(node_cfg):
11019     """Compute global node data.
11020
11021     @rtype: dict
11022     @returns: a dict of name: (node dict, node config)
11023
11024     """
11025     node_results = {}
11026     for ninfo in node_cfg.values():
11027       # fill in static (config-based) values
11028       pnr = {
11029         "tags": list(ninfo.GetTags()),
11030         "primary_ip": ninfo.primary_ip,
11031         "secondary_ip": ninfo.secondary_ip,
11032         "offline": ninfo.offline,
11033         "drained": ninfo.drained,
11034         "master_candidate": ninfo.master_candidate,
11035         "group": ninfo.group,
11036         "master_capable": ninfo.master_capable,
11037         "vm_capable": ninfo.vm_capable,
11038         }
11039
11040       node_results[ninfo.name] = pnr
11041
11042     return node_results
11043
11044   @staticmethod
11045   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11046                               node_results):
11047     """Compute global node data.
11048
11049     @param node_results: the basic node structures as filled from the config
11050
11051     """
11052     # make a copy of the current dict
11053     node_results = dict(node_results)
11054     for nname, nresult in node_data.items():
11055       assert nname in node_results, "Missing basic data for node %s" % nname
11056       ninfo = node_cfg[nname]
11057
11058       if not (ninfo.offline or ninfo.drained):
11059         nresult.Raise("Can't get data for node %s" % nname)
11060         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11061                                 nname)
11062         remote_info = nresult.payload
11063
11064         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11065                      'vg_size', 'vg_free', 'cpu_total']:
11066           if attr not in remote_info:
11067             raise errors.OpExecError("Node '%s' didn't return attribute"
11068                                      " '%s'" % (nname, attr))
11069           if not isinstance(remote_info[attr], int):
11070             raise errors.OpExecError("Node '%s' returned invalid value"
11071                                      " for '%s': %s" %
11072                                      (nname, attr, remote_info[attr]))
11073         # compute memory used by primary instances
11074         i_p_mem = i_p_up_mem = 0
11075         for iinfo, beinfo in i_list:
11076           if iinfo.primary_node == nname:
11077             i_p_mem += beinfo[constants.BE_MEMORY]
11078             if iinfo.name not in node_iinfo[nname].payload:
11079               i_used_mem = 0
11080             else:
11081               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11082             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11083             remote_info['memory_free'] -= max(0, i_mem_diff)
11084
11085             if iinfo.admin_up:
11086               i_p_up_mem += beinfo[constants.BE_MEMORY]
11087
11088         # compute memory used by instances
11089         pnr_dyn = {
11090           "total_memory": remote_info['memory_total'],
11091           "reserved_memory": remote_info['memory_dom0'],
11092           "free_memory": remote_info['memory_free'],
11093           "total_disk": remote_info['vg_size'],
11094           "free_disk": remote_info['vg_free'],
11095           "total_cpus": remote_info['cpu_total'],
11096           "i_pri_memory": i_p_mem,
11097           "i_pri_up_memory": i_p_up_mem,
11098           }
11099         pnr_dyn.update(node_results[nname])
11100         node_results[nname] = pnr_dyn
11101
11102     return node_results
11103
11104   @staticmethod
11105   def _ComputeInstanceData(cluster_info, i_list):
11106     """Compute global instance data.
11107
11108     """
11109     instance_data = {}
11110     for iinfo, beinfo in i_list:
11111       nic_data = []
11112       for nic in iinfo.nics:
11113         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11114         nic_dict = {"mac": nic.mac,
11115                     "ip": nic.ip,
11116                     "mode": filled_params[constants.NIC_MODE],
11117                     "link": filled_params[constants.NIC_LINK],
11118                    }
11119         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11120           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11121         nic_data.append(nic_dict)
11122       pir = {
11123         "tags": list(iinfo.GetTags()),
11124         "admin_up": iinfo.admin_up,
11125         "vcpus": beinfo[constants.BE_VCPUS],
11126         "memory": beinfo[constants.BE_MEMORY],
11127         "os": iinfo.os,
11128         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11129         "nics": nic_data,
11130         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11131         "disk_template": iinfo.disk_template,
11132         "hypervisor": iinfo.hypervisor,
11133         }
11134       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11135                                                  pir["disks"])
11136       instance_data[iinfo.name] = pir
11137
11138     return instance_data
11139
11140   def _AddNewInstance(self):
11141     """Add new instance data to allocator structure.
11142
11143     This in combination with _AllocatorGetClusterData will create the
11144     correct structure needed as input for the allocator.
11145
11146     The checks for the completeness of the opcode must have already been
11147     done.
11148
11149     """
11150     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11151
11152     if self.disk_template in constants.DTS_NET_MIRROR:
11153       self.required_nodes = 2
11154     else:
11155       self.required_nodes = 1
11156     request = {
11157       "name": self.name,
11158       "disk_template": self.disk_template,
11159       "tags": self.tags,
11160       "os": self.os,
11161       "vcpus": self.vcpus,
11162       "memory": self.mem_size,
11163       "disks": self.disks,
11164       "disk_space_total": disk_space,
11165       "nics": self.nics,
11166       "required_nodes": self.required_nodes,
11167       }
11168     return request
11169
11170   def _AddRelocateInstance(self):
11171     """Add relocate instance data to allocator structure.
11172
11173     This in combination with _IAllocatorGetClusterData will create the
11174     correct structure needed as input for the allocator.
11175
11176     The checks for the completeness of the opcode must have already been
11177     done.
11178
11179     """
11180     instance = self.cfg.GetInstanceInfo(self.name)
11181     if instance is None:
11182       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11183                                    " IAllocator" % self.name)
11184
11185     if instance.disk_template not in constants.DTS_NET_MIRROR:
11186       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11187                                  errors.ECODE_INVAL)
11188
11189     if len(instance.secondary_nodes) != 1:
11190       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11191                                  errors.ECODE_STATE)
11192
11193     self.required_nodes = 1
11194     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11195     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11196
11197     request = {
11198       "name": self.name,
11199       "disk_space_total": disk_space,
11200       "required_nodes": self.required_nodes,
11201       "relocate_from": self.relocate_from,
11202       }
11203     return request
11204
11205   def _AddEvacuateNodes(self):
11206     """Add evacuate nodes data to allocator structure.
11207
11208     """
11209     request = {
11210       "evac_nodes": self.evac_nodes
11211       }
11212     return request
11213
11214   def _BuildInputData(self, fn):
11215     """Build input data structures.
11216
11217     """
11218     self._ComputeClusterData()
11219
11220     request = fn()
11221     request["type"] = self.mode
11222     self.in_data["request"] = request
11223
11224     self.in_text = serializer.Dump(self.in_data)
11225
11226   def Run(self, name, validate=True, call_fn=None):
11227     """Run an instance allocator and return the results.
11228
11229     """
11230     if call_fn is None:
11231       call_fn = self.rpc.call_iallocator_runner
11232
11233     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11234     result.Raise("Failure while running the iallocator script")
11235
11236     self.out_text = result.payload
11237     if validate:
11238       self._ValidateResult()
11239
11240   def _ValidateResult(self):
11241     """Process the allocator results.
11242
11243     This will process and if successful save the result in
11244     self.out_data and the other parameters.
11245
11246     """
11247     try:
11248       rdict = serializer.Load(self.out_text)
11249     except Exception, err:
11250       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11251
11252     if not isinstance(rdict, dict):
11253       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11254
11255     # TODO: remove backwards compatiblity in later versions
11256     if "nodes" in rdict and "result" not in rdict:
11257       rdict["result"] = rdict["nodes"]
11258       del rdict["nodes"]
11259
11260     for key in "success", "info", "result":
11261       if key not in rdict:
11262         raise errors.OpExecError("Can't parse iallocator results:"
11263                                  " missing key '%s'" % key)
11264       setattr(self, key, rdict[key])
11265
11266     if not isinstance(rdict["result"], list):
11267       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11268                                " is not a list")
11269     self.out_data = rdict
11270
11271
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction
  it returns either the input text generated for the allocator or the raw
  output of an actual allocator run.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter needed by the selected
        mode is missing or invalid, if the instance to be allocated
        already exists, if no allocator name was given for the "out"
        direction, or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test only makes sense for a not yet existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # the allocator is actually run only for the "out" direction
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function (not used by this LU)
    @return: for the "in" direction the input text generated for the
        allocator, otherwise the unvalidated output text of the allocator
    @raise errors.ProgrammerError: if the mode is not one of the modes
        handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated with "%": passing it as a second
      # argument would leave the "%s" placeholder unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw allocator output is returned, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
11367
11368
#: Query type implementations, indexed by the query resource name
_QUERY_IMPL = dict([
  (constants.QR_INSTANCE, _InstanceQuery),
  (constants.QR_NODE, _NodeQuery),
  (constants.QR_GROUP, _GroupQuery),
  ])
11375
11376
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
  @return: the entry of L{_QUERY_IMPL} registered under the given name
  @raise errors.OpPrereqError: if no implementation is registered under
      the given name

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]