code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have way too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
def _GetNodeInstances(cfg, node_name):
  """Return all instances having the node among their nodes.

  @param cfg: configuration object
  @param node_name: name of the node to look for in C{all_nodes}
  @rtype: list
  @return: the primary and secondary instances of the node

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1013
1014
def _GetNodePrimaryInstances(cfg, node_name):
  """Return the instances whose primary node is the given node.

  @param cfg: configuration object
  @param node_name: name compared against each instance's primary node
  @rtype: list
  @return: the matching instance objects

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1021
1022
def _GetNodeSecondaryInstances(cfg, node_name):
  """Return the instances having the given node as a secondary.

  @param cfg: configuration object
  @param node_name: name to look for in each instance's secondary nodes
  @rtype: list
  @return: the matching instance objects

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1029
1030
def _GetStorageTypeArgs(cfg, storage_type):
  """Compute the extra arguments needed for a storage type.

  @param cfg: configuration object, queried for the file storage dir
  @param storage_type: one of the storage type constants
  @rtype: list
  @return: a one-element list holding the list of storage directories
      for file storage, an empty list for every other type

  """
  args = []
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    args.append([cfg.GetFileStorageDir()])
  return args
1041
1042
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Find the indices of an instance's disks that are faulty on a node.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc: RPC runner used for the mirror status call
  @param instance: the instance whose disks are examined
  @param node_name: the node to query
  @param prereq: passed to the RPC result's Raise method
  @rtype: list
  @return: indices of the disks whose local disk status is
      constants.LDS_FAULTY; disks without a status are skipped

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, disks)
  errmsg = "Failed to get disk status from node %s" % node_name
  result.Raise(errmsg, prereq=prereq, ecode=errors.ECODE_ENVIRON)

  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
class LUClusterPostInit(LogicalUnit):
  """Logical unit whose only purpose is to run the cluster-init hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: tuple
    @return: the environment (only C{OP_TARGET}, set to the cluster
        name), an empty list and a list holding the master node;
        NOTE(review): the two lists are presumably the nodes for the pre
        and post hook phases - confirm against the hooks runner

    """
    cluster_name = self.cfg.GetClusterName()
    master = self.cfg.GetMasterNode()
    return ({"OP_TARGET": cluster_name}, [], [master])

  def Exec(self, feedback_fn):
    """Do nothing; the LU exists only for its hooks.

    @param feedback_fn: unused
    @rtype: boolean
    @return: always True

    """
    return True
1112
1113
class LUClusterDestroy(LogicalUnit):
  """Logical unit that tears the cluster down.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: tuple
    @return: the environment (only C{OP_TARGET}, set to the cluster
        name) followed by two empty lists

    """
    cluster_name = self.cfg.GetClusterName()
    return ({"OP_TARGET": cluster_name}, [], [])

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster may only be destroyed when the master is the sole node
    left and no instances are configured.

    @raise errors.OpPrereqError: if other nodes or any instances remain

    """
    master = self.cfg.GetMasterNode()

    nodes = self.cfg.GetNodeList()
    only_master_left = (len(nodes) == 1 and nodes[0] == master)
    if not only_master_left:
      nodes_msg = ("There are still %d node(s) in this cluster." %
                   (len(nodes) - 1))
      raise errors.OpPrereqError(nodes_msg, errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      inst_msg = ("There are still %d instance(s) in this cluster." %
                  len(instances))
      raise errors.OpPrereqError(inst_msg, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master, then asks the master node to
    stop its master role.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # hook failures are deliberately downgraded to a warning
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  Problems are reported through L{_Error} and L{_ErrorIf}, using the
  error-code tuples defined below.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # item types, used as the first element of the error-code tuples
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item type, error name) tuples; _Error unpacks them
  # in this order
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # ETYPE_FIELD is the name of the keyword argument through which callers
  # of _Error/_ErrorIf select the severity; the other two are the
  # severities themselves (ETYPE_ERROR is the default)
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  # matches the (empty) start of every line, due to re.M
  # NOTE(review): presumably used to indent hook output; the usage is
  # not part of this section - confirm
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1401     if ninfo.vm_capable and isinstance(hvp_result, list):
1402       for item, hv_name, hv_result in hvp_result:
1403         _ErrorIf(True, self.ENODEHV, node,
1404                  "hypervisor %s parameter verify failure (source %s): %s",
1405                  hv_name, item, hv_result)
1406
1407     test = nresult.get(constants.NV_NODESETUP,
1408                            ["Missing NODESETUP results"])
1409     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1410              "; ".join(test))
1411
1412     return True
1413
1414   def _VerifyNodeTime(self, ninfo, nresult,
1415                       nvinfo_starttime, nvinfo_endtime):
1416     """Check the node time.
1417
1418     @type ninfo: L{objects.Node}
1419     @param ninfo: the node to check
1420     @param nresult: the remote results for the node
1421     @param nvinfo_starttime: the start time of the RPC call
1422     @param nvinfo_endtime: the end time of the RPC call
1423
1424     """
1425     node = ninfo.name
1426     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1427
1428     ntime = nresult.get(constants.NV_TIME, None)
1429     try:
1430       ntime_merged = utils.MergeTime(ntime)
1431     except (ValueError, TypeError):
1432       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1433       return
1434
1435     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1436       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1437     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1438       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1439     else:
1440       ntime_diff = None
1441
1442     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1443              "Node time diverges by at least %s from master node time",
1444              ntime_diff)
1445
1446   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1447     """Check the node LVM results.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param vg_name: the configured VG name
1453
1454     """
1455     if vg_name is None:
1456       return
1457
1458     node = ninfo.name
1459     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1460
1461     # checks vg existence and size > 20G
1462     vglist = nresult.get(constants.NV_VGLIST, None)
1463     test = not vglist
1464     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1465     if not test:
1466       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1467                                             constants.MIN_VG_SIZE)
1468       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1469
1470     # check pv names
1471     pvlist = nresult.get(constants.NV_PVLIST, None)
1472     test = pvlist is None
1473     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1474     if not test:
1475       # check that ':' is not present in PV names, since it's a
1476       # special character for lvcreate (denotes the range of PEs to
1477       # use on the PV)
1478       for _, pvname, owner_vg in pvlist:
1479         test = ":" in pvname
1480         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1481                  " '%s' of VG '%s'", pvname, owner_vg)
1482
1483   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1484     """Check the node bridges.
1485
1486     @type ninfo: L{objects.Node}
1487     @param ninfo: the node to check
1488     @param nresult: the remote results for the node
1489     @param bridges: the expected list of bridges
1490
1491     """
1492     if not bridges:
1493       return
1494
1495     node = ninfo.name
1496     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1497
1498     missing = nresult.get(constants.NV_BRIDGES, None)
1499     test = not isinstance(missing, list)
1500     _ErrorIf(test, self.ENODENET, node,
1501              "did not return valid bridge information")
1502     if not test:
1503       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1504                utils.CommaJoin(sorted(missing)))
1505
1506   def _VerifyNodeNetwork(self, ninfo, nresult):
1507     """Check the node network connectivity results.
1508
1509     @type ninfo: L{objects.Node}
1510     @param ninfo: the node to check
1511     @param nresult: the remote results for the node
1512
1513     """
1514     node = ninfo.name
1515     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1516
1517     test = constants.NV_NODELIST not in nresult
1518     _ErrorIf(test, self.ENODESSH, node,
1519              "node hasn't returned node ssh connectivity data")
1520     if not test:
1521       if nresult[constants.NV_NODELIST]:
1522         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1523           _ErrorIf(True, self.ENODESSH, node,
1524                    "ssh communication with node '%s': %s", a_node, a_msg)
1525
1526     test = constants.NV_NODENETTEST not in nresult
1527     _ErrorIf(test, self.ENODENET, node,
1528              "node hasn't returned node tcp connectivity data")
1529     if not test:
1530       if nresult[constants.NV_NODENETTEST]:
1531         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1532         for anode in nlist:
1533           _ErrorIf(True, self.ENODENET, node,
1534                    "tcp communication with node '%s': %s",
1535                    anode, nresult[constants.NV_NODENETTEST][anode])
1536
1537     test = constants.NV_MASTERIP not in nresult
1538     _ErrorIf(test, self.ENODENET, node,
1539              "node hasn't returned node master IP reachability data")
1540     if not test:
1541       if not nresult[constants.NV_MASTERIP]:
1542         if node == self.master_node:
1543           msg = "the master node cannot reach the master IP (not configured?)"
1544         else:
1545           msg = "cannot reach the master IP"
1546         _ErrorIf(True, self.ENODENET, node, msg)
1547
1548   def _VerifyInstance(self, instance, instanceconfig, node_image,
1549                       diskstatus):
1550     """Verify an instance.
1551
1552     This function checks to see if the required block devices are
1553     available on the instance's node.
1554
1555     """
1556     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1557     node_current = instanceconfig.primary_node
1558
1559     node_vol_should = {}
1560     instanceconfig.MapLVsByNode(node_vol_should)
1561
1562     for node in node_vol_should:
1563       n_img = node_image[node]
1564       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1565         # ignore missing volumes on offline or broken nodes
1566         continue
1567       for volume in node_vol_should[node]:
1568         test = volume not in n_img.volumes
1569         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1570                  "volume %s missing on node %s", volume, node)
1571
1572     if instanceconfig.admin_up:
1573       pri_img = node_image[node_current]
1574       test = instance not in pri_img.instances and not pri_img.offline
1575       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1576                "instance not running on its primary node %s",
1577                node_current)
1578
1579     for node, n_img in node_image.items():
1580       if node != node_current:
1581         test = instance in n_img.instances
1582         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1583                  "instance should not run on node %s", node)
1584
1585     diskdata = [(nname, success, status, idx)
1586                 for (nname, disks) in diskstatus.items()
1587                 for idx, (success, status) in enumerate(disks)]
1588
1589     for nname, success, bdev_status, idx in diskdata:
1590       # the 'ghost node' construction in Exec() ensures that we have a
1591       # node here
1592       snode = node_image[nname]
1593       bad_snode = snode.ghost or snode.offline
1594       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1595                self.EINSTANCEFAULTYDISK, instance,
1596                "couldn't retrieve status for disk/%s on %s: %s",
1597                idx, nname, bdev_status)
1598       _ErrorIf((instanceconfig.admin_up and success and
1599                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1600                self.EINSTANCEFAULTYDISK, instance,
1601                "disk/%s on %s is faulty", idx, nname)
1602
1603   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1604     """Verify if there are any unknown volumes in the cluster.
1605
1606     The .os, .swap and backup volumes are ignored. All other volumes are
1607     reported as unknown.
1608
1609     @type reserved: L{ganeti.utils.FieldSet}
1610     @param reserved: a FieldSet of reserved volume names
1611
1612     """
1613     for node, n_img in node_image.items():
1614       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1615         # skip non-healthy nodes
1616         continue
1617       for volume in n_img.volumes:
1618         test = ((node not in node_vol_should or
1619                 volume not in node_vol_should[node]) and
1620                 not reserved.Matches(volume))
1621         self._ErrorIf(test, self.ENODEORPHANLV, node,
1622                       "volume %s is unknown", volume)
1623
1624   def _VerifyOrphanInstances(self, instancelist, node_image):
1625     """Verify the list of running instances.
1626
1627     This checks what instances are running but unknown to the cluster.
1628
1629     """
1630     for node, n_img in node_image.items():
1631       for o_inst in n_img.instances:
1632         test = o_inst not in instancelist
1633         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1634                       "instance %s on node %s should not exist", o_inst, node)
1635
1636   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1637     """Verify N+1 Memory Resilience.
1638
1639     Check that if one single node dies we can still start all the
1640     instances it was primary for.
1641
1642     """
1643     for node, n_img in node_image.items():
1644       # This code checks that every node which is now listed as
1645       # secondary has enough memory to host all instances it is
1646       # supposed to should a single other node in the cluster fail.
1647       # FIXME: not ready for failover to an arbitrary node
1648       # FIXME: does not support file-backed instances
1649       # WARNING: we currently take into account down instances as well
1650       # as up ones, considering that even if they're down someone
1651       # might want to start them even in the event of a node failure.
1652       if n_img.offline:
1653         # we're skipping offline nodes from the N+1 warning, since
1654         # most likely we don't have good memory infromation from them;
1655         # we already list instances living on such nodes, and that's
1656         # enough warning
1657         continue
1658       for prinode, instances in n_img.sbp.items():
1659         needed_mem = 0
1660         for instance in instances:
1661           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1662           if bep[constants.BE_AUTO_BALANCE]:
1663             needed_mem += bep[constants.BE_MEMORY]
1664         test = n_img.mfree < needed_mem
1665         self._ErrorIf(test, self.ENODEN1, node,
1666                       "not enough memory to accomodate instance failovers"
1667                       " should node %s fail (%dMiB needed, %dMiB available)",
1668                       prinode, needed_mem, n_img.mfree)
1669
1670   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1671                        master_files):
1672     """Verifies and computes the node required file checksums.
1673
1674     @type ninfo: L{objects.Node}
1675     @param ninfo: the node to check
1676     @param nresult: the remote results for the node
1677     @param file_list: required list of files
1678     @param local_cksum: dictionary of local files and their checksums
1679     @param master_files: list of files that only masters should have
1680
1681     """
1682     node = ninfo.name
1683     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1684
1685     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1686     test = not isinstance(remote_cksum, dict)
1687     _ErrorIf(test, self.ENODEFILECHECK, node,
1688              "node hasn't returned file checksum data")
1689     if test:
1690       return
1691
1692     for file_name in file_list:
1693       node_is_mc = ninfo.master_candidate
1694       must_have = (file_name not in master_files) or node_is_mc
1695       # missing
1696       test1 = file_name not in remote_cksum
1697       # invalid checksum
1698       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1699       # existing and good
1700       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1701       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1702                "file '%s' missing", file_name)
1703       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1704                "file '%s' has wrong checksum", file_name)
1705       # not candidate and this is not a must-have file
1706       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1707                "file '%s' should not exist on non master"
1708                " candidates (and the file is outdated)", file_name)
1709       # all good, except non-master/non-must have combination
1710       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1711                "file '%s' should not exist"
1712                " on non master candidates", file_name)
1713
1714   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1715                       drbd_map):
1716     """Verifies and the node DRBD status.
1717
1718     @type ninfo: L{objects.Node}
1719     @param ninfo: the node to check
1720     @param nresult: the remote results for the node
1721     @param instanceinfo: the dict of instances
1722     @param drbd_helper: the configured DRBD usermode helper
1723     @param drbd_map: the DRBD map as returned by
1724         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1725
1726     """
1727     node = ninfo.name
1728     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1729
1730     if drbd_helper:
1731       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1732       test = (helper_result == None)
1733       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1734                "no drbd usermode helper returned")
1735       if helper_result:
1736         status, payload = helper_result
1737         test = not status
1738         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1739                  "drbd usermode helper check unsuccessful: %s", payload)
1740         test = status and (payload != drbd_helper)
1741         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1742                  "wrong drbd usermode helper: %s", payload)
1743
1744     # compute the DRBD minors
1745     node_drbd = {}
1746     for minor, instance in drbd_map[node].items():
1747       test = instance not in instanceinfo
1748       _ErrorIf(test, self.ECLUSTERCFG, None,
1749                "ghost instance '%s' in temporary DRBD map", instance)
1750         # ghost instance should not be running, but otherwise we
1751         # don't give double warnings (both ghost instance and
1752         # unallocated minor in use)
1753       if test:
1754         node_drbd[minor] = (instance, False)
1755       else:
1756         instance = instanceinfo[instance]
1757         node_drbd[minor] = (instance.name, instance.admin_up)
1758
1759     # and now check them
1760     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1761     test = not isinstance(used_minors, (tuple, list))
1762     _ErrorIf(test, self.ENODEDRBD, node,
1763              "cannot parse drbd status file: %s", str(used_minors))
1764     if test:
1765       # we cannot check drbd status
1766       return
1767
1768     for minor, (iname, must_exist) in node_drbd.items():
1769       test = minor not in used_minors and must_exist
1770       _ErrorIf(test, self.ENODEDRBD, node,
1771                "drbd minor %d of instance %s is not active", minor, iname)
1772     for minor in used_minors:
1773       test = minor not in node_drbd
1774       _ErrorIf(test, self.ENODEDRBD, node,
1775                "unallocated drbd minor %d is in use", minor)
1776
1777   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1778     """Builds the node OS structures.
1779
1780     @type ninfo: L{objects.Node}
1781     @param ninfo: the node to check
1782     @param nresult: the remote results for the node
1783     @param nimg: the node image object
1784
1785     """
1786     node = ninfo.name
1787     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788
1789     remote_os = nresult.get(constants.NV_OSLIST, None)
1790     test = (not isinstance(remote_os, list) or
1791             not compat.all(isinstance(v, list) and len(v) == 7
1792                            for v in remote_os))
1793
1794     _ErrorIf(test, self.ENODEOS, node,
1795              "node hasn't returned valid OS data")
1796
1797     nimg.os_fail = test
1798
1799     if test:
1800       return
1801
1802     os_dict = {}
1803
1804     for (name, os_path, status, diagnose,
1805          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1806
1807       if name not in os_dict:
1808         os_dict[name] = []
1809
1810       # parameters is a list of lists instead of list of tuples due to
1811       # JSON lacking a real tuple type, fix it:
1812       parameters = [tuple(v) for v in parameters]
1813       os_dict[name].append((os_path, status, diagnose,
1814                             set(variants), set(parameters), set(api_ver)))
1815
1816     nimg.oslist = os_dict
1817
1818   def _VerifyNodeOS(self, ninfo, nimg, base):
1819     """Verifies the node OS list.
1820
1821     @type ninfo: L{objects.Node}
1822     @param ninfo: the node to check
1823     @param nimg: the node image object
1824     @param base: the 'template' node we match against (e.g. from the master)
1825
1826     """
1827     node = ninfo.name
1828     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1829
1830     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1831
1832     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1833     for os_name, os_data in nimg.oslist.items():
1834       assert os_data, "Empty OS status for OS %s?!" % os_name
1835       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1836       _ErrorIf(not f_status, self.ENODEOS, node,
1837                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1838       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1839                "OS '%s' has multiple entries (first one shadows the rest): %s",
1840                os_name, utils.CommaJoin([v[0] for v in os_data]))
1841       # this will catched in backend too
1842       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1843                and not f_var, self.ENODEOS, node,
1844                "OS %s with API at least %d does not declare any variant",
1845                os_name, constants.OS_API_V15)
1846       # comparisons with the 'base' image
1847       test = os_name not in base.oslist
1848       _ErrorIf(test, self.ENODEOS, node,
1849                "Extra OS %s not present on reference node (%s)",
1850                os_name, base.name)
1851       if test:
1852         continue
1853       assert base.oslist[os_name], "Base node has empty OS status?"
1854       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1855       if not b_status:
1856         # base OS is invalid, skipping
1857         continue
1858       for kind, a, b in [("API version", f_api, b_api),
1859                          ("variants list", f_var, b_var),
1860                          ("parameters", beautify_params(f_param),
1861                           beautify_params(b_param))]:
1862         _ErrorIf(a != b, self.ENODEOS, node,
1863                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1864                  kind, os_name, base.name,
1865                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1866
1867     # check any missing OSes
1868     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1869     _ErrorIf(missing, self.ENODEOS, node,
1870              "OSes present on reference node %s but missing on this node: %s",
1871              base.name, utils.CommaJoin(missing))
1872
1873   def _VerifyOob(self, ninfo, nresult):
1874     """Verifies out of band functionality of a node.
1875
1876     @type ninfo: L{objects.Node}
1877     @param ninfo: the node to check
1878     @param nresult: the remote results for the node
1879
1880     """
1881     node = ninfo.name
1882     # We just have to verify the paths on master and/or master candidates
1883     # as the oob helper is invoked on the master
1884     if ((ninfo.master_candidate or ninfo.master_capable) and
1885         constants.NV_OOB_PATHS in nresult):
1886       for path_result in nresult[constants.NV_OOB_PATHS]:
1887         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1888
1889   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1890     """Verifies and updates the node volume data.
1891
1892     This function will update a L{NodeImage}'s internal structures
1893     with data from the remote call.
1894
1895     @type ninfo: L{objects.Node}
1896     @param ninfo: the node to check
1897     @param nresult: the remote results for the node
1898     @param nimg: the node image object
1899     @param vg_name: the configured VG name
1900
1901     """
1902     node = ninfo.name
1903     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1904
1905     nimg.lvm_fail = True
1906     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1907     if vg_name is None:
1908       pass
1909     elif isinstance(lvdata, basestring):
1910       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1911                utils.SafeEncode(lvdata))
1912     elif not isinstance(lvdata, dict):
1913       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1914     else:
1915       nimg.volumes = lvdata
1916       nimg.lvm_fail = False
1917
1918   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1919     """Verifies and updates the node instance list.
1920
1921     If the listing was successful, then updates this node's instance
1922     list. Otherwise, it marks the RPC call as failed for the instance
1923     list key.
1924
1925     @type ninfo: L{objects.Node}
1926     @param ninfo: the node to check
1927     @param nresult: the remote results for the node
1928     @param nimg: the node image object
1929
1930     """
1931     idata = nresult.get(constants.NV_INSTANCELIST, None)
1932     test = not isinstance(idata, list)
1933     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1934                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1935     if test:
1936       nimg.hyp_fail = True
1937     else:
1938       nimg.instances = idata
1939
1940   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1941     """Verifies and computes a node information map
1942
1943     @type ninfo: L{objects.Node}
1944     @param ninfo: the node to check
1945     @param nresult: the remote results for the node
1946     @param nimg: the node image object
1947     @param vg_name: the configured VG name
1948
1949     """
1950     node = ninfo.name
1951     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1952
1953     # try to read free memory (from the hypervisor)
1954     hv_info = nresult.get(constants.NV_HVINFO, None)
1955     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1956     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1957     if not test:
1958       try:
1959         nimg.mfree = int(hv_info["memory_free"])
1960       except (ValueError, TypeError):
1961         _ErrorIf(True, self.ENODERPC, node,
1962                  "node returned invalid nodeinfo, check hypervisor")
1963
1964     # FIXME: devise a free space model for file based instances as well
1965     if vg_name is not None:
1966       test = (constants.NV_VGLIST not in nresult or
1967               vg_name not in nresult[constants.NV_VGLIST])
1968       _ErrorIf(test, self.ENODELVM, node,
1969                "node didn't return data for the volume group '%s'"
1970                " - it is either missing or broken", vg_name)
1971       if not test:
1972         try:
1973           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1974         except (ValueError, TypeError):
1975           _ErrorIf(True, self.ENODERPC, node,
1976                    "node returned invalid LVM info, check LVM status")
1977
1978   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1979     """Gets per-disk status information for all instances.
1980
1981     @type nodelist: list of strings
1982     @param nodelist: Node names
1983     @type node_image: dict of (name, L{objects.Node})
1984     @param node_image: Node objects
1985     @type instanceinfo: dict of (name, L{objects.Instance})
1986     @param instanceinfo: Instance objects
1987     @rtype: {instance: {node: [(succes, payload)]}}
1988     @return: a dictionary of per-instance dictionaries with nodes as
1989         keys and disk information as values; the disk information is a
1990         list of tuples (success, payload)
1991
1992     """
1993     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1994
1995     node_disks = {}
1996     node_disks_devonly = {}
1997     diskless_instances = set()
1998     diskless = constants.DT_DISKLESS
1999
2000     for nname in nodelist:
2001       node_instances = list(itertools.chain(node_image[nname].pinst,
2002                                             node_image[nname].sinst))
2003       diskless_instances.update(inst for inst in node_instances
2004                                 if instanceinfo[inst].disk_template == diskless)
2005       disks = [(inst, disk)
2006                for inst in node_instances
2007                for disk in instanceinfo[inst].disks]
2008
2009       if not disks:
2010         # No need to collect data
2011         continue
2012
2013       node_disks[nname] = disks
2014
2015       # Creating copies as SetDiskID below will modify the objects and that can
2016       # lead to incorrect data returned from nodes
2017       devonly = [dev.Copy() for (_, dev) in disks]
2018
2019       for dev in devonly:
2020         self.cfg.SetDiskID(dev, nname)
2021
2022       node_disks_devonly[nname] = devonly
2023
2024     assert len(node_disks) == len(node_disks_devonly)
2025
2026     # Collect data from all nodes with disks
2027     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2028                                                           node_disks_devonly)
2029
2030     assert len(result) == len(node_disks)
2031
2032     instdisk = {}
2033
2034     for (nname, nres) in result.items():
2035       disks = node_disks[nname]
2036
2037       if nres.offline:
2038         # No data from this node
2039         data = len(disks) * [(False, "node offline")]
2040       else:
2041         msg = nres.fail_msg
2042         _ErrorIf(msg, self.ENODERPC, nname,
2043                  "while getting disk information: %s", msg)
2044         if msg:
2045           # No data from this node
2046           data = len(disks) * [(False, msg)]
2047         else:
2048           data = []
2049           for idx, i in enumerate(nres.payload):
2050             if isinstance(i, (tuple, list)) and len(i) == 2:
2051               data.append(i)
2052             else:
2053               logging.warning("Invalid result from node %s, entry %d: %s",
2054                               nname, idx, i)
2055               data.append((False, "Invalid result from the remote node"))
2056
2057       for ((inst, _), status) in zip(disks, data):
2058         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2059
2060     # Add empty entries for diskless instances.
2061     for inst in diskless_instances:
2062       assert inst not in instdisk
2063       instdisk[inst] = {}
2064
2065     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2066                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2067                       compat.all(isinstance(s, (tuple, list)) and
2068                                  len(s) == 2 for s in statuses)
2069                       for inst, nnames in instdisk.items()
2070                       for nname, statuses in nnames.items())
2071     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2072
2073     return instdisk
2074
2075   def _VerifyHVP(self, hvp_data):
2076     """Verifies locally the syntax of the hypervisor parameters.
2077
2078     """
2079     for item, hv_name, hv_params in hvp_data:
2080       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2081              (item, hv_name))
2082       try:
2083         hv_class = hypervisor.GetHypervisor(hv_name)
2084         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2085         hv_class.CheckParameterSyntax(hv_params)
2086       except errors.GenericError, err:
2087         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2088
2089
2090   def BuildHooksEnv(self):
2091     """Build hooks env.
2092
2093     Cluster-Verify hooks just ran in the post phase and their failure makes
2094     the output be logged in the verify output and the verification to fail.
2095
2096     """
2097     all_nodes = self.cfg.GetNodeList()
2098     env = {
2099       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2100       }
2101     for node in self.cfg.GetAllNodesInfo().values():
2102       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2103
2104     return env, [], all_nodes
2105
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The method first runs the local checks (configuration, certificates,
    hypervisor parameters), then queries all nodes with a single
    node_verify call plus a disk status call, and finally evaluates the
    results per node and per instance.

    @param feedback_fn: function used to send progress messages and
        notices back to the caller
    @return: True if no error was recorded in C{self.bad}, False otherwise

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    # Read everything we need from the configuration
    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodeinfo_byname = self.cfg.GetAllNodesInfo()
    nodelist = utils.NiceSort(nodeinfo_byname.keys())
    nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
    instanceinfo = self.cfg.GetAllInstancesInfo()
    instancelist = utils.NiceSort(instanceinfo.keys())
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # Compute the set of hypervisor parameters
    # (each entry is a tuple of source description, hypervisor name and
    # parameters)
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # Parameters of the node_verify call, keyed by the check to run
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS: hvp_data,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    # The LVM and DRBD checks are requested only if a volume group is
    # configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in instanceinfo.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    # Gather OOB paths
    oob_paths = []
    for node in nodeinfo:
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    # Register every instance with the images of its nodes
    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp maps a primary node to the instances which have this node
        # as secondary
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    # Image of the first node with valid OS data; the OS lists of the
    # nodes are compared against it
    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        # without an answer from the node none of the checks below can run
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      self._VerifyOob(node_i, nresult)

      # The remaining checks are only done for vm_capable nodes
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node %s", inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      # Warn about mirrored instances whose nodes are in different groups
      if inst_config.disk_template in constants.DTS_NET_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        # NOTE(review): ghost nodes were added above to node_image only,
        # so for an instance living on a ghost node this lookup in
        # nodeinfo_byname looks like it would raise KeyError - confirm
        for node in instance_nodes:
          instance_groups.setdefault(nodeinfo_byname[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    # The N+1 memory check can be disabled via the opcode's skip_checks
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # Informational notices; these do not influence the result
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2430
2431   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2432     """Analyze the post-hooks' result
2433
2434     This method analyses the hook result, handles it, and sends some
2435     nicely-formatted feedback back to the user.
2436
2437     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2438         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2439     @param hooks_results: the results of the multi-node hooks rpc call
2440     @param feedback_fn: function used send feedback back to the caller
2441     @param lu_result: previous Exec result
2442     @return: the new Exec result, based on the previous result
2443         and hook results
2444
2445     """
2446     # We only really run POST phase hooks, and are only interested in
2447     # their results
2448     if phase == constants.HOOKS_PHASE_POST:
2449       # Used to change hooks' output to proper indentation
2450       feedback_fn("* Hooks Results")
2451       assert hooks_results, "invalid result from hooks"
2452
2453       for node_name in hooks_results:
2454         res = hooks_results[node_name]
2455         msg = res.fail_msg
2456         test = msg and not res.offline
2457         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2458                       "Communication failure in hooks execution: %s", msg)
2459         if res.offline or msg:
2460           # No need to investigate payload if node is offline or gave an error.
2461           # override manually lu_result here as _ErrorIf only
2462           # overrides self.bad
2463           lu_result = 1
2464           continue
2465         for script, hkr, output in res.payload:
2466           test = hkr == constants.HKR_FAIL
2467           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2468                         "Script %s failed, output:", script)
2469           if test:
2470             output = self._HOOKS_INDENT_RE.sub('      ', output)
2471             feedback_fn("%s" % output)
2472             lu_result = 0
2473
2474       return lu_result
2475
2476
class LUClusterVerifyDisks(NoHooksLU):
  """Checks the status of the logical volumes used by the instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks.

    Every locking level is flagged with 1 in C{share_locks}.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
    }

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked as up are taken into account; their
    volumes are compared with the LV lists returned by the vm-capable
    nodes.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = []
    missing_lvs = {}
    outcome = (node_errors, need_activation, missing_lvs)

    vm_nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())

    # build the {(node, volume): instance} map of all expected volumes
    expected = {}
    for instance in self.cfg.GetAllInstancesInfo().values():
      lvs_by_node = {}
      if instance.admin_up:
        instance.MapLVsByNode(lvs_by_node)
        for node_name, volumes in lvs_by_node.iteritems():
          for volume in volumes:
            expected[(node_name, volume)] = instance

    if not expected:
      # no instance is up, hence there is nothing to compare against
      return outcome

    lv_data = self.rpc.call_lv_list(vm_nodes, [])
    for node_name, node_result in lv_data.items():
      if node_result.offline:
        continue

      failure = node_result.fail_msg
      if failure:
        logging.warning("Error enumerating LVs on node %s: %s",
                        node_name, failure)
        node_errors[node_name] = failure
        continue

      for lv_name, (_, _, lv_online) in node_result.payload.items():
        # every volume reported by the node is dropped from the map, so
        # that only the unreported ones are left at the end
        owner = expected.pop((node_name, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in need_activation:
          need_activation.append(owner.name)

    # whatever is still in the map was not reported by any node; group
    # these (node, volume) pairs by instance name
    for node_vol, owner in expected.iteritems():
      missing_lvs.setdefault(owner.name, []).append(node_vol)

    return outcome
2543
2544
class LUClusterRepairDiskSizes(NoHooksLU):
  """Compares recorded disk sizes with the actual ones, fixing the config.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the locks needed by this LU.

    If the opcode names instances, only those are locked and the node
    locks are computed later in L{DeclareLocks}; otherwise all instances
    and all nodes are locked. Every locking level is flagged with 1 in
    C{share_locks}.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        locking.LEVEL_NODE: locking.ALL_SET,
        }
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, given_name)
                           for given_name in self.op.instances]
      self.needed_locks = {
        locking.LEVEL_INSTANCE: self.wanted_names,
        locking.LEVEL_NODE: [],
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    @param level: the locking level which is being declared

    """
    if self.wanted_names is None or level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This loads the configuration objects of the wanted instances; if no
    instances were given, all the instances we hold locks on are used.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any (recursive) first child was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # group the disks by the primary node of their instance, so that a
    # single rpc call per node is enough
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_entries = per_node_disks.setdefault(instance.primary_node, [])
      node_entries.extend((instance, idx, disk)
                          for idx, disk in enumerate(instance.disks))

    changed = []
    for node, node_entries in per_node_disks.items():
      # the rpc works on copies, so that setting the disk IDs does not
      # touch the objects held in the configuration
      disk_copies = [entry[2].Copy() for entry in node_entries]
      for disk_copy in disk_copies:
        self.cfg.SetDiskID(disk_copy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(node_entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(node_entries), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for (instance, idx, disk), reported in zip(node_entries, sizes):
        if reported is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(reported, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # scale the reported value down by 2**20 before comparing it with
        # the recorded size (presumably bytes to mebibytes -- TODO confirm)
        actual = reported >> 20
        if disk.size != actual:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
2660
2661
class LUClusterRename(LogicalUnit):
  """Changes the name (and possibly the IP address) of the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-phase node list containing
        only the master node, post-phase node list containing all nodes)

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env, [self.cfg.GetMasterNode()], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and compared with the current cluster name and
    master IP; on success C{self.op.name} is replaced by the resolved
    name and the resolved address is stored in C{self.ip}.

    @raise errors.OpPrereqError: if neither name nor IP would change, or
        if a changed IP address already answers on the node daemon port

    """
    resolved = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())
    self.ip = resolved.ip

    name_changed = resolved.name != self.cfg.GetClusterName()
    ip_changed = resolved.ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new address which already answers must not be taken over
    if ip_changed and netutils.TcpPing(resolved.ip,
                                       constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 resolved.ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved.name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip
    master = self.cfg.GetMasterNode()

    # shutdown the master IP
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      targets = self.cfg.GetOnlineNodeList()
      # the file has just been written locally, so the master is skipped
      if master in targets:
        targets.remove(master)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # the master role is started again no matter what happened above
      start_result = self.rpc.call_node_start_master(master, False, False)
      failure = start_result.fail_msg
      if failure:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", failure)

    return new_name
2738
2739
2740 class LUClusterSetParams(LogicalUnit):
2741   """Change the parameters of the cluster.
2742
2743   """
2744   HPATH = "cluster-modify"
2745   HTYPE = constants.HTYPE_CLUSTER
2746   REQ_BGL = False
2747
2748   def CheckArguments(self):
2749     """Check parameters
2750
2751     """
2752     if self.op.uid_pool:
2753       uidpool.CheckUidPool(self.op.uid_pool)
2754
2755     if self.op.add_uids:
2756       uidpool.CheckUidPool(self.op.add_uids)
2757
2758     if self.op.remove_uids:
2759       uidpool.CheckUidPool(self.op.remove_uids)
2760
2761   def ExpandNames(self):
2762     # FIXME: in the future maybe other cluster params won't require checking on
2763     # all nodes to be modified.
2764     self.needed_locks = {
2765       locking.LEVEL_NODE: locking.ALL_SET,
2766     }
2767     self.share_locks[locking.LEVEL_NODE] = 1
2768
2769   def BuildHooksEnv(self):
2770     """Build hooks env.
2771
2772     """
2773     env = {
2774       "OP_TARGET": self.cfg.GetClusterName(),
2775       "NEW_VG_NAME": self.op.vg_name,
2776       }
2777     mn = self.cfg.GetMasterNode()
2778     return env, [mn], [mn]
2779
2780   def CheckPrereq(self):
2781     """Check prerequisites.
2782
2783     This checks whether the given params don't conflict and
2784     if the given volume group is valid.
2785
2786     """
2787     if self.op.vg_name is not None and not self.op.vg_name:
2788       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2789         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2790                                    " instances exist", errors.ECODE_INVAL)
2791
2792     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2793       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2794         raise errors.OpPrereqError("Cannot disable drbd helper while"
2795                                    " drbd-based instances exist",
2796                                    errors.ECODE_INVAL)
2797
2798     node_list = self.acquired_locks[locking.LEVEL_NODE]
2799
2800     # if vg_name not None, checks given volume group on all nodes
2801     if self.op.vg_name:
2802       vglist = self.rpc.call_vg_list(node_list)
2803       for node in node_list:
2804         msg = vglist[node].fail_msg
2805         if msg:
2806           # ignoring down node
2807           self.LogWarning("Error while gathering data on node %s"
2808                           " (ignoring node): %s", node, msg)
2809           continue
2810         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2811                                               self.op.vg_name,
2812                                               constants.MIN_VG_SIZE)
2813         if vgstatus:
2814           raise errors.OpPrereqError("Error on node '%s': %s" %
2815                                      (node, vgstatus), errors.ECODE_ENVIRON)
2816
2817     if self.op.drbd_helper:
2818       # checks given drbd helper on all nodes
2819       helpers = self.rpc.call_drbd_helper(node_list)
2820       for node in node_list:
2821         ninfo = self.cfg.GetNodeInfo(node)
2822         if ninfo.offline:
2823           self.LogInfo("Not checking drbd helper on offline node %s", node)
2824           continue
2825         msg = helpers[node].fail_msg
2826         if msg:
2827           raise errors.OpPrereqError("Error checking drbd helper on node"
2828                                      " '%s': %s" % (node, msg),
2829                                      errors.ECODE_ENVIRON)
2830         node_helper = helpers[node].payload
2831         if node_helper != self.op.drbd_helper:
2832           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2833                                      (node, node_helper), errors.ECODE_ENVIRON)
2834
2835     self.cluster = cluster = self.cfg.GetClusterInfo()
2836     # validate params changes
2837     if self.op.beparams:
2838       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2839       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2840
2841     if self.op.ndparams:
2842       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2843       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2844
2845       # TODO: we need a more general way to handle resetting
2846       # cluster-level parameters to default values
2847       if self.new_ndparams["oob_program"] == "":
2848         self.new_ndparams["oob_program"] = \
2849             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2850
2851     if self.op.nicparams:
2852       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2853       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2854       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2855       nic_errors = []
2856
2857       # check all instances for consistency
2858       for instance in self.cfg.GetAllInstancesInfo().values():
2859         for nic_idx, nic in enumerate(instance.nics):
2860           params_copy = copy.deepcopy(nic.nicparams)
2861           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2862
2863           # check parameter syntax
2864           try:
2865             objects.NIC.CheckParameterSyntax(params_filled)
2866           except errors.ConfigurationError, err:
2867             nic_errors.append("Instance %s, nic/%d: %s" %
2868                               (instance.name, nic_idx, err))
2869
2870           # if we're moving instances to routed, check that they have an ip
2871           target_mode = params_filled[constants.NIC_MODE]
2872           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2873             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
2874                               " address" % (instance.name, nic_idx))
2875       if nic_errors:
2876         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2877                                    "\n".join(nic_errors))
2878
2879     # hypervisor list/parameters
2880     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2881     if self.op.hvparams:
2882       for hv_name, hv_dict in self.op.hvparams.items():
2883         if hv_name not in self.new_hvparams:
2884           self.new_hvparams[hv_name] = hv_dict
2885         else:
2886           self.new_hvparams[hv_name].update(hv_dict)
2887
2888     # os hypervisor parameters
2889     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2890     if self.op.os_hvp:
2891       for os_name, hvs in self.op.os_hvp.items():
2892         if os_name not in self.new_os_hvp:
2893           self.new_os_hvp[os_name] = hvs
2894         else:
2895           for hv_name, hv_dict in hvs.items():
2896             if hv_name not in self.new_os_hvp[os_name]:
2897               self.new_os_hvp[os_name][hv_name] = hv_dict
2898             else:
2899               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2900
2901     # os parameters
2902     self.new_osp = objects.FillDict(cluster.osparams, {})
2903     if self.op.osparams:
2904       for os_name, osp in self.op.osparams.items():
2905         if os_name not in self.new_osp:
2906           self.new_osp[os_name] = {}
2907
2908         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2909                                                   use_none=True)
2910
2911         if not self.new_osp[os_name]:
2912           # we removed all parameters
2913           del self.new_osp[os_name]
2914         else:
2915           # check the parameter validity (remote check)
2916           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2917                          os_name, self.new_osp[os_name])
2918
2919     # changes to the hypervisor list
2920     if self.op.enabled_hypervisors is not None:
2921       self.hv_list = self.op.enabled_hypervisors
2922       for hv in self.hv_list:
2923         # if the hypervisor doesn't already exist in the cluster
2924         # hvparams, we initialize it to empty, and then (in both
2925         # cases) we make sure to fill the defaults, as we might not
2926         # have a complete defaults list if the hypervisor wasn't
2927         # enabled before
2928         if hv not in new_hvp:
2929           new_hvp[hv] = {}
2930         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2931         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2932     else:
2933       self.hv_list = cluster.enabled_hypervisors
2934
2935     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2936       # either the enabled list has changed, or the parameters have, validate
2937       for hv_name, hv_params in self.new_hvparams.items():
2938         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2939             (self.op.enabled_hypervisors and
2940              hv_name in self.op.enabled_hypervisors)):
2941           # either this is a new hypervisor, or its parameters have changed
2942           hv_class = hypervisor.GetHypervisor(hv_name)
2943           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2944           hv_class.CheckParameterSyntax(hv_params)
2945           _CheckHVParams(self, node_list, hv_name, hv_params)
2946
2947     if self.op.os_hvp:
2948       # no need to check any newly-enabled hypervisors, since the
2949       # defaults have already been checked in the above code-block
2950       for os_name, os_hvp in self.new_os_hvp.items():
2951         for hv_name, hv_params in os_hvp.items():
2952           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2953           # we need to fill in the new os_hvp on top of the actual hv_p
2954           cluster_defaults = self.new_hvparams.get(hv_name, {})
2955           new_osp = objects.FillDict(cluster_defaults, hv_params)
2956           hv_class = hypervisor.GetHypervisor(hv_name)
2957           hv_class.CheckParameterSyntax(new_osp)
2958           _CheckHVParams(self, node_list, hv_name, new_osp)
2959
2960     if self.op.default_iallocator:
2961       alloc_script = utils.FindFile(self.op.default_iallocator,
2962                                     constants.IALLOCATOR_SEARCH_PATH,
2963                                     os.path.isfile)
2964       if alloc_script is None:
2965         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2966                                    " specified" % self.op.default_iallocator,
2967                                    errors.ECODE_INVAL)
2968
2969   def Exec(self, feedback_fn):
2970     """Change the parameters of the cluster.
2971
2972     """
2973     if self.op.vg_name is not None:
2974       new_volume = self.op.vg_name
2975       if not new_volume:
2976         new_volume = None
2977       if new_volume != self.cfg.GetVGName():
2978         self.cfg.SetVGName(new_volume)
2979       else:
2980         feedback_fn("Cluster LVM configuration already in desired"
2981                     " state, not changing")
2982     if self.op.drbd_helper is not None:
2983       new_helper = self.op.drbd_helper
2984       if not new_helper:
2985         new_helper = None
2986       if new_helper != self.cfg.GetDRBDHelper():
2987         self.cfg.SetDRBDHelper(new_helper)
2988       else:
2989         feedback_fn("Cluster DRBD helper already in desired state,"
2990                     " not changing")
2991     if self.op.hvparams:
2992       self.cluster.hvparams = self.new_hvparams
2993     if self.op.os_hvp:
2994       self.cluster.os_hvp = self.new_os_hvp
2995     if self.op.enabled_hypervisors is not None:
2996       self.cluster.hvparams = self.new_hvparams
2997       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2998     if self.op.beparams:
2999       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3000     if self.op.nicparams:
3001       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3002     if self.op.osparams:
3003       self.cluster.osparams = self.new_osp
3004     if self.op.ndparams:
3005       self.cluster.ndparams = self.new_ndparams
3006
3007     if self.op.candidate_pool_size is not None:
3008       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3009       # we need to update the pool size here, otherwise the save will fail
3010       _AdjustCandidatePool(self, [])
3011
3012     if self.op.maintain_node_health is not None:
3013       self.cluster.maintain_node_health = self.op.maintain_node_health
3014
3015     if self.op.prealloc_wipe_disks is not None:
3016       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3017
3018     if self.op.add_uids is not None:
3019       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3020
3021     if self.op.remove_uids is not None:
3022       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3023
3024     if self.op.uid_pool is not None:
3025       self.cluster.uid_pool = self.op.uid_pool
3026
3027     if self.op.default_iallocator is not None:
3028       self.cluster.default_iallocator = self.op.default_iallocator
3029
3030     if self.op.reserved_lvs is not None:
3031       self.cluster.reserved_lvs = self.op.reserved_lvs
3032
3033     def helper_os(aname, mods, desc):
3034       desc += " OS list"
3035       lst = getattr(self.cluster, aname)
3036       for key, val in mods:
3037         if key == constants.DDM_ADD:
3038           if val in lst:
3039             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3040           else:
3041             lst.append(val)
3042         elif key == constants.DDM_REMOVE:
3043           if val in lst:
3044             lst.remove(val)
3045           else:
3046             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3047         else:
3048           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3049
3050     if self.op.hidden_os:
3051       helper_os("hidden_os", self.op.hidden_os, "hidden")
3052
3053     if self.op.blacklisted_os:
3054       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3055
3056     if self.op.master_netdev:
3057       master = self.cfg.GetMasterNode()
3058       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3059                   self.cluster.master_netdev)
3060       result = self.rpc.call_node_stop_master(master, False)
3061       result.Raise("Could not disable the master ip")
3062       feedback_fn("Changing master_netdev from %s to %s" %
3063                   (self.cluster.master_netdev, self.op.master_netdev))
3064       self.cluster.master_netdev = self.op.master_netdev
3065
3066     self.cfg.Update(self.cluster, feedback_fn)
3067
3068     if self.op.master_netdev:
3069       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3070                   self.op.master_netdev)
3071       result = self.rpc.call_node_start_master(master, False, False)
3072       if result.fail_msg:
3073         self.LogWarning("Could not re-enable the master ip on"
3074                         " the master, please restart manually: %s",
3075                         result.fail_msg)
3076
3077
def _UploadHelper(lu, nodes, fname):
  """Upload a file to a list of nodes, warning about every failed copy.

  Nothing at all is done if the file does not exist locally.

  @param lu: calling logical unit, used for the rpc call and for logging
  @param nodes: the nodes the file is uploaded to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for node_name, node_result in upload_results.items():
    failure = node_result.fail_msg
    if not failure:
      continue
    # a failed copy is not fatal, it is only reported
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, failure))
3090
3091
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied. The master node itself is never a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  master_name = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()).name
  all_targets = lu.cfg.GetOnlineNodeList()
  non_vm_capable = lu.cfg.GetNonVmCapableNodeList()
  vm_targets = [node_name for node_name in all_targets
                if node_name not in non_vm_capable]

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  for targets in (all_targets, vm_targets):
    if master_name in targets:
      targets.remove(master_name)

  # 2. Gather files to distribute
  common_files = set([constants.ETC_HOSTS,
                      constants.SSH_KNOWN_HOSTS_FILE,
                      constants.RAPI_CERT_FILE,
                      constants.RAPI_USERS_FILE,
                      constants.CONFD_HMAC_KEY,
                      constants.CLUSTER_DOMAIN_SECRET_FILE,
                     ])

  # the files of the enabled hypervisors only go to the vm-capable nodes
  hv_files = set()
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    hv_files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for targets, files in ((all_targets, common_files), (vm_targets, hv_files)):
    for fname in files:
      _UploadHelper(lu, targets, fname)
3139
3140
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This LU only re-saves the cluster object and then copies the ancillary
  files again.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes.

    The node level is flagged with 1 in C{share_locks}.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3161
3162
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status is queried on the instance's primary node until no
  disk reports a sync in progress anymore.

  @param lu: the calling logical unit; its rpc, config and logging
      facilities are used
  @param instance: the instance whose disks are checked
  @param disks: optional list of disks to check, handed to
      L{_ExpandCheckDisks}; an empty list makes the function return
      immediately
  @param oneshot: if true, do not wait for the sync to finish but report
      the state seen by a single successful poll (retries for degraded
      disks still apply)
  @return: C{False} if in the last poll some disk was degraded without
      being in sync progress, C{True} otherwise (also if there is
      nothing to check)
  @raise errors.RemoteError: if the node did not return mirror data for
      10 polls in a row

  """
  if not instance.disks or (disks is not None and not disks):
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful call resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # a disk which is degraded while still syncing is expected; only
      # degraded disks without a sync in progress are counted
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # keep the longest estimate over all disks, instead of the
          # estimate of whichever disk happens to be listed last
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the slowest disk, but poll at least once a minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
3236
3237
3238 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3239   """Check that mirrors are not degraded.
3240
3241   The ldisk parameter, if True, will change the test from the
3242   is_degraded attribute (which represents overall non-ok status for
3243   the device(s)) to the ldisk (representing the local storage status).
3244
3245   """
3246   lu.cfg.SetDiskID(dev, node)
3247
3248   result = True
3249
3250   if on_primary or dev.AssembleOnSecondary():
3251     rstats = lu.rpc.call_blockdev_find(node, dev)
3252     msg = rstats.fail_msg
3253     if msg:
3254       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3255       result = False
3256     elif not rstats.payload:
3257       lu.LogWarning("Can't find disk on node %s", node)
3258       result = False
3259     else:
3260       if ldisk:
3261         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3262       else:
3263         result = result and not rstats.payload.is_degraded
3264
3265   if dev.children:
3266     for child in dev.children:
3267       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3268
3269   return result
3270
3271
3272 class LUOobCommand(NoHooksLU):
3273   """Logical unit for OOB handling.
3274
3275   """
3276   REG_BGL = False
3277
3278   def CheckPrereq(self):
3279     """Check prerequisites.
3280
3281     This checks:
3282      - the node exists in the configuration
3283      - OOB is supported
3284
3285     Any errors are signaled by raising errors.OpPrereqError.
3286
3287     """
3288     self.nodes = []
3289     for node_name in self.op.node_names:
3290       node = self.cfg.GetNodeInfo(node_name)
3291
3292       if node is None:
3293         raise errors.OpPrereqError("Node %s not found" % node_name,
3294                                    errors.ECODE_NOENT)
3295       else:
3296         self.nodes.append(node)
3297
3298       if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3299         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3300                                     " not marked offline") % node_name,
3301                                    errors.ECODE_STATE)
3302
3303   def ExpandNames(self):
3304     """Gather locks we need.
3305
3306     """
3307     if self.op.node_names:
3308       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3309                             for name in self.op.node_names]
3310     else:
3311       self.op.node_names = self.cfg.GetNodeList()
3312
3313     self.needed_locks = {
3314       locking.LEVEL_NODE: self.op.node_names,
3315       }
3316
3317   def Exec(self, feedback_fn):
3318     """Execute OOB and return result if we expect any.
3319
3320     """
3321     master_node = self.cfg.GetMasterNode()
3322     ret = []
3323
3324     for node in self.nodes:
3325       node_entry = [(constants.RS_NORMAL, node.name)]
3326       ret.append(node_entry)
3327
3328       oob_program = _SupportsOob(self.cfg, node)
3329
3330       if not oob_program:
3331         node_entry.append((constants.RS_UNAVAIL, None))
3332         continue
3333
3334       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3335                    self.op.command, oob_program, node.name)
3336       result = self.rpc.call_run_oob(master_node, oob_program,
3337                                      self.op.command, node.name,
3338                                      self.op.timeout)
3339
3340       if result.fail_msg:
3341         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3342                         node.name, result.fail_msg)
3343         node_entry.append((constants.RS_NODATA, None))
3344       else:
3345         try:
3346           self._CheckPayload(result)
3347         except errors.OpExecError, err:
3348           self.LogWarning("The payload returned by '%s' is not valid: %s",
3349                           node.name, err)
3350           node_entry.append((constants.RS_NODATA, None))
3351         else:
3352           if self.op.command == constants.OOB_HEALTH:
3353             # For health we should log important events
3354             for item, status in result.payload:
3355               if status in [constants.OOB_STATUS_WARNING,
3356                             constants.OOB_STATUS_CRITICAL]:
3357                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3358                                 node.name, item, status)
3359
3360           if self.op.command == constants.OOB_POWER_ON:
3361             node.powered = True
3362           elif self.op.command == constants.OOB_POWER_OFF:
3363             node.powered = False
3364           elif self.op.command == constants.OOB_POWER_STATUS:
3365             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3366             if powered != node.powered:
3367               logging.warning(("Recorded power state (%s) of node '%s' does not"
3368                                " match actual power state (%s)"), node.powered,
3369                               node.name, powered)
3370
3371           # For configuration changing commands we should update the node
3372           if self.op.command in (constants.OOB_POWER_ON,
3373                                  constants.OOB_POWER_OFF):
3374             self.cfg.Update(node, feedback_fn)
3375
3376           node_entry.append((constants.RS_NORMAL, result.payload))
3377
3378     return ret
3379
3380   def _CheckPayload(self, result):
3381     """Checks if the payload is valid.
3382
3383     @param result: RPC result
3384     @raises errors.OpExecError: If payload is not valid
3385
3386     """
3387     errs = []
3388     if self.op.command == constants.OOB_HEALTH:
3389       if not isinstance(result.payload, list):
3390         errs.append("command 'health' is expected to return a list but got %s" %
3391                     type(result.payload))
3392       else:
3393         for item, status in result.payload:
3394           if status not in constants.OOB_STATUSES:
3395             errs.append("health item '%s' has invalid status '%s'" %
3396                         (item, status))
3397
3398     if self.op.command == constants.OOB_POWER_STATUS:
3399       if not isinstance(result.payload, dict):
3400         errs.append("power-status is expected to return a dict but got %s" %
3401                     type(result.payload))
3402
3403     if self.op.command in [
3404         constants.OOB_POWER_ON,
3405         constants.OOB_POWER_OFF,
3406         constants.OOB_POWER_CYCLE,
3407         ]:
3408       if result.payload is not None:
3409         errs.append("%s is expected to not return payload but got '%s'" %
3410                     (self.op.command, result.payload))
3411
3412     if errs:
3413       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3414                                utils.CommaJoin(errs))
3415
3416
3417
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  For every OS reported by the queried nodes, one row holding the
  requested output fields is returned.

  """
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    """Refuse name filters and validate the requested fields.

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare the needed locks (currently none).

    """
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    # only the nodes which answered the RPC get an entry for each OS, so
    # that a non-responding node daemon doesn't make all OSes invalid
    responsive = [nname for nname in rlist if not rlist[nname].fail_msg]

    per_os = {}
    for node_name, nres in rlist.items():
      if nres.fail_msg or not nres.payload:
        continue
      for entry in nres.payload:
        (name, path, status, diagnose, variants, params, api_versions) = entry
        try:
          per_node = per_os[name]
        except KeyError:
          # first time we see this OS: start with an empty list for
          # every responsive node
          per_node = per_os[name] = dict((nname, []) for nname in responsive)
        # params arrive as [name, help] lists, store them as tuples
        per_node[node_name].append((path, status, diagnose, variants,
                                    [tuple(v) for v in params], api_versions))
    return per_os

  @staticmethod
  def _MergeNodeData(os_data):
    """Combine the first OS entry of every node for a single OS.

    @param os_data: map of node name to list of OS entries, as built by
        L{_DiagnoseByOS}
    @rtype: tuple
    @return: (valid, variants, params, api_versions); the last three are
        the sets of values common to all nodes, or empty sets if some
        node has no entry or a first entry with a false status

    """
    variants = params = api_versions = None
    for osl in os_data.values():
      if not (osl and osl[0][1]):
        return (False, set(), set(), set())
      node_variants, node_params, node_api = osl[0][3:6]
      if variants is None: # first entry
        variants = set(node_variants)
        params = set(node_params)
        api_versions = set(node_api)
      else: # keep consistency
        variants.intersection_update(node_variants)
        params.intersection_update(node_params)
        api_versions.intersection_update(node_api)

    if variants is None:
      # no node at all in the map
      return (True, set(), set(), set())
    return (True, variants, params, api_versions)

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row (list of field values, in the order of the requested
        output fields) per listed OS

    """
    valid_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()
    selected = self.op.output_fields

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      (valid, variants, params, api_versions) = self._MergeNodeData(os_data)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os

      # hidden, blacklisted and invalid OSes are only listed if the
      # respective field was asked for
      if is_hid and self._HID not in selected:
        continue
      if is_blk and self._BLK not in selected:
        continue
      if not valid and self._VLD not in selected:
        continue

      # values are computed lazily, only for the fields actually requested
      getters = {
        "name": lambda: os_name,
        self._VLD: lambda: valid,
        # this is just a copy of the dict
        "node_status": lambda: dict(os_data.items()),
        "variants": lambda: utils.NiceSort(list(variants)),
        "parameters": lambda: list(params),
        "api_versions": lambda: list(api_versions),
        self._HID: lambda: is_hid,
        self._BLK: lambda: is_blk,
        }

      row = []
      for field in selected:
        try:
          getter = getters[field]
        except KeyError:
          raise errors.ParameterError(field)
        row.append(getter())
      output.append(row)

    return output
3552
3553
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: tuple of (environment, node list, node list), where both
        node lists are all configured nodes except the one being removed

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, remaining, remaining

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    all_instances = self.cfg.GetInstanceList()

    master = self.cfg.GetMasterNode()
    if node.name == master:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Failures of the post hooks and of the leave-cluster RPC are only
    logged as warnings; a failure to update the hosts file is raised.

    """
    node = self.node
    node_name = node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_mgr = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_mgr.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_err = leave_result.fail_msg
    if leave_err:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_err)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    master_node = self.cfg.GetMasterNode()
    hosts_result = self.rpc.call_etc_hosts_modify(master_node,
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
3648
3649
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted nodes and declare the locks on the LU.

    Node locks are only requested if locking was asked for and live data
    is part of the requested data.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      wanted = _GetWantedNodes(lu, self.names)
    else:
      wanted = locking.ALL_SET
    self.wanted = wanted

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to declare at further lock levels.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered,
    for the others None (or an empty dict for the groups) is passed on.

    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    requested = self.requested_data

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      vm_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(vm_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = {}
      for (name, nresult) in node_data.items():
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = nresult.payload

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = {}
      for name, node in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    groups = {}
    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    node_objects = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_objects,
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
3724
3725
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    op = self.op
    self.nq = _NodeQuery(op.names, op.output_fields, op.use_locking)

  def ExpandNames(self):
    """Let the query object declare the locks.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.nq.OldStyleQuery(self)
3742
3743
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Lock the given nodes (or all of them), in shared mode.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  @staticmethod
  def _FindOwner(instances, lv_by_node, node, lv_name):
    """Find the instance owning a logical volume on a node.

    @param instances: list of instance objects to search, in order
    @param lv_by_node: map of instance object to its per-node LV mapping
    @return: the name of the first matching instance, or '-' if there is
        none

    """
    for inst in instances:
      node_map = lv_by_node[inst]
      if node in node_map and lv_name in node_map[node]:
        return inst.name
    return '-'

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one row per volume, holding the requested output fields
        converted to strings

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict((inst, inst.MapLVsByNode()) for inst in ilist)

    # output fields which are plain copies of an entry of the volume data
    direct_keys = {"phys": 'dev', "vg": 'vg', "name": 'name'}

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node, failure)
        continue

      for vol in sorted(nresult.payload, key=lambda vol: vol['dev']):
        vol_row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in direct_keys:
            val = vol[direct_keys[field]]
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = self._FindOwner(ilist, lv_by_node, node, vol['name'])
          else:
            raise errors.ParameterError(field)
          vol_row.append(str(val))

        output.append(vol_row)

    return output
3820
3821
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Lock the given nodes (or all of them), in shared mode.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC failed are
    skipped with a warning.

    @rtype: list
    @return: one row per storage unit, holding the requested output
        fields; rows are ordered by node and then by unit name

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      candidates = requested[:]
    else:
      candidates = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in candidates if name not in lu_only]

    # position of every field in the rows returned by the nodes
    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't get storage data from node %s: %s",
                        node, failure)
        continue

      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3903
3904
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted instances and declare the locks on the LU.

    Locks are only requested if locking was asked for and live data is
    part of the requested data; the node locks are then computed later,
    in L{DeclareLocks}.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # empty for now, filled in at the node level by DeclareLocks
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Compute the node locks from the locked instances, if locking.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered.

    @return: a L{query.InstanceQueryData} object

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # all nodes used by the selected instances
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    # (the console information below is derived from the live data, hence
    # both kinds of data trigger the RPC)
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # just the entry of "inst", so any other instance reported
                # by this node (including ones with a different primary
                # node) ends up in live_data too - confirm this is
                # intended before changing it, the consumers of
                # wrongnode_inst may depend on it
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
3998
3999
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The work is delegated to the query implementation matching the
  requested kind, which is always created with locking disabled.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query implementation object.

    """
    impl_class = _GetQueryImplementation(self.op.what)
    wanted_names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = impl_class(wanted_names, self.op.fields, False)

  def ExpandNames(self):
    """Let the implementation declare the locks.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the implementation declare the locks of the given level.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its new-style result.

    """
    return self.impl.NewStyleQuery(self)
4021
4022
class LUQueryFields(NoHooksLU):
  """Query for the fields of resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Look up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """No locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the result of the implementation's fields query.

    """
    wanted_fields = self.op.fields
    return self.qcls.FieldsQuery(wanted_fields)
4038
4039
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested changes.

    Fails if the storage type has no modifiable fields at all or if a
    field which is not modifiable for that type is to be changed.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      allowed = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    rejected = set(self.op.changes.keys()) - allowed
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
4080
4081
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same LU handles both adding a brand new node and re-adding a node
  which is already present in the configuration (C{self.op.readd}).

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  # Names of the node capability flags which are copied from the opcode to
  # the node object (after None values have been replaced by defaults)
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolve the node name and reject invalid re-add requests.

    The node name is resolved using the cluster's primary IP family and the
    opcode's node name is replaced by the resolved name.

    @raise errors.OpPrereqError: if the master node is being re-added or
        if a node group is passed together with the readd flag

    """
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @return: tuple of (environment dict, list of nodes for the pre-hook,
        list of nodes for the post-hook)

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Besides the checks, this also fills in the defaults for the secondary
    IP and the capability flags on the opcode, and prepares
    C{self.new_node}, C{self.master_candidate} and
    C{self.changed_primary_ip} for L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP always comes from the name resolution
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      # Without an explicit secondary IP we fall back to the primary one,
      # which can't work for IPv6 as the secondary IP must be IPv4
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # A new node must not exist yet, a re-added node must already exist
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    # Check the IP addresses against all the nodes already in the config
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # For the node being re-added the secondary IP must be unchanged,
        # while a changed primary IP is only recorded for later use in Exec
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # Neither of the new IPs may be used (as primary or secondary) by
      # any other node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # unspecified flags keep the value of the existing node
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # unspecified flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-add the node itself is passed as an exception to the
    # self-promotion decision below
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    # Only master capable nodes are considered for master candidate status
    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # re-use the existing node object, its flags are updated in Exec
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node object prepared in L{CheckPrereq} is updated with the final
    flags and parameters, then the node is checked (protocol version,
    secondary IP, ssh/hostname verification from the master) and finally
    added (or re-added) to the cluster context.

    @raise errors.OpExecError: on protocol version mismatch or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # note that, without ndparams in the opcode, the node parameters are
    # reset to an empty dict (also for re-added nodes)
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to the master's /etc/hosts, if the cluster is configured
    # to modify that file
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # dual-homed node: check its secondary IP
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Run the node list verification from the master against the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # a non-empty payload is treated as failure; all the entries are
      # reported before aborting
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          # a failed demotion is only reported, it doesn't abort the re-add
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
4347
4348
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the requested modifications and compute the locking needs.

    This sets C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances}, which are used by the locking functions and
    by L{CheckPrereq}.

    @raise errors.OpPrereqError: if no modification was requested, if more
        than one of the modifications is set to True or if the secondary
        IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # Note that this counts the capability flags too, not only the role
    # flags; CheckPrereq relies on at most one role flag being set even
    # after its automatic promotion, so the check must not be relaxed
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a demotion might have to be compensated
    # by promoting other nodes
    self.lock_all = self.op.auto_promote and self.might_demote
    # Changing the secondary IP needs the instances to be examined
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declare the node (and, if needed, instance) locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Compute the affected instances and release the unneeded locks.

    C{self.affected_instances} is always computed when the instances were
    locked, as L{CheckPrereq} needs it for the secondary IP checks also
    when all the nodes are locked; instance locks are released only when
    a single node is going to be locked.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      # With all nodes locked no instance lock is released
      can_release = (self.needed_locks[locking.LEVEL_NODE] is not
                     locking.ALL_SET)
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
        instance = self.context.cfg.GetInstanceInfo(instance_name)
        i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
        if i_mirrored and self.op.node_name in instance.all_nodes:
          instances_keep.append(instance_name)
          self.affected_instances.append(instance)
        elif can_release:
          instances_release.append(instance_name)
      if instances_release:
        self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
        self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the modified node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag, power state, secondary IP and
    node parameter changes are possible for the node in its current
    state, and computes the old and the new role of the node
    (C{self.old_role}, C{self.new_role}).

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # report the instance names, not the instance objects
          instance_names = ", ".join([inst.name
                                      for inst in self.affected_instances])
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % instance_names,
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      # validate the parameters resulting from applying the changes to the
      # current ones; they're stored on the node only in Exec
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (name, new value) pairs for the changed capability
        flags, role flags and secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # report only the flags which actually change their value
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4640
4641
class LUNodePowercycle(NoHooksLU):
  """Logical unit for powercycling a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    @raise errors.OpPrereqError: if the target is the master node and the
        force parameter was not set

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target

    is_master = (target == self.cfg.GetMasterNode())
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: this is a last-resort operation which
    shouldn't have to wait for other jobs.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
4672
4673
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that this LU doesn't need any locks.

    """
    self.needed_locks = dict()

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dictionary with software/protocol versions, the platform
        architecture and the cluster-level settings; hypervisor
        parameters are restricted to the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    def _OnlyEnabled(hv_dict):
      """Keep just the entries belonging to enabled hypervisors.

      """
      return dict((hv_name, hv_params)
                  for (hv_name, hv_params) in hv_dict.items()
                  if hv_name in enabled_hvs)

    # Per-OS hypervisor parameters; every OS keeps its (possibly empty)
    # entry
    os_hvp = dict((os_name, _OnlyEnabled(hv_dict))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    arch_info = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch_info,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
4740
4741
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected cluster configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Check the selected output fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare that this LU doesn't need any locks.

    """
    self.needed_locks = dict()

  def _ComputeField(self, field):
    """Compute the value of a single output field.

    @param field: the name of the requested field
    @raise errors.ParameterError: for an unknown field name

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      # the queue counts as drained if the drain file exists
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    if field == "volume_group_name":
      return self.cfg.GetVGName()
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Compute the values of the requested fields.

    @return: list of values, in the same order as the requested fields

    """
    return [self._ComputeField(field) for field in self.op.output_fields]
4779
4780
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit for activating the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the disks information computed by the disk assembly
    @raise errors.OpExecError: if the disk assembly reports a failure

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
4818
4819
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Assemble the block devices of an instance on its nodes.

  The devices are first assembled on every node in secondary mode and
  then, in a second pass, on the primary node only in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are assembled
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to assemble; None selects all instance disks
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures during the first
      (secondary mode) pass do not make the function report failure
  @type ignore_size: boolean
  @param ignore_size: if true, the recorded size is unset on a copy of
      each device before it is assembled, useful for cases when the
      size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info); disks_ok is False if an assembly
      failure was counted, device_info is a list holding one
      (primary_node, iv_name, device_path) tuple per disk, where
      device_path is None if no assembly on the primary node succeeded

  """
  iname = instance.name
  pnode = instance.primary_node
  disks = _ExpandCheckDisks(instance, disks)

  def _AssembleOn(node, node_disk, as_primary, idx):
    """Prepare one device for a node and issue the assemble RPC.

    """
    if ignore_size:
      # work on a copy so that the configuration object keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, iname,
                                         as_primary, idx)

  success = True
  device_info = []

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      err = _AssembleOn(node, node_disk, False, idx).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, err)
      if not ignore_secondaries:
        success = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      if node == pnode:
        result = _AssembleOn(node, node_disk, True, idx)
        err = result.fail_msg
        if err:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             inst_disk.iv_name, node, err)
          success = False
        else:
          dev_path = result.payload

    device_info.append((pnode, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, pnode)

  return success, device_info
4906
4907
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, failing the operation on error.

  If the assembly fails, the disks are shut down again before the
  error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; the '--force' hint is only logged when
      this is a false value other than None
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
4921
4922
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shut down the block devices of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are recalculated later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (already locked) instance in the configuration.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With C{self.op.force} set the disks are shut down directly,
    otherwise L{_SafeShutdownInstanceDisks} is used.

    """
    inst = self.instance
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, inst)
4957
4958
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shut down instance block devices after an instance-down check.

  L{_CheckInstanceDown} is called first (presumably raising if the
  instance is still running), then the work is delegated to
  L{_ShutdownInstanceDisks}. The result of the latter is not returned.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to act on, or None for all instance disks

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4968
4969
def _ExpandCheckDisks(instance, disks):
  """Resolve a disk selection to the disks to act on.

  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks; None selects all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: C{instance.disks} if no selection was made, otherwise the
      given selection
  @raise errors.ProgrammerError: if a selected disk is not one of the
      instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
4986
4987
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shut down the block devices of an instance on all its nodes.

  Every failure is logged as a warning. A failure on the primary node
  is counted unless C{ignore_primary} is true; a failure on any other
  node is counted unless the RPC result marks that node as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to act on, or None for all instance disks
  @param ignore_primary: if true, errors on the primary node do not
      affect the return value
  @rtype: boolean
  @return: False if at least one failure was counted, True otherwise

  """
  success = True

  for disk in _ExpandCheckDisks(instance, disks):
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      err = result.fail_msg
      if not err:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, err)
      if node == instance.primary_node:
        counts = not ignore_primary
      else:
        counts = not result.offline
      if counts:
        success = False

  return success
5012
5013
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried via RPC; the check fails if the query fails, if
  the reported C{memory_free} value is not an integer or if it is lower
  than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    msg = ("Can't compute free memory on node %s, result"
           " was '%s'" % (node, free_mem))
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

  if requested > free_mem:
    msg = ("Not enough memory on node %s for %s:"
           " needed %s MiB, available %s MiB" %
           (node, reason, requested, free_mem))
    raise errors.OpPrereqError(msg, errors.ECODE_NORES)
5049
5050
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Verify free disk space on the given nodes for several VGs.

  L{_CheckNodesFreeDiskOnVG} is run once for every volume group in
  C{req_sizes}.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: maps each volume group name to the amount of disk
      in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_sizes[vg])
5072
5073
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify free disk space on the given nodes for one volume group.

  All nodes are queried with a single RPC call; the check fails for the
  first node whose query failed, whose C{vg_free} value is not an
  integer or whose free space is lower than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, vg, None)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      msg = ("Can't compute free disk space on node"
             " %s for vg %s, result was '%s'" %
             (node, vg, vg_free))
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    if requested > vg_free:
      msg = ("Not enough disk space on target node %s"
             " vg %s: required %d MiB, available %d MiB" %
             (node, vg, requested, vg_free))
      raise errors.OpPrereqError(msg, errors.ECODE_NORES)
5109
5110
class LUInstanceStartup(LogicalUnit):
  """Start an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the types of the temporary backend parameters, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes); the same
        node list is used for both phases

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (hook_env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, validates temporary hypervisor parameters
    and, unless an offline primary node is being ignored, checks the
    primary node, the bridges and the free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # temporary hypervisor parameters, if any
    hvparams = self.op.hvparams
    if hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, merged_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    inst_info = self.rpc.call_instance_info(instance.primary_node,
                                            instance.name,
                                            instance.hypervisor)
    inst_info.Raise("Error checking node %s" % instance.primary_node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
    if inst_info.payload:
      # the instance is running already, no memory check needed
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration (unless
    C{no_remember} is set) before the disks are assembled and the
    instance is started on its primary node.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.op.hvparams,
                                                self.op.beparams)
    err = start_result.fail_msg
    if err:
      # do not leave the disks active for an instance which is not running
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % err)
5211
5212
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes); the same
        node list is used for both phases

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (hook_env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance and checks its primary node and bridges.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the node in a single RPC
    call. Any other reboot type is done as a full cycle: instance
    shutdown, disk shutdown, disk start and instance start. In both
    cases the instance is finally marked as up in the configuration.

    @raise errors.OpExecError: if the instance could not be started
        again during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    pnode = instance.primary_node

    node_side_types = (constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD)

    if reboot_type in node_side_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    reboot_type,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                    self.op.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, instance, None, None)
      err = start_result.fail_msg
      if err:
        # do not leave the disks active for an instance which is down
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % err)

    self.cfg.MarkInstanceUp(instance.name)
5286
5287
class LUInstanceShutdown(LogicalUnit):
  """Shut down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes); the same
        node list is used for both phases

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (hook_env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance and, unless an offline primary node is being
    ignored, checks the primary node via L{_CheckNodeOnline}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = self.cfg.GetNodeInfo(inst.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      return

    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration (unless
    C{no_remember} is set). A failing shutdown RPC only results in a
    warning; the disks are shut down afterwards in any case.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    err = self.rpc.call_instance_shutdown(pnode, instance, timeout).fail_msg
    if err:
      self.proc.LogWarning("Could not shutdown instance: %s" % err)

    _ShutdownInstanceDisks(self, instance)
5349
5350
class LUInstanceReinstall(LogicalUnit):
  """Reinstall the operating system of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes); the same
        node list is used for both phases

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)

    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (hook_env, nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that all its nodes are online and that it has disks. A new OS type
    and new OS parameters, if given, are validated as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is None:
      target_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      target_os = self.op.os_type

    all_nodes = list(instance.all_nodes)

    if self.op.osparams:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, target_os, new_osparams)
      self.os_inst = new_osparams # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration first. The disks
    are then started, the OS create scripts are run on the primary node
    and the disks are shut down again, even if the scripts failed.

    """
    instance = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      instance.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      add_result = self.rpc.call_instance_os_add(instance.primary_node,
                                                 instance, True,
                                                 self.op.debug_level,
                                                 osparams=self.os_inst)
      add_result.Raise("Could not install OS for instance %s on node %s" %
                       (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
5435
5436
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Normalise the list of disk indices (unique and sorted).

    """
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    """Lock the instance and, if given, the replacement nodes.

    The node locks of the instance itself are appended later, in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Add the locks for the instance's current nodes.

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: (environment, pre-hook nodes, post-hook nodes); the same
        node list is used for both phases

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the number of replacement nodes (if any) matches, that the
    instance has disks and that the requested disk indices are valid.

    @raise errors.OpPrereqError: if the number of replacement nodes is
        wrong, the instance is diskless, a disk index is out of range,
        or a partial recreation is combined with a node change

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    disk_count = len(instance.disks)
    # explicit list, so that the comparison below does not depend on
    # range() returning a list
    all_indices = list(range(disk_count))

    if not self.op.disks:
      self.op.disks = all_indices
    else:
      for idx in self.op.disks:
        # a negative index would never match a disk in Exec and would
        # silently turn the whole operation into a no-op
        if idx < 0 or idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != all_indices and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    If replacement nodes were given, the instance's primary node and
    the node entries of its DRBD8 disks are updated and saved first.
    Disks whose index was not requested are skipped.

    """
    # change primary node, if needed
    if self.op.nodes:
      self.instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    to_skip = []
    for idx, disk in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes
          assert len(self.op.nodes) == 2
          logical_id = list(disk.logical_id)
          logical_id[0] = self.op.nodes[0]
          logical_id[1] = self.op.nodes[1]
          disk.logical_id = tuple(logical_id)

    if self.op.nodes:
      self.cfg.Update(self.instance, feedback_fn)

    _CreateDisks(self, self.instance, to_skip=to_skip)
5552
5553
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  Besides updating the configuration, the instance lock is swapped, the
  storage directory of file-based instances is renamed and the OS rename
  script is run on the primary node.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    @raise errors.OpPrereqError: if an IP check was requested without a
        name check

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance;
    the same node list is returned for both node positions of the result.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    When name checking is enabled, the requested name is replaced (also
    in the opcode) by the name returned by the resolver.

    @raise errors.OpPrereqError: if, with the IP check enabled, the
        resolved IP answers on the node daemon port, or if the new name
        is already used by another instance

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      # Compare the resolved name, not the resolver result object, with
      # the requested name; otherwise the message below is not tied to an
      # actual change of the name
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    # renaming an instance to its current name is not treated as a clash
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    @return: the name of the instance as re-read from the configuration
        after the rename

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template == constants.DT_FILE and
        self.op.new_name != inst.name):
      # remember the old directory before the configuration is changed
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # a failing OS script is only reported, the rename itself stands
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      # the disks were only activated for the rename script
      _ShutdownInstanceDisks(self, inst)

    return inst.name
5658
5659
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock recalculation.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds only the master; the second one holds all
    nodes of the instance followed by the master.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout

    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(master_only)

    return hook_env, master_only, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down on its primary node first; a failed
    shutdown aborts the removal unless failures are to be ignored.

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown_res = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                   self.op.shutdown_timeout)
    fail_msg = shutdown_res.fail_msg
    if fail_msg and not self.op.ignore_failures:
      raise errors.OpExecError("Could not shutdown instance %s on"
                               " node %s: %s" %
                               (inst.name, inst.primary_node, fail_msg))
    if fail_msg:
      feedback_fn("Warning: can't shutdown instance: %s" % fail_msg)

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
5719
5720
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's disks, drops the instance from the cluster
  configuration and schedules its lock for removal.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report warnings
  @param instance: the instance object to remove
  @param ignore_failures: whether a failed disk removal only produces a
      warning instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5741
5742
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} helper object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the opcode parameters.

    """
    opcode = self.op
    self.iq = _InstanceQuery(opcode.names, opcode.output_fields,
                             opcode.use_locking)

  def ExpandNames(self):
    """Let the query helper expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query helper declare the locks for the given level.

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return the old-style result.

    """
    query_helper = self.iq
    return query_helper.OldStyleQuery(self)
5762
5763
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock recalculation.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds the master and the secondary nodes of the
    instance; the second one additionally holds the current primary.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    hook_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    # the generic instance variables are merged in last
    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]

    return hook_env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, uses a network
    mirrored disk template and that its first secondary node is able to
    take it over.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)

    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # check memory requirements on the secondary node
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, new_primary, reason,
                           be_params[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    inst = self.instance
    primary_info = self.cfg.GetNodeInfo(inst.primary_node)

    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if not consistent and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.op.shutdown_timeout)
    fail_msg = shutdown_res.fail_msg
    if fail_msg:
      # a failed shutdown is tolerated only on request or when the
      # primary node is marked offline
      if not (self.op.ignore_consistency or primary_info.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, fail_msg))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, fail_msg)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    fail_msg = start_res.fail_msg
    if fail_msg:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, fail_msg))
5909
5910
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and create the migration tasklet.

    """
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds the master and the secondary nodes of the
    instance; the second one additionally holds the current primary.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    hook_env = _BuildInstanceHookEnvByObject(self, inst)
    hook_env["MIGRATE_LIVE"] = self._migrater.live
    hook_env["MIGRATE_CLEANUP"] = self.op.cleanup
    hook_env["OLD_PRIMARY"] = old_primary
    hook_env["OLD_SECONDARY"] = new_primary
    hook_env["NEW_PRIMARY"] = new_primary
    hook_env["NEW_SECONDARY"] = old_primary

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]

    return hook_env, pre_nodes, post_nodes
5958
5959
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    The node lock recalculation is set to append mode, so the target
    node stays in the node lock list.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the lock for the instance's primary node at the node level.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node of the move.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already located on the target node, that all its disks are plain
    logical volumes or files, and that the target node is able to host
    the instance.

    @raise errors.OpPrereqError: if the instance already is on the target
        node or one of its disks is not of a directly copyable type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node; the reason text
      # names the actual operation (a move, not a failover)
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (and this is not
        ignored), if the target disks cannot be created, if the data
        copy fails or if the instance cannot be started again

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the minors are released even if the disk removal itself fails
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # the first failure stops the copy, remaining disks are skipped
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the copy errors are reported whatever the outcome of the cleanup
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, switch the configuration over
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6133
6134
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the node and its primary instances, one tasklet per instance.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    instance_names = []
    migrations = []

    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", inst.name)
      instance_names.append(inst.name)
      migrations.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = migrations

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list returned here contains only the master node.

    """
    hook_env = {
      "NODE_NAME": self.op.node_name,
      }

    master_only = [self.cfg.GetMasterNode()]

    return (hook_env, master_only, master_only)
6184
6185
6186 class TLMigrateInstance(Tasklet):
6187   """Tasklet class for instance migration.
6188
6189   @type live: boolean
6190   @ivar live: whether the migration will be done live or non-live;
6191       this variable is initialized only after CheckPrereq has run
6192
6193   """
6194   def __init__(self, lu, instance_name, cleanup):
6195     """Initializes this class.
6196
6197     """
6198     Tasklet.__init__(self, lu)
6199
6200     # Parameters
6201     self.instance_name = instance_name
6202     self.cleanup = cleanup
6203     self.live = False # will be overridden later
6204
6205   def CheckPrereq(self):
6206     """Check prerequisites.
6207
6208     This checks that the instance is in the cluster.
6209
6210     """
6211     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6212     instance = self.cfg.GetInstanceInfo(instance_name)
6213     assert instance is not None
6214
6215     if instance.disk_template != constants.DT_DRBD8:
6216       raise errors.OpPrereqError("Instance's disk layout is not"
6217                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6218
6219     secondary_nodes = instance.secondary_nodes
6220     if not secondary_nodes:
6221       raise errors.ConfigurationError("No secondary node but using"
6222                                       " drbd8 disk template")
6223
6224     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6225
6226     target_node = secondary_nodes[0]
6227     # check memory requirements on the secondary node
6228     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6229                          instance.name, i_be[constants.BE_MEMORY],
6230                          instance.hypervisor)
6231
6232     # check bridge existance
6233     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6234
6235     if not self.cleanup:
6236       _CheckNodeNotDrained(self.lu, target_node)
6237       result = self.rpc.call_instance_migratable(instance.primary_node,
6238                                                  instance)
6239       result.Raise("Can't migrate, please use failover",
6240                    prereq=True, ecode=errors.ECODE_STATE)
6241
6242     self.instance = instance
6243
6244     if self.lu.op.live is not None and self.lu.op.mode is not None:
6245       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6246                                  " parameters are accepted",
6247                                  errors.ECODE_INVAL)
6248     if self.lu.op.live is not None:
6249       if self.lu.op.live:
6250         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6251       else:
6252         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6253       # reset the 'live' parameter to None so that repeated
6254       # invocations of CheckPrereq do not raise an exception
6255       self.lu.op.live = None
6256     elif self.lu.op.mode is None:
6257       # read the default value from the hypervisor
6258       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6259       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6260
6261     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6262
6263   def _WaitUntilSync(self):
6264     """Poll with custom rpc for disk sync.
6265
6266     This uses our own step-based rpc call.
6267
6268     """
6269     self.feedback_fn("* wait until resync is done")
6270     all_done = False
6271     while not all_done:
6272       all_done = True
6273       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6274                                             self.nodes_ip,
6275                                             self.instance.disks)
6276       min_percent = 100
6277       for node, nres in result.items():
6278         nres.Raise("Cannot resync disks on node %s" % node)
6279         node_done, node_percent = nres.payload
6280         all_done = all_done and node_done
6281         if node_percent is not None:
6282           min_percent = min(min_percent, node_percent)
6283       if not all_done:
6284         if min_percent < 100:
6285           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6286         time.sleep(2)
6287
6288   def _EnsureSecondary(self, node):
6289     """Demote a node to secondary.
6290
6291     """
6292     self.feedback_fn("* switching node %s to secondary mode" % node)
6293
6294     for dev in self.instance.disks:
6295       self.cfg.SetDiskID(dev, node)
6296
6297     result = self.rpc.call_blockdev_close(node, self.instance.name,
6298                                           self.instance.disks)
6299     result.Raise("Cannot change disk to secondary on node %s" % node)
6300
6301   def _GoStandalone(self):
6302     """Disconnect from the network.
6303
6304     """
6305     self.feedback_fn("* changing into standalone mode")
6306     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6307                                                self.instance.disks)
6308     for node, nres in result.items():
6309       nres.Raise("Cannot disconnect disks node %s" % node)
6310
6311   def _GoReconnect(self, multimaster):
6312     """Reconnect to the network.
6313
6314     """
6315     if multimaster:
6316       msg = "dual-master"
6317     else:
6318       msg = "single-master"
6319     self.feedback_fn("* changing disks into %s mode" % msg)
6320     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6321                                            self.instance.disks,
6322                                            self.instance.name, multimaster)
6323     for node, nres in result.items():
6324       nres.Raise("Cannot change disks config on node %s" % node)
6325
6326   def _ExecCleanup(self):
6327     """Try to cleanup after a failed migration.
6328
6329     The cleanup is done by:
6330       - check that the instance is running only on one node
6331         (and update the config if needed)
6332       - change disks on its secondary node to secondary
6333       - wait until disks are fully synchronized
6334       - disconnect from the network
6335       - change disks into single-master mode
6336       - wait again until disks are fully synchronized
6337
6338     """
6339     instance = self.instance
6340     target_node = self.target_node
6341     source_node = self.source_node
6342
6343     # check running on only one node
6344     self.feedback_fn("* checking where the instance actually runs"
6345                      " (if this hangs, the hypervisor might be in"
6346                      " a bad state)")
6347     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6348     for node, result in ins_l.items():
6349       result.Raise("Can't contact node %s" % node)
6350
6351     runningon_source = instance.name in ins_l[source_node].payload
6352     runningon_target = instance.name in ins_l[target_node].payload
6353
6354     if runningon_source and runningon_target:
6355       raise errors.OpExecError("Instance seems to be running on two nodes,"
6356                                " or the hypervisor is confused. You will have"
6357                                " to ensure manually that it runs only on one"
6358                                " and restart this operation.")
6359
6360     if not (runningon_source or runningon_target):
6361       raise errors.OpExecError("Instance does not seem to be running at all."
6362                                " In this case, it's safer to repair by"
6363                                " running 'gnt-instance stop' to ensure disk"
6364                                " shutdown, and then restarting it.")
6365
6366     if runningon_target:
6367       # the migration has actually succeeded, we need to update the config
6368       self.feedback_fn("* instance running on secondary node (%s),"
6369                        " updating config" % target_node)
6370       instance.primary_node = target_node
6371       self.cfg.Update(instance, self.feedback_fn)
6372       demoted_node = source_node
6373     else:
6374       self.feedback_fn("* instance confirmed to be running on its"
6375                        " primary node (%s)" % source_node)
6376       demoted_node = target_node
6377
6378     self._EnsureSecondary(demoted_node)
6379     try:
6380       self._WaitUntilSync()
6381     except errors.OpExecError:
6382       # we ignore here errors, since if the device is standalone, it
6383       # won't be able to sync
6384       pass
6385     self._GoStandalone()
6386     self._GoReconnect(False)
6387     self._WaitUntilSync()
6388
6389     self.feedback_fn("* done")
6390
6391   def _RevertDiskStatus(self):
6392     """Try to revert the disk status after a failed migration.
6393
6394     """
6395     target_node = self.target_node
6396     try:
6397       self._EnsureSecondary(target_node)
6398       self._GoStandalone()
6399       self._GoReconnect(False)
6400       self._WaitUntilSync()
6401     except errors.OpExecError, err:
6402       self.lu.LogWarning("Migration failed and I can't reconnect the"
6403                          " drives: error '%s'\n"
6404                          "Please look and recover the instance status" %
6405                          str(err))
6406
6407   def _AbortMigration(self):
6408     """Call the hypervisor code to abort a started migration.
6409
6410     """
6411     instance = self.instance
6412     target_node = self.target_node
6413     migration_info = self.migration_info
6414
6415     abort_result = self.rpc.call_finalize_migration(target_node,
6416                                                     instance,
6417                                                     migration_info,
6418                                                     False)
6419     abort_msg = abort_result.fail_msg
6420     if abort_msg:
6421       logging.error("Aborting migration failed on target node %s: %s",
6422                     target_node, abort_msg)
6423       # Don't raise an exception here, as we stil have to try to revert the
6424       # disk status, even if this step failed.
6425
6426   def _ExecMigration(self):
6427     """Migrate an instance.
6428
6429     The migrate is done by:
6430       - change the disks into dual-master mode
6431       - wait until disks are fully synchronized again
6432       - migrate the instance
6433       - change disks on the new secondary node (the old primary) to secondary
6434       - wait until disks are fully synchronized
6435       - change disks into single-master mode
6436
6437     """
6438     instance = self.instance
6439     target_node = self.target_node
6440     source_node = self.source_node
6441
6442     self.feedback_fn("* checking disk consistency between source and target")
6443     for dev in instance.disks:
6444       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6445         raise errors.OpExecError("Disk %s is degraded or not fully"
6446                                  " synchronized on target node,"
6447                                  " aborting migrate." % dev.iv_name)
6448
6449     # First get the migration information from the remote node
6450     result = self.rpc.call_migration_info(source_node, instance)
6451     msg = result.fail_msg
6452     if msg:
6453       log_err = ("Failed fetching source migration information from %s: %s" %
6454                  (source_node, msg))
6455       logging.error(log_err)
6456       raise errors.OpExecError(log_err)
6457
6458     self.migration_info = migration_info = result.payload
6459
6460     # Then switch the disks to master/master mode
6461     self._EnsureSecondary(target_node)
6462     self._GoStandalone()
6463     self._GoReconnect(True)
6464     self._WaitUntilSync()
6465
6466     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6467     result = self.rpc.call_accept_instance(target_node,
6468                                            instance,
6469                                            migration_info,
6470                                            self.nodes_ip[target_node])
6471
6472     msg = result.fail_msg
6473     if msg:
6474       logging.error("Instance pre-migration failed, trying to revert"
6475                     " disk status: %s", msg)
6476       self.feedback_fn("Pre-migration failed, aborting")
6477       self._AbortMigration()
6478       self._RevertDiskStatus()
6479       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6480                                (instance.name, msg))
6481
6482     self.feedback_fn("* migrating instance to %s" % target_node)
6483     time.sleep(10)
6484     result = self.rpc.call_instance_migrate(source_node, instance,
6485                                             self.nodes_ip[target_node],
6486                                             self.live)
6487     msg = result.fail_msg
6488     if msg:
6489       logging.error("Instance migration failed, trying to revert"
6490                     " disk status: %s", msg)
6491       self.feedback_fn("Migration failed, aborting")
6492       self._AbortMigration()
6493       self._RevertDiskStatus()
6494       raise errors.OpExecError("Could not migrate instance %s: %s" %
6495                                (instance.name, msg))
6496     time.sleep(10)
6497
6498     instance.primary_node = target_node
6499     # distribute new instance config to the other nodes
6500     self.cfg.Update(instance, self.feedback_fn)
6501
6502     result = self.rpc.call_finalize_migration(target_node,
6503                                               instance,
6504                                               migration_info,
6505                                               True)
6506     msg = result.fail_msg
6507     if msg:
6508       logging.error("Instance migration succeeded, but finalization failed:"
6509                     " %s", msg)
6510       raise errors.OpExecError("Could not finalize instance migration: %s" %
6511                                msg)
6512
6513     self._EnsureSecondary(source_node)
6514     self._WaitUntilSync()
6515     self._GoStandalone()
6516     self._GoReconnect(False)
6517     self._WaitUntilSync()
6518
6519     self.feedback_fn("* done")
6520
6521   def Exec(self, feedback_fn):
6522     """Perform the migration.
6523
6524     """
6525     feedback_fn("Migrating instance %s" % self.instance.name)
6526
6527     self.feedback_fn = feedback_fn
6528
6529     self.source_node = self.instance.primary_node
6530     self.target_node = self.instance.secondary_nodes[0]
6531     self.all_nodes = [self.source_node, self.target_node]
6532     self.nodes_ip = {
6533       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6534       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6535       }
6536
6537     if self.cleanup:
6538       return self._ExecCleanup()
6539     else:
6540       return self._ExecMigration()
6541
6542
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a device and all of its children on a given node.

  The children are always visited first.  The device itself is only
  created when creation is forced, either by the caller or because the
  device reports that it has to be created on secondaries; in the
  latter case the forcing also applies to all of its children.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is turned into True as soon as a device is found for which
      CreateOnSecondary() is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter is passed on to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # Depth-first: the children have to exist before their parent
  for sub_device in (device.children or ()):
    _CreateBlockDev(lu, node, instance, sub_device, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6583
6584
6585 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6586   """Create a single block device on a given node.
6587
6588   This will not recurse over children of the device, so they must be
6589   created in advance.
6590
6591   @param lu: the lu on whose behalf we execute
6592   @param node: the node on which to create the device
6593   @type instance: L{objects.Instance}
6594   @param instance: the instance which owns the device
6595   @type device: L{objects.Disk}
6596   @param device: the device to create
6597   @param info: the extra 'metadata' we should attach to the device
6598       (this will be represented as a LVM tag)
6599   @type force_open: boolean
6600   @param force_open: this parameter will be passes to the
6601       L{backend.BlockdevCreate} function where it specifies
6602       whether we run on primary or not, and it affects both
6603       the child assembly and the device own Open() execution
6604
6605   """
6606   lu.cfg.SetDiskID(device, node)
6607   result = lu.rpc.call_blockdev_create(node, device, device.size,
6608                                        instance.name, force_open, info)
6609   result.Raise("Can't create block device %s on"
6610                " node %s for instance %s" % (device, node, instance.name))
6611   if device.physical_id is None:
6612     device.physical_id = result.payload
6613
6614
6615 def _GenerateUniqueNames(lu, exts):
6616   """Generate a suitable LV name.
6617
6618   This will generate a logical volume name for the given instance.
6619
6620   """
6621   results = []
6622   for val in exts:
6623     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6624     results.append("%s%s" % (new_id, val))
6625   return results
6626
6627
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a DRBD secret are requested from the configuration.  The
  first entries of C{vgnames} and C{names} describe the data volume (of
  the requested size), the second entries the metadata volume (fixed
  size of 128).

  @return: the drbd8 L{objects.Disk}, with the data and metadata
      volumes as its children

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgnames[1], names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
6647
6648
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the disk objects for all disks of a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate the layout for
  @param instance_name: the instance name, used when allocating DRBD
      minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; exactly one is required
      for drbd8, none for the plain and file templates
  @param disk_info: list of dicts describing the disks ("size" and
      "mode" are read for every disk; "vg" and "metavg" are optional)
  @param file_storage_dir: directory holding the file-based disks
  @param file_driver: driver to use for file-based disks
  @param base_index: index of the first disk, used for the disk names
  @param feedback_fn: callable used for reporting progress messages
  @rtype: list
  @return: the generated L{objects.Disk} objects
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + nr)
                                         for nr in range(disk_count)])
    plain_disks = []
    for pos, spec in enumerate(disk_info):
      vg = spec.get("vg", default_vg)
      lv_name = lv_names[pos]
      feedback_fn("* disk %i, vg %s, name %s" % (pos, vg, lv_name))
      plain_disks.append(objects.Disk(dev_type=constants.LD_LV,
                                      size=spec["size"],
                                      logical_id=(vg, lv_name),
                                      iv_name="disk/%d" % (pos + base_index),
                                      mode=spec["mode"]))
    return plain_disks

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    lv_prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + nr)
                                            for nr in range(disk_count)])
    drbd_disks = []
    for pos, spec in enumerate(disk_info):
      data_vg = spec.get("vg", default_vg)
      meta_vg = spec.get("metavg", data_vg)
      lv_pair = [lv_prefixes[pos] + "_data", lv_prefixes[pos] + "_meta"]
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      spec["size"], [data_vg, meta_vg],
                                      lv_pair,
                                      "disk/%d" % (pos + base_index),
                                      minors[pos * 2], minors[pos * 2 + 1])
      drbd_dev.mode = spec["mode"]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    file_disks = []
    for pos, spec in enumerate(disk_info):
      disk_index = pos + base_index
      file_path = "%s/disk%d" % (file_storage_dir, disk_index)
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=spec["size"],
                                     iv_name="disk/%d" % disk_index,
                                     logical_id=(file_driver, file_path),
                                     mode=spec["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6720
6721
6722 def _GetInstanceInfoText(instance):
6723   """Compute that text that should be added to the disk's metadata.
6724
6725   """
6726   return "originstname+%s" % instance.name
6727
6728
6729 def _CalcEta(time_taken, written, total_size):
6730   """Calculates the ETA based on size written and total size.
6731
6732   @param time_taken: The time taken so far
6733   @param written: amount written so far
6734   @param total_size: The total size of data to be written
6735   @return: The remaining time in seconds
6736
6737   """
6738   avg_time = time_taken / float(written)
6739   return (total_size - written) * avg_time
6740
6741
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Synchronisation of the disks is paused on the primary node while the
  wipe is running and resumed afterwards, whether or not the wipe
  succeeded.  Each disk is wiped in chunks, with a progress message at
  most once a minute.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @raise errors.OpExecError: presumably, if pausing the sync or wiping
      a chunk fails (the exception is raised by the C{Raise} method of
      the RPC result, not directly by this function)

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # Check the call as a whole before looking at the per-disk payload; a
  # failed call is not expected to carry a usable list of results
  result.Raise("Failed to pause disk synchronization on node %s" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for very small disks the truncation would yield zero,
      # which would keep the loop below from ever advancing, hence the
      # lower bound
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    if result.fail_msg:
      # Only warn: raising from here would hide an error from the wipe
      lu.LogWarning("Failed to resume disk synchronization on node %s: %s."
                    " Please have a look at the status and troubleshoot"
                    " the issue.", node, result.fail_msg)
      logging.warning("resume-sync of instance %s failed on node %s: %s",
                      instance.name, node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                        " look at the status and troubleshoot the issue.", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
6809
6810
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.  For the file disk
  template the storage directory is created first.  Every disk is then
  created on all relevant nodes; creation (and opening) is only forced
  on the primary node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: nothing; failures are reported through the C{Raise} method
      of the RPC results

  """
  disk_tag = _GetInstanceInfoText(instance)
  if target_node is not None:
    primary = target_node
    nodes = [primary]
  else:
    primary = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(primary, storage_dir)

    mkdir_result.Raise("Failed to create directory '%s' on"
                       " node %s" % (storage_dir, primary))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in nodes:
      on_primary = node == primary
      _CreateBlockDev(lu, node, instance, device, on_primary, disk_tag,
                      on_primary)
6854
6855
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For the file disk template the storage directory is removed as well,
  on the same node the disks were removed from.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Report the node the call was actually sent to, which is not the
      # primary node when target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6903
6904
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template of the instance
  @param disks: list of dicts describing the disks; for the LVM-based
      templates each one must have a "vg" and a "size" entry
  @rtype: dict
  @return: the required size for each volume group name; empty for the
      templates which do not use volume groups
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Sum up size plus per-disk overhead for every volume group.

    """
    vgs = {}
    for disk in disks:
      vg = disk["vg"]
      # accumulate on the entry of this disk's own VG, so that several
      # disks in the same VG add up instead of replacing each other
      vgs[vg] = vgs.get(vg, 0) + disk["size"] + payload

    return vgs

  # Per-disk overhead for each template; None for the templates which
  # need no space in a volume group at all
  overhead = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: 0,
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: 128,
    constants.DT_FILE: None,
  }

  if disk_template not in overhead:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  payload = overhead[disk_template]
  if payload is None:
    return {}

  # Only computed for the requested template, so that disks without a
  # "vg" entry are no problem for the templates not using one
  return _compute(disks, payload)
6933
6934
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirement for a disk template.

  @param disk_template: the disk template of the instance
  @param disks: the disks, as dicts with a "size" entry
  @return: the sum of the disk sizes for the plain template, the same
      plus 128 per disk for drbd8, and None for the diskless and file
      templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" %  disk_template)
6953
6954
6955 def _FilterVmNodes(lu, nodenames):
6956   """Filters out non-vm_capable nodes from a list.
6957
6958   @type lu: L{LogicalUnit}
6959   @param lu: the logical unit for which we check
6960   @type nodenames: list
6961   @param nodenames: the list of nodes on which we should check
6962   @rtype: list
6963   @return: the list of vm-capable nodes
6964
6965   """
6966   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6967   return [name for name in nodenames if name not in vm_nodes]
6968
6969
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.  Only vm-capable
  nodes are asked, and the results of offline nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  results = lu.rpc.call_hypervisor_validate_params(vm_nodes,
                                                   hvname,
                                                   hvparams)
  for name in vm_nodes:
    node_result = results[name]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        name)
6996
6997
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  Only vm-capable nodes are asked.  A node returning an empty payload
  is merely logged as not having the OS.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  results = lu.rpc.call_os_validate(required, vm_nodes, osname,
                                    checks, osparams)
  for name, node_result in results.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % name)
    if not node_result.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, name)
7026
7027
7028 class LUInstanceCreate(LogicalUnit):
7029   """Create an instance.
7030
7031   """
7032   HPATH = "instance-add"
7033   HTYPE = constants.HTYPE_INSTANCE
7034   REQ_BGL = False
7035
7036   def CheckArguments(self):
7037     """Check arguments.
7038
7039     """
7040     # do not require name_check to ease forward/backward compatibility
7041     # for tools
7042     if self.op.no_install and self.op.start:
7043       self.LogInfo("No-installation mode selected, disabling startup")
7044       self.op.start = False
7045     # validate/normalize the instance name
7046     self.op.instance_name = \
7047       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7048
7049     if self.op.ip_check and not self.op.name_check:
7050       # TODO: make the ip check more flexible and not depend on the name check
7051       raise errors.OpPrereqError("Cannot do ip check without a name check",
7052                                  errors.ECODE_INVAL)
7053
7054     # check nics' parameter names
7055     for nic in self.op.nics:
7056       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7057
7058     # check disks. parameter names and consistent adopt/no-adopt strategy
7059     has_adopt = has_no_adopt = False
7060     for disk in self.op.disks:
7061       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7062       if "adopt" in disk:
7063         has_adopt = True
7064       else:
7065         has_no_adopt = True
7066     if has_adopt and has_no_adopt:
7067       raise errors.OpPrereqError("Either all disks are adopted or none is",
7068                                  errors.ECODE_INVAL)
7069     if has_adopt:
7070       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7071         raise errors.OpPrereqError("Disk adoption is not supported for the"
7072                                    " '%s' disk template" %
7073                                    self.op.disk_template,
7074                                    errors.ECODE_INVAL)
7075       if self.op.iallocator is not None:
7076         raise errors.OpPrereqError("Disk adoption not allowed with an"
7077                                    " iallocator script", errors.ECODE_INVAL)
7078       if self.op.mode == constants.INSTANCE_IMPORT:
7079         raise errors.OpPrereqError("Disk adoption not allowed for"
7080                                    " instance import", errors.ECODE_INVAL)
7081
7082     self.adopt_disks = has_adopt
7083
7084     # instance name verification
7085     if self.op.name_check:
7086       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7087       self.op.instance_name = self.hostname1.name
7088       # used in CheckPrereq for ip ping check
7089       self.check_ip = self.hostname1.ip
7090     else:
7091       self.check_ip = None
7092
7093     # file storage checks
7094     if (self.op.file_driver and
7095         not self.op.file_driver in constants.FILE_DRIVER):
7096       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7097                                  self.op.file_driver, errors.ECODE_INVAL)
7098
7099     ### Node/iallocator related checks
7100     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7101
7102     if self.op.pnode is not None:
7103       if self.op.disk_template in constants.DTS_NET_MIRROR:
7104         if self.op.snode is None:
7105           raise errors.OpPrereqError("The networked disk templates need"
7106                                      " a mirror node", errors.ECODE_INVAL)
7107       elif self.op.snode:
7108         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7109                         " template")
7110         self.op.snode = None
7111
7112     self._cds = _GetClusterDomainSecret()
7113
7114     if self.op.mode == constants.INSTANCE_IMPORT:
7115       # On import force_variant must be True, because if we forced it at
7116       # initial install, our only chance when importing it back is that it
7117       # works again!
7118       self.op.force_variant = True
7119
7120       if self.op.no_install:
7121         self.LogInfo("No-installation mode has no effect during import")
7122
7123     elif self.op.mode == constants.INSTANCE_CREATE:
7124       if self.op.os_type is None:
7125         raise errors.OpPrereqError("No guest OS specified",
7126                                    errors.ECODE_INVAL)
7127       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7128         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7129                                    " installation" % self.op.os_type,
7130                                    errors.ECODE_STATE)
7131       if self.op.disk_template is None:
7132         raise errors.OpPrereqError("No disk template specified",
7133                                    errors.ECODE_INVAL)
7134
7135     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7136       # Check handshake to ensure both clusters have the same domain secret
7137       src_handshake = self.op.source_handshake
7138       if not src_handshake:
7139         raise errors.OpPrereqError("Missing source handshake",
7140                                    errors.ECODE_INVAL)
7141
7142       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7143                                                            src_handshake)
7144       if errmsg:
7145         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7146                                    errors.ECODE_INVAL)
7147
7148       # Load and check source CA
7149       self.source_x509_ca_pem = self.op.source_x509_ca
7150       if not self.source_x509_ca_pem:
7151         raise errors.OpPrereqError("Missing source X509 CA",
7152                                    errors.ECODE_INVAL)
7153
7154       try:
7155         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7156                                                     self._cds)
7157       except OpenSSL.crypto.Error, err:
7158         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7159                                    (err, ), errors.ECODE_INVAL)
7160
7161       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7162       if errcode is not None:
7163         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7164                                    errors.ECODE_INVAL)
7165
7166       self.source_x509_ca = cert
7167
7168       src_instance_name = self.op.source_instance_name
7169       if not src_instance_name:
7170         raise errors.OpPrereqError("Missing source instance name",
7171                                    errors.ECODE_INVAL)
7172
7173       self.source_instance_name = \
7174           netutils.GetHostname(name=src_instance_name).name
7175
7176     else:
7177       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7178                                  self.op.mode, errors.ECODE_INVAL)
7179
7180   def ExpandNames(self):
7181     """ExpandNames for CreateInstance.
7182
7183     Figure out the right locks for instance creation.
7184
7185     """
7186     self.needed_locks = {}
7187
7188     instance_name = self.op.instance_name
7189     # this is just a preventive check, but someone might still add this
7190     # instance in the meantime, and creation will fail at lock-add time
7191     if instance_name in self.cfg.GetInstanceList():
7192       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7193                                  instance_name, errors.ECODE_EXISTS)
7194
7195     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7196
7197     if self.op.iallocator:
7198       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7199     else:
7200       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7201       nodelist = [self.op.pnode]
7202       if self.op.snode is not None:
7203         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7204         nodelist.append(self.op.snode)
7205       self.needed_locks[locking.LEVEL_NODE] = nodelist
7206
7207     # in case of import lock the source node too
7208     if self.op.mode == constants.INSTANCE_IMPORT:
7209       src_node = self.op.src_node
7210       src_path = self.op.src_path
7211
7212       if src_path is None:
7213         self.op.src_path = src_path = self.op.instance_name
7214
7215       if src_node is None:
7216         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7217         self.op.src_node = None
7218         if os.path.isabs(src_path):
7219           raise errors.OpPrereqError("Importing an instance from an absolute"
7220                                      " path requires a source node option.",
7221                                      errors.ECODE_INVAL)
7222       else:
7223         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7224         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7225           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7226         if not os.path.isabs(src_path):
7227           self.op.src_path = src_path = \
7228             utils.PathJoin(constants.EXPORT_DIR, src_path)
7229
7230   def _RunAllocator(self):
7231     """Run the allocator based on input opcode.
7232
7233     """
7234     nics = [n.ToDict() for n in self.nics]
7235     ial = IAllocator(self.cfg, self.rpc,
7236                      mode=constants.IALLOCATOR_MODE_ALLOC,
7237                      name=self.op.instance_name,
7238                      disk_template=self.op.disk_template,
7239                      tags=[],
7240                      os=self.op.os_type,
7241                      vcpus=self.be_full[constants.BE_VCPUS],
7242                      mem_size=self.be_full[constants.BE_MEMORY],
7243                      disks=self.disks,
7244                      nics=nics,
7245                      hypervisor=self.op.hypervisor,
7246                      )
7247
7248     ial.Run(self.op.iallocator)
7249
7250     if not ial.success:
7251       raise errors.OpPrereqError("Can't compute nodes using"
7252                                  " iallocator '%s': %s" %
7253                                  (self.op.iallocator, ial.info),
7254                                  errors.ECODE_NORES)
7255     if len(ial.result) != ial.required_nodes:
7256       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7257                                  " of nodes (%s), required %s" %
7258                                  (self.op.iallocator, len(ial.result),
7259                                   ial.required_nodes), errors.ECODE_FAULT)
7260     self.op.pnode = ial.result[0]
7261     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7262                  self.op.instance_name, self.op.iallocator,
7263                  utils.CommaJoin(ial.result))
7264     if ial.required_nodes == 2:
7265       self.op.snode = ial.result[1]
7266
7267   def BuildHooksEnv(self):
7268     """Build hooks env.
7269
7270     This runs on master, primary and secondary nodes of the instance.
7271
7272     """
7273     env = {
7274       "ADD_MODE": self.op.mode,
7275       }
7276     if self.op.mode == constants.INSTANCE_IMPORT:
7277       env["SRC_NODE"] = self.op.src_node
7278       env["SRC_PATH"] = self.op.src_path
7279       env["SRC_IMAGES"] = self.src_images
7280
7281     env.update(_BuildInstanceHookEnv(
7282       name=self.op.instance_name,
7283       primary_node=self.op.pnode,
7284       secondary_nodes=self.secondaries,
7285       status=self.op.start,
7286       os_type=self.op.os_type,
7287       memory=self.be_full[constants.BE_MEMORY],
7288       vcpus=self.be_full[constants.BE_VCPUS],
7289       nics=_NICListToTuple(self, self.nics),
7290       disk_template=self.op.disk_template,
7291       disks=[(d["size"], d["mode"]) for d in self.disks],
7292       bep=self.be_full,
7293       hvp=self.hv_full,
7294       hypervisor_name=self.op.hypervisor,
7295     ))
7296
7297     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7298           self.secondaries)
7299     return env, nl, nl
7300
7301   def _ReadExportInfo(self):
7302     """Reads the export information from disk.
7303
7304     It will override the opcode source node and path with the actual
7305     information, if these two were not specified before.
7306
7307     @return: the export information
7308
7309     """
7310     assert self.op.mode == constants.INSTANCE_IMPORT
7311
7312     src_node = self.op.src_node
7313     src_path = self.op.src_path
7314
7315     if src_node is None:
7316       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7317       exp_list = self.rpc.call_export_list(locked_nodes)
7318       found = False
7319       for node in exp_list:
7320         if exp_list[node].fail_msg:
7321           continue
7322         if src_path in exp_list[node].payload:
7323           found = True
7324           self.op.src_node = src_node = node
7325           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7326                                                        src_path)
7327           break
7328       if not found:
7329         raise errors.OpPrereqError("No export found for relative path %s" %
7330                                     src_path, errors.ECODE_INVAL)
7331
7332     _CheckNodeOnline(self, src_node)
7333     result = self.rpc.call_export_info(src_node, src_path)
7334     result.Raise("No export or invalid export found in dir %s" % src_path)
7335
7336     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7337     if not export_info.has_section(constants.INISECT_EXP):
7338       raise errors.ProgrammerError("Corrupted export config",
7339                                    errors.ECODE_ENVIRON)
7340
7341     ei_version = export_info.get(constants.INISECT_EXP, "version")
7342     if (int(ei_version) != constants.EXPORT_VERSION):
7343       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7344                                  (ei_version, constants.EXPORT_VERSION),
7345                                  errors.ECODE_ENVIRON)
7346     return export_info
7347
7348   def _ReadExportParams(self, einfo):
7349     """Use export parameters as defaults.
7350
7351     In case the opcode doesn't specify (as in override) some instance
7352     parameters, then try to use them from the export information, if
7353     that declares them.
7354
7355     """
7356     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7357
7358     if self.op.disk_template is None:
7359       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7360         self.op.disk_template = einfo.get(constants.INISECT_INS,
7361                                           "disk_template")
7362       else:
7363         raise errors.OpPrereqError("No disk template specified and the export"
7364                                    " is missing the disk_template information",
7365                                    errors.ECODE_INVAL)
7366
7367     if not self.op.disks:
7368       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7369         disks = []
7370         # TODO: import the disk iv_name too
7371         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7372           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7373           disks.append({"size": disk_sz})
7374         self.op.disks = disks
7375       else:
7376         raise errors.OpPrereqError("No disk info specified and the export"
7377                                    " is missing the disk information",
7378                                    errors.ECODE_INVAL)
7379
7380     if (not self.op.nics and
7381         einfo.has_option(constants.INISECT_INS, "nic_count")):
7382       nics = []
7383       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7384         ndict = {}
7385         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7386           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7387           ndict[name] = v
7388         nics.append(ndict)
7389       self.op.nics = nics
7390
7391     if (self.op.hypervisor is None and
7392         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7393       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7394     if einfo.has_section(constants.INISECT_HYP):
7395       # use the export parameters but do not override the ones
7396       # specified by the user
7397       for name, value in einfo.items(constants.INISECT_HYP):
7398         if name not in self.op.hvparams:
7399           self.op.hvparams[name] = value
7400
7401     if einfo.has_section(constants.INISECT_BEP):
7402       # use the parameters, without overriding
7403       for name, value in einfo.items(constants.INISECT_BEP):
7404         if name not in self.op.beparams:
7405           self.op.beparams[name] = value
7406     else:
7407       # try to read the parameters old style, from the main section
7408       for name in constants.BES_PARAMETERS:
7409         if (name not in self.op.beparams and
7410             einfo.has_option(constants.INISECT_INS, name)):
7411           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7412
7413     if einfo.has_section(constants.INISECT_OSP):
7414       # use the parameters, without overriding
7415       for name, value in einfo.items(constants.INISECT_OSP):
7416         if name not in self.op.osparams:
7417           self.op.osparams[name] = value
7418
7419   def _RevertToDefaults(self, cluster):
7420     """Revert the instance parameters to the default values.
7421
7422     """
7423     # hvparams
7424     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7425     for name in self.op.hvparams.keys():
7426       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7427         del self.op.hvparams[name]
7428     # beparams
7429     be_defs = cluster.SimpleFillBE({})
7430     for name in self.op.beparams.keys():
7431       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7432         del self.op.beparams[name]
7433     # nic params
7434     nic_defs = cluster.SimpleFillNIC({})
7435     for nic in self.op.nics:
7436       for name in constants.NICS_PARAMETERS:
7437         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7438           del nic[name]
7439     # osparams
7440     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7441     for name in self.op.osparams.keys():
7442       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7443         del self.op.osparams[name]
7444
7445   def CheckPrereq(self):
7446     """Check prerequisites.
7447
7448     """
7449     if self.op.mode == constants.INSTANCE_IMPORT:
7450       export_info = self._ReadExportInfo()
7451       self._ReadExportParams(export_info)
7452
7453     if (not self.cfg.GetVGName() and
7454         self.op.disk_template not in constants.DTS_NOT_LVM):
7455       raise errors.OpPrereqError("Cluster does not support lvm-based"
7456                                  " instances", errors.ECODE_STATE)
7457
7458     if self.op.hypervisor is None:
7459       self.op.hypervisor = self.cfg.GetHypervisorType()
7460
7461     cluster = self.cfg.GetClusterInfo()
7462     enabled_hvs = cluster.enabled_hypervisors
7463     if self.op.hypervisor not in enabled_hvs:
7464       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7465                                  " cluster (%s)" % (self.op.hypervisor,
7466                                   ",".join(enabled_hvs)),
7467                                  errors.ECODE_STATE)
7468
7469     # check hypervisor parameter syntax (locally)
7470     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7471     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7472                                       self.op.hvparams)
7473     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7474     hv_type.CheckParameterSyntax(filled_hvp)
7475     self.hv_full = filled_hvp
7476     # check that we don't specify global parameters on an instance
7477     _CheckGlobalHvParams(self.op.hvparams)
7478
7479     # fill and remember the beparams dict
7480     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7481     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7482
7483     # build os parameters
7484     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7485
7486     # now that hvp/bep are in final format, let's reset to defaults,
7487     # if told to do so
7488     if self.op.identify_defaults:
7489       self._RevertToDefaults(cluster)
7490
7491     # NIC buildup
7492     self.nics = []
7493     for idx, nic in enumerate(self.op.nics):
7494       nic_mode_req = nic.get("mode", None)
7495       nic_mode = nic_mode_req
7496       if nic_mode is None:
7497         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7498
7499       # in routed mode, for the first nic, the default ip is 'auto'
7500       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7501         default_ip_mode = constants.VALUE_AUTO
7502       else:
7503         default_ip_mode = constants.VALUE_NONE
7504
7505       # ip validity checks
7506       ip = nic.get("ip", default_ip_mode)
7507       if ip is None or ip.lower() == constants.VALUE_NONE:
7508         nic_ip = None
7509       elif ip.lower() == constants.VALUE_AUTO:
7510         if not self.op.name_check:
7511           raise errors.OpPrereqError("IP address set to auto but name checks"
7512                                      " have been skipped",
7513                                      errors.ECODE_INVAL)
7514         nic_ip = self.hostname1.ip
7515       else:
7516         if not netutils.IPAddress.IsValid(ip):
7517           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7518                                      errors.ECODE_INVAL)
7519         nic_ip = ip
7520
7521       # TODO: check the ip address for uniqueness
7522       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7523         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7524                                    errors.ECODE_INVAL)
7525
7526       # MAC address verification
7527       mac = nic.get("mac", constants.VALUE_AUTO)
7528       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7529         mac = utils.NormalizeAndValidateMac(mac)
7530
7531         try:
7532           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7533         except errors.ReservationError:
7534           raise errors.OpPrereqError("MAC address %s already in use"
7535                                      " in cluster" % mac,
7536                                      errors.ECODE_NOTUNIQUE)
7537
7538       # bridge verification
7539       bridge = nic.get("bridge", None)
7540       link = nic.get("link", None)
7541       if bridge and link:
7542         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7543                                    " at the same time", errors.ECODE_INVAL)
7544       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7545         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7546                                    errors.ECODE_INVAL)
7547       elif bridge:
7548         link = bridge
7549
7550       nicparams = {}
7551       if nic_mode_req:
7552         nicparams[constants.NIC_MODE] = nic_mode_req
7553       if link:
7554         nicparams[constants.NIC_LINK] = link
7555
7556       check_params = cluster.SimpleFillNIC(nicparams)
7557       objects.NIC.CheckParameterSyntax(check_params)
7558       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7559
7560     # disk checks/pre-build
7561     self.disks = []
7562     for disk in self.op.disks:
7563       mode = disk.get("mode", constants.DISK_RDWR)
7564       if mode not in constants.DISK_ACCESS_SET:
7565         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7566                                    mode, errors.ECODE_INVAL)
7567       size = disk.get("size", None)
7568       if size is None:
7569         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7570       try:
7571         size = int(size)
7572       except (TypeError, ValueError):
7573         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7574                                    errors.ECODE_INVAL)
7575       data_vg = disk.get("vg", self.cfg.GetVGName())
7576       meta_vg = disk.get("metavg", data_vg)
7577       new_disk = {"size": size, "mode": mode, "vg": data_vg, "metavg": meta_vg}
7578       if "adopt" in disk:
7579         new_disk["adopt"] = disk["adopt"]
7580       self.disks.append(new_disk)
7581
7582     if self.op.mode == constants.INSTANCE_IMPORT:
7583
7584       # Check that the new instance doesn't have less disks than the export
7585       instance_disks = len(self.disks)
7586       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7587       if instance_disks < export_disks:
7588         raise errors.OpPrereqError("Not enough disks to import."
7589                                    " (instance: %d, export: %d)" %
7590                                    (instance_disks, export_disks),
7591                                    errors.ECODE_INVAL)
7592
7593       disk_images = []
7594       for idx in range(export_disks):
7595         option = 'disk%d_dump' % idx
7596         if export_info.has_option(constants.INISECT_INS, option):
7597           # FIXME: are the old os-es, disk sizes, etc. useful?
7598           export_name = export_info.get(constants.INISECT_INS, option)
7599           image = utils.PathJoin(self.op.src_path, export_name)
7600           disk_images.append(image)
7601         else:
7602           disk_images.append(False)
7603
7604       self.src_images = disk_images
7605
7606       old_name = export_info.get(constants.INISECT_INS, 'name')
7607       try:
7608         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7609       except (TypeError, ValueError), err:
7610         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7611                                    " an integer: %s" % str(err),
7612                                    errors.ECODE_STATE)
7613       if self.op.instance_name == old_name:
7614         for idx, nic in enumerate(self.nics):
7615           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7616             nic_mac_ini = 'nic%d_mac' % idx
7617             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7618
7619     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7620
7621     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7622     if self.op.ip_check:
7623       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7624         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7625                                    (self.check_ip, self.op.instance_name),
7626                                    errors.ECODE_NOTUNIQUE)
7627
7628     #### mac address generation
7629     # By generating here the mac address both the allocator and the hooks get
7630     # the real final mac address rather than the 'auto' or 'generate' value.
7631     # There is a race condition between the generation and the instance object
7632     # creation, which means that we know the mac is valid now, but we're not
7633     # sure it will be when we actually add the instance. If things go bad
7634     # adding the instance will abort because of a duplicate mac, and the
7635     # creation job will fail.
7636     for nic in self.nics:
7637       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7638         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7639
7640     #### allocator run
7641
7642     if self.op.iallocator is not None:
7643       self._RunAllocator()
7644
7645     #### node related checks
7646
7647     # check primary node
7648     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7649     assert self.pnode is not None, \
7650       "Cannot retrieve locked node %s" % self.op.pnode
7651     if pnode.offline:
7652       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7653                                  pnode.name, errors.ECODE_STATE)
7654     if pnode.drained:
7655       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7656                                  pnode.name, errors.ECODE_STATE)
7657     if not pnode.vm_capable:
7658       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7659                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7660
7661     self.secondaries = []
7662
7663     # mirror node verification
7664     if self.op.disk_template in constants.DTS_NET_MIRROR:
7665       if self.op.snode == pnode.name:
7666         raise errors.OpPrereqError("The secondary node cannot be the"
7667                                    " primary node.", errors.ECODE_INVAL)
7668       _CheckNodeOnline(self, self.op.snode)
7669       _CheckNodeNotDrained(self, self.op.snode)
7670       _CheckNodeVmCapable(self, self.op.snode)
7671       self.secondaries.append(self.op.snode)
7672
7673     nodenames = [pnode.name] + self.secondaries
7674
7675     if not self.adopt_disks:
7676       # Check lv size requirements, if not adopting
7677       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7678       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7679
7680     else: # instead, we must check the adoption data
7681       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7682       if len(all_lvs) != len(self.disks):
7683         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7684                                    errors.ECODE_INVAL)
7685       for lv_name in all_lvs:
7686         try:
7687           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7688           # to ReserveLV uses the same syntax
7689           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7690         except errors.ReservationError:
7691           raise errors.OpPrereqError("LV named %s used by another instance" %
7692                                      lv_name, errors.ECODE_NOTUNIQUE)
7693
7694       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7695       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7696
7697       node_lvs = self.rpc.call_lv_list([pnode.name],
7698                                        vg_names.payload.keys())[pnode.name]
7699       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7700       node_lvs = node_lvs.payload
7701
7702       delta = all_lvs.difference(node_lvs.keys())
7703       if delta:
7704         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7705                                    utils.CommaJoin(delta),
7706                                    errors.ECODE_INVAL)
7707       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7708       if online_lvs:
7709         raise errors.OpPrereqError("Online logical volumes found, cannot"
7710                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7711                                    errors.ECODE_STATE)
7712       # update the size of disk based on what is found
7713       for dsk in self.disks:
7714         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7715
7716     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7717
7718     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7719     # check OS parameters (remotely)
7720     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7721
7722     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7723
7724     # memory check on primary node
7725     if self.op.start:
7726       _CheckNodeFreeMemory(self, self.pnode.name,
7727                            "creating instance %s" % self.op.instance_name,
7728                            self.be_full[constants.BE_MEMORY],
7729                            self.op.hypervisor)
7730
7731     self.dry_run_result = list(nodenames)
7732
7733   def Exec(self, feedback_fn):
7734     """Create and add the instance to the cluster.
7735
7736     """
7737     instance = self.op.instance_name
7738     pnode_name = self.pnode.name
7739
7740     ht_kind = self.op.hypervisor
7741     if ht_kind in constants.HTS_REQ_PORT:
7742       network_port = self.cfg.AllocatePort()
7743     else:
7744       network_port = None
7745
7746     if constants.ENABLE_FILE_STORAGE:
7747       # this is needed because os.path.join does not accept None arguments
7748       if self.op.file_storage_dir is None:
7749         string_file_storage_dir = ""
7750       else:
7751         string_file_storage_dir = self.op.file_storage_dir
7752
7753       # build the full file storage dir path
7754       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7755                                         string_file_storage_dir, instance)
7756     else:
7757       file_storage_dir = ""
7758
7759     disks = _GenerateDiskTemplate(self,
7760                                   self.op.disk_template,
7761                                   instance, pnode_name,
7762                                   self.secondaries,
7763                                   self.disks,
7764                                   file_storage_dir,
7765                                   self.op.file_driver,
7766                                   0,
7767                                   feedback_fn)
7768
7769     iobj = objects.Instance(name=instance, os=self.op.os_type,
7770                             primary_node=pnode_name,
7771                             nics=self.nics, disks=disks,
7772                             disk_template=self.op.disk_template,
7773                             admin_up=False,
7774                             network_port=network_port,
7775                             beparams=self.op.beparams,
7776                             hvparams=self.op.hvparams,
7777                             hypervisor=self.op.hypervisor,
7778                             osparams=self.op.osparams,
7779                             )
7780
7781     if self.adopt_disks:
7782       # rename LVs to the newly-generated names; we need to construct
7783       # 'fake' LV disks with the old data, plus the new unique_id
7784       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7785       rename_to = []
7786       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7787         rename_to.append(t_dsk.logical_id)
7788         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7789         self.cfg.SetDiskID(t_dsk, pnode_name)
7790       result = self.rpc.call_blockdev_rename(pnode_name,
7791                                              zip(tmp_disks, rename_to))
7792       result.Raise("Failed to rename adoped LVs")
7793     else:
7794       feedback_fn("* creating instance disks...")
7795       try:
7796         _CreateDisks(self, iobj)
7797       except errors.OpExecError:
7798         self.LogWarning("Device creation failed, reverting...")
7799         try:
7800           _RemoveDisks(self, iobj)
7801         finally:
7802           self.cfg.ReleaseDRBDMinors(instance)
7803           raise
7804
7805     feedback_fn("adding instance %s to cluster config" % instance)
7806
7807     self.cfg.AddInstance(iobj, self.proc.GetECId())
7808
7809     # Declare that we don't want to remove the instance lock anymore, as we've
7810     # added the instance to the config
7811     del self.remove_locks[locking.LEVEL_INSTANCE]
7812     # Unlock all the nodes
7813     if self.op.mode == constants.INSTANCE_IMPORT:
7814       nodes_keep = [self.op.src_node]
7815       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7816                        if node != self.op.src_node]
7817       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7818       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7819     else:
7820       self.context.glm.release(locking.LEVEL_NODE)
7821       del self.acquired_locks[locking.LEVEL_NODE]
7822
7823     disk_abort = False
7824     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7825       feedback_fn("* wiping instance disks...")
7826       try:
7827         _WipeDisks(self, iobj)
7828       except errors.OpExecError, err:
7829         logging.exception("Wiping disks failed")
7830         self.LogWarning("Wiping instance disks failed (%s)", err)
7831         disk_abort = True
7832
7833     if disk_abort:
7834       # Something is already wrong with the disks, don't do anything else
7835       pass
7836     elif self.op.wait_for_sync:
7837       disk_abort = not _WaitForSync(self, iobj)
7838     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7839       # make sure the disks are not degraded (still sync-ing is ok)
7840       time.sleep(15)
7841       feedback_fn("* checking mirrors status")
7842       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7843     else:
7844       disk_abort = False
7845
7846     if disk_abort:
7847       _RemoveDisks(self, iobj)
7848       self.cfg.RemoveInstance(iobj.name)
7849       # Make sure the instance lock gets removed
7850       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7851       raise errors.OpExecError("There are some degraded disks for"
7852                                " this instance")
7853
7854     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7855       if self.op.mode == constants.INSTANCE_CREATE:
7856         if not self.op.no_install:
7857           feedback_fn("* running the instance OS create scripts...")
7858           # FIXME: pass debug option from opcode to backend
7859           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7860                                                  self.op.debug_level)
7861           result.Raise("Could not add os for instance %s"
7862                        " on node %s" % (instance, pnode_name))
7863
7864       elif self.op.mode == constants.INSTANCE_IMPORT:
7865         feedback_fn("* running the instance OS import scripts...")
7866
7867         transfers = []
7868
7869         for idx, image in enumerate(self.src_images):
7870           if not image:
7871             continue
7872
7873           # FIXME: pass debug option from opcode to backend
7874           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7875                                              constants.IEIO_FILE, (image, ),
7876                                              constants.IEIO_SCRIPT,
7877                                              (iobj.disks[idx], idx),
7878                                              None)
7879           transfers.append(dt)
7880
7881         import_result = \
7882           masterd.instance.TransferInstanceData(self, feedback_fn,
7883                                                 self.op.src_node, pnode_name,
7884                                                 self.pnode.secondary_ip,
7885                                                 iobj, transfers)
7886         if not compat.all(import_result):
7887           self.LogWarning("Some disks for instance %s on node %s were not"
7888                           " imported successfully" % (instance, pnode_name))
7889
7890       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7891         feedback_fn("* preparing remote import...")
7892         # The source cluster will stop the instance before attempting to make a
7893         # connection. In some cases stopping an instance can take a long time,
7894         # hence the shutdown timeout is added to the connection timeout.
7895         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7896                            self.op.source_shutdown_timeout)
7897         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7898
7899         assert iobj.primary_node == self.pnode.name
7900         disk_results = \
7901           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7902                                         self.source_x509_ca,
7903                                         self._cds, timeouts)
7904         if not compat.all(disk_results):
7905           # TODO: Should the instance still be started, even if some disks
7906           # failed to import (valid for local imports, too)?
7907           self.LogWarning("Some disks for instance %s on node %s were not"
7908                           " imported successfully" % (instance, pnode_name))
7909
7910         # Run rename script on newly imported instance
7911         assert iobj.name == instance
7912         feedback_fn("Running rename script for %s" % instance)
7913         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7914                                                    self.source_instance_name,
7915                                                    self.op.debug_level)
7916         if result.fail_msg:
7917           self.LogWarning("Failed to run rename script for %s on node"
7918                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7919
7920       else:
7921         # also checked in the prereq part
7922         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7923                                      % self.op.mode)
7924
7925     if self.op.start:
7926       iobj.admin_up = True
7927       self.cfg.Update(iobj, feedback_fn)
7928       logging.info("Starting instance %s on node %s", instance, pnode_name)
7929       feedback_fn("* starting instance...")
7930       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7931       result.Raise("Could not start instance")
7932
7933     return list(iobj.all_nodes)
7934
7935
class LUInstanceConsole(NoHooksLU):
  """Return the console access information of an instance.

  This logical unit does not open the console itself: after checking
  that the instance is listed as running on its primary node, it hands
  back the console description built by L{_GetInstanceConsole}.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance in the configuration and passes its
    primary node to L{_CheckNodeOnline}.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Compute the console information of the instance.

    @raise errors.OpExecError: if the instance is not among the
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # Not running: tell apart "should be up" from "administratively down"
    state = "ADMIN_down"
    if inst.admin_up:
      state = "ERROR_down"
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
7982
7983
def _GetInstanceConsole(cluster, instance):
  """Builds the console description of an instance as a dictionary.

  @type cluster: L{objects.Cluster}
  @param cluster: used to fill in the hypervisor and backend parameters
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the C{ToDict()} form of the console object returned by the
      instance's hypervisor

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled parameters are handed over separately instead of being
  # written into the instance, so that defaults never end up being saved
  # in the instance itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  console_obj = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  assert console_obj.instance == instance.name
  assert console_obj.Validate()

  return console_obj.ToDict()
8003
8004
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  This class validates the opcode arguments, declares the needed locks
  and builds the hooks environment; it creates a L{TLReplaceDisks}
  tasklet and registers it in C{self.tasklets}.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE

    if self.op.iallocator is not None:
      self.needed_locks[node_level] = locking.ALL_SET

    elif self.op.remote_node is None:
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    else:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [new_secondary]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # Only the node level is of interest here
    if level != locking.LEVEL_NODE:
      return
    # When all nodes are already being locked there is nothing to add;
    # otherwise the instance's own nodes have to be declared.
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node, the instance's primary
    node and, when given, the new secondary node; the same list object
    is returned twice.

    """
    instance = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = new_secondary
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    nl = [self.cfg.GetMasterNode(), instance.primary_node]
    if new_secondary is not None:
      nl.append(new_secondary)

    return env, nl, nl
8071
8072
8073 class TLReplaceDisks(Tasklet):
8074   """Replaces disks for an instance.
8075
8076   Note: Locking is not within the scope of this class.
8077
8078   """
8079   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8080                disks, delay_iallocator, early_release):
8081     """Initializes this class.
8082
8083     """
8084     Tasklet.__init__(self, lu)
8085
8086     # Parameters
8087     self.instance_name = instance_name
8088     self.mode = mode
8089     self.iallocator_name = iallocator_name
8090     self.remote_node = remote_node
8091     self.disks = disks
8092     self.delay_iallocator = delay_iallocator
8093     self.early_release = early_release
8094
8095     # Runtime data
8096     self.instance = None
8097     self.new_node = None
8098     self.target_node = None
8099     self.other_node = None
8100     self.remote_node_info = None
8101     self.node_secondary_ip = None
8102
8103   @staticmethod
8104   def CheckArguments(mode, remote_node, iallocator):
8105     """Helper function for users of this class.
8106
8107     """
8108     # check for valid parameter combination
8109     if mode == constants.REPLACE_DISK_CHG:
8110       if remote_node is None and iallocator is None:
8111         raise errors.OpPrereqError("When changing the secondary either an"
8112                                    " iallocator script must be used or the"
8113                                    " new node given", errors.ECODE_INVAL)
8114
8115       if remote_node is not None and iallocator is not None:
8116         raise errors.OpPrereqError("Give either the iallocator or the new"
8117                                    " secondary, not both", errors.ECODE_INVAL)
8118
8119     elif remote_node is not None or iallocator is not None:
8120       # Not replacing the secondary
8121       raise errors.OpPrereqError("The iallocator and new node options can"
8122                                  " only be used when changing the"
8123                                  " secondary node", errors.ECODE_INVAL)
8124
8125   @staticmethod
8126   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8127     """Compute a new secondary node using an IAllocator.
8128
8129     """
8130     ial = IAllocator(lu.cfg, lu.rpc,
8131                      mode=constants.IALLOCATOR_MODE_RELOC,
8132                      name=instance_name,
8133                      relocate_from=relocate_from)
8134
8135     ial.Run(iallocator_name)
8136
8137     if not ial.success:
8138       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8139                                  " %s" % (iallocator_name, ial.info),
8140                                  errors.ECODE_NORES)
8141
8142     if len(ial.result) != ial.required_nodes:
8143       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8144                                  " of nodes (%s), required %s" %
8145                                  (iallocator_name,
8146                                   len(ial.result), ial.required_nodes),
8147                                  errors.ECODE_FAULT)
8148
8149     remote_node_name = ial.result[0]
8150
8151     lu.LogInfo("Selected new secondary for instance '%s': %s",
8152                instance_name, remote_node_name)
8153
8154     return remote_node_name
8155
8156   def _FindFaultyDisks(self, node_name):
8157     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8158                                     node_name, True)
8159
8160   def CheckPrereq(self):
8161     """Check prerequisites.
8162
8163     This checks that the instance is in the cluster.
8164
8165     """
8166     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8167     assert instance is not None, \
8168       "Cannot retrieve locked instance %s" % self.instance_name
8169
8170     if instance.disk_template != constants.DT_DRBD8:
8171       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8172                                  " instances", errors.ECODE_INVAL)
8173
8174     if len(instance.secondary_nodes) != 1:
8175       raise errors.OpPrereqError("The instance has a strange layout,"
8176                                  " expected one secondary but found %d" %
8177                                  len(instance.secondary_nodes),
8178                                  errors.ECODE_FAULT)
8179
8180     if not self.delay_iallocator:
8181       self._CheckPrereq2()
8182
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this fills in the runtime attributes
    C{remote_node_info}, C{target_node}, C{other_node}, C{new_node},
    C{disks} (and possibly C{early_release}) as well as
    C{node_secondary_ip}.

    @raise errors.OpPrereqError: if the new node is the instance's current
        primary or secondary node, if disks were specified for a mode not
        accepting them, or if automatic mode finds faulty disks on both nodes
    @raise errors.ProgrammerError: for an unknown replacement mode

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator; it stays None when neither was requested
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # An explicit disk list is rejected for the automatic and
    # change-secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: the disks to replace are the faulty ones, which
      # must all be on the same node
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target_node) is deliberately not in this list
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # May contain None for roles that are unused in the selected mode
    touched_nodes = frozenset([self.new_node, self.other_node,
                               self.target_node])

    if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET:
      # Release unneeded node locks
      for name in self.lu.acquired_locks[locking.LEVEL_NODE]:
        if name not in touched_nodes:
          self._ReleaseNodeLock(name)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    # (the None placeholders of touched_nodes are skipped)
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes
           if node_name is not None)
8304
8305   def Exec(self, feedback_fn):
8306     """Execute disk replacement.
8307
8308     This dispatches the disk replacement to the appropriate handler.
8309
8310     """
8311     if self.delay_iallocator:
8312       self._CheckPrereq2()
8313
8314     if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and
8315         __debug__):
8316       # Verify owned locks before starting operation
8317       owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8318       assert set(owned_locks) == set(self.node_secondary_ip), \
8319           "Not owning the correct locks: %s" % (owned_locks, )
8320
8321     if not self.disks:
8322       feedback_fn("No disks need replacement")
8323       return
8324
8325     feedback_fn("Replacing disk(s) %s for %s" %
8326                 (utils.CommaJoin(self.disks), self.instance.name))
8327
8328     activate_disks = (not self.instance.admin_up)
8329
8330     # Activate the instance disks if we're replacing them on a down instance
8331     if activate_disks:
8332       _StartInstanceDisks(self.lu, self.instance, True)
8333
8334     try:
8335       # Should we replace the secondary node?
8336       if self.new_node is not None:
8337         fn = self._ExecDrbd8Secondary
8338       else:
8339         fn = self._ExecDrbd8DiskOnly
8340
8341       result = fn(feedback_fn)
8342     finally:
8343       # Deactivate the instance disks if we're replacing them on a
8344       # down instance
8345       if activate_disks:
8346         _SafeShutdownInstanceDisks(self.lu, self.instance)
8347
8348     if __debug__:
8349       # Verify owned locks
8350       owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8351       assert ((self.early_release and not owned_locks) or
8352               (not self.early_release and
8353                set(owned_locks) == set(self.node_secondary_ip))), \
8354         ("Not owning the correct locks, early_release=%s, owned=%r" %
8355          (self.early_release, owned_locks))
8356
8357     return result
8358
8359   def _CheckVolumeGroup(self, nodes):
8360     self.lu.LogInfo("Checking volume groups")
8361
8362     vgname = self.cfg.GetVGName()
8363
8364     # Make sure volume group exists on all involved nodes
8365     results = self.rpc.call_vg_list(nodes)
8366     if not results:
8367       raise errors.OpExecError("Can't list volume groups on the nodes")
8368
8369     for node in nodes:
8370       res = results[node]
8371       res.Raise("Error checking node %s" % node)
8372       if vgname not in res.payload:
8373         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8374                                  (vgname, node))
8375
8376   def _CheckDisksExistence(self, nodes):
8377     # Check disk existence
8378     for idx, dev in enumerate(self.instance.disks):
8379       if idx not in self.disks:
8380         continue
8381
8382       for node in nodes:
8383         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8384         self.cfg.SetDiskID(dev, node)
8385
8386         result = self.rpc.call_blockdev_find(node, dev)
8387
8388         msg = result.fail_msg
8389         if msg or not result.payload:
8390           if not msg:
8391             msg = "disk not found"
8392           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8393                                    (idx, node, msg))
8394
8395   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8396     for idx, dev in enumerate(self.instance.disks):
8397       if idx not in self.disks:
8398         continue
8399
8400       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8401                       (idx, node_name))
8402
8403       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8404                                    ldisk=ldisk):
8405         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8406                                  " replace disks for instance %s" %
8407                                  (node_name, self.instance.name))
8408
8409   def _CreateNewStorage(self, node_name):
8410     iv_names = {}
8411
8412     for idx, dev in enumerate(self.instance.disks):
8413       if idx not in self.disks:
8414         continue
8415
8416       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8417
8418       self.cfg.SetDiskID(dev, node_name)
8419
8420       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8421       names = _GenerateUniqueNames(self.lu, lv_names)
8422
8423       vg_data = dev.children[0].logical_id[0]
8424       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8425                              logical_id=(vg_data, names[0]))
8426       vg_meta = dev.children[1].logical_id[0]
8427       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8428                              logical_id=(vg_meta, names[1]))
8429
8430       new_lvs = [lv_data, lv_meta]
8431       old_lvs = dev.children
8432       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8433
8434       # we pass force_create=True to force the LVM creation
8435       for new_lv in new_lvs:
8436         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8437                         _GetInstanceInfoText(self.instance), False)
8438
8439     return iv_names
8440
8441   def _CheckDevices(self, node_name, iv_names):
8442     for name, (dev, _, _) in iv_names.iteritems():
8443       self.cfg.SetDiskID(dev, node_name)
8444
8445       result = self.rpc.call_blockdev_find(node_name, dev)
8446
8447       msg = result.fail_msg
8448       if msg or not result.payload:
8449         if not msg:
8450           msg = "disk not found"
8451         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8452                                  (name, msg))
8453
8454       if result.payload.is_degraded:
8455         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8456
8457   def _RemoveOldStorage(self, node_name, iv_names):
8458     for name, (_, old_lvs, _) in iv_names.iteritems():
8459       self.lu.LogInfo("Remove logical volumes for %s" % name)
8460
8461       for lv in old_lvs:
8462         self.cfg.SetDiskID(lv, node_name)
8463
8464         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8465         if msg:
8466           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8467                              hint="remove unused LVs manually")
8468
8469   def _ReleaseNodeLock(self, node_name):
8470     """Releases the lock for a given node."""
8471     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8472
8473   def _ExecDrbd8DiskOnly(self, feedback_fn):
8474     """Replace a disk on the primary or secondary for DRBD 8.
8475
8476     The algorithm for replace is quite complicated:
8477
8478       1. for each disk to be replaced:
8479
8480         1. create new LVs on the target node with unique names
8481         1. detach old LVs from the drbd device
8482         1. rename old LVs to name_replaced.<time_t>
8483         1. rename new LVs to old LVs
8484         1. attach the new LVs (with the old names now) to the drbd device
8485
8486       1. wait for sync across all devices
8487
8488       1. for each modified disk:
8489
8490         1. remove old LVs (which have the name name_replaces.<time_t>)
8491
8492     Failures are not very well handled.
8493
8494     """
8495     steps_total = 6
8496
8497     # Step: check device activation
8498     self.lu.LogStep(1, steps_total, "Check device existence")
8499     self._CheckDisksExistence([self.other_node, self.target_node])
8500     self._CheckVolumeGroup([self.target_node, self.other_node])
8501
8502     # Step: check other node consistency
8503     self.lu.LogStep(2, steps_total, "Check peer consistency")
8504     self._CheckDisksConsistency(self.other_node,
8505                                 self.other_node == self.instance.primary_node,
8506                                 False)
8507
8508     # Step: create new storage
8509     self.lu.LogStep(3, steps_total, "Allocate new storage")
8510     iv_names = self._CreateNewStorage(self.target_node)
8511
8512     # Step: for each lv, detach+rename*2+attach
8513     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8514     for dev, old_lvs, new_lvs in iv_names.itervalues():
8515       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8516
8517       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8518                                                      old_lvs)
8519       result.Raise("Can't detach drbd from local storage on node"
8520                    " %s for device %s" % (self.target_node, dev.iv_name))
8521       #dev.children = []
8522       #cfg.Update(instance)
8523
8524       # ok, we created the new LVs, so now we know we have the needed
8525       # storage; as such, we proceed on the target node to rename
8526       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8527       # using the assumption that logical_id == physical_id (which in
8528       # turn is the unique_id on that node)
8529
8530       # FIXME(iustin): use a better name for the replaced LVs
8531       temp_suffix = int(time.time())
8532       ren_fn = lambda d, suff: (d.physical_id[0],
8533                                 d.physical_id[1] + "_replaced-%s" % suff)
8534
8535       # Build the rename list based on what LVs exist on the node
8536       rename_old_to_new = []
8537       for to_ren in old_lvs:
8538         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8539         if not result.fail_msg and result.payload:
8540           # device exists
8541           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8542
8543       self.lu.LogInfo("Renaming the old LVs on the target node")
8544       result = self.rpc.call_blockdev_rename(self.target_node,
8545                                              rename_old_to_new)
8546       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8547
8548       # Now we rename the new LVs to the old LVs
8549       self.lu.LogInfo("Renaming the new LVs on the target node")
8550       rename_new_to_old = [(new, old.physical_id)
8551                            for old, new in zip(old_lvs, new_lvs)]
8552       result = self.rpc.call_blockdev_rename(self.target_node,
8553                                              rename_new_to_old)
8554       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8555
8556       for old, new in zip(old_lvs, new_lvs):
8557         new.logical_id = old.logical_id
8558         self.cfg.SetDiskID(new, self.target_node)
8559
8560       for disk in old_lvs:
8561         disk.logical_id = ren_fn(disk, temp_suffix)
8562         self.cfg.SetDiskID(disk, self.target_node)
8563
8564       # Now that the new lvs have the old name, we can add them to the device
8565       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8566       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8567                                                   new_lvs)
8568       msg = result.fail_msg
8569       if msg:
8570         for new_lv in new_lvs:
8571           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8572                                                new_lv).fail_msg
8573           if msg2:
8574             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8575                                hint=("cleanup manually the unused logical"
8576                                      "volumes"))
8577         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8578
8579       dev.children = new_lvs
8580
8581       self.cfg.Update(self.instance, feedback_fn)
8582
8583     cstep = 5
8584     if self.early_release:
8585       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8586       cstep += 1
8587       self._RemoveOldStorage(self.target_node, iv_names)
8588       # WARNING: we release both node locks here, do not do other RPCs
8589       # than WaitForSync to the primary node
8590       self._ReleaseNodeLock([self.target_node, self.other_node])
8591
8592     # Wait for sync
8593     # This can fail as the old devices are degraded and _WaitForSync
8594     # does a combined result over all disks, so we don't check its return value
8595     self.lu.LogStep(cstep, steps_total, "Sync devices")
8596     cstep += 1
8597     _WaitForSync(self.lu, self.instance)
8598
8599     # Check all devices manually
8600     self._CheckDevices(self.instance.primary_node, iv_names)
8601
8602     # Step: remove old storage
8603     if not self.early_release:
8604       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8605       cstep += 1
8606       self._RemoveOldStorage(self.target_node, iv_names)
8607
8608   def _ExecDrbd8Secondary(self, feedback_fn):
8609     """Replace the secondary node for DRBD 8.
8610
8611     The algorithm for replace is quite complicated:
8612       - for all disks of the instance:
8613         - create new LVs on the new node with same names
8614         - shutdown the drbd device on the old secondary
8615         - disconnect the drbd network on the primary
8616         - create the drbd device on the new secondary
8617         - network attach the drbd on the primary, using an artifice:
8618           the drbd code for Attach() will connect to the network if it
8619           finds a device which is connected to the good local disks but
8620           not network enabled
8621       - wait for sync across all devices
8622       - remove all disks from the old secondary
8623
8624     Failures are not very well handled.
8625
8626     """
8627     steps_total = 6
8628
8629     # Step: check device activation
8630     self.lu.LogStep(1, steps_total, "Check device existence")
8631     self._CheckDisksExistence([self.instance.primary_node])
8632     self._CheckVolumeGroup([self.instance.primary_node])
8633
8634     # Step: check other node consistency
8635     self.lu.LogStep(2, steps_total, "Check peer consistency")
8636     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8637
8638     # Step: create new storage
8639     self.lu.LogStep(3, steps_total, "Allocate new storage")
8640     for idx, dev in enumerate(self.instance.disks):
8641       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8642                       (self.new_node, idx))
8643       # we pass force_create=True to force LVM creation
8644       for new_lv in dev.children:
8645         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8646                         _GetInstanceInfoText(self.instance), False)
8647
8648     # Step 4: dbrd minors and drbd setups changes
8649     # after this, we must manually remove the drbd minors on both the
8650     # error and the success paths
8651     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8652     minors = self.cfg.AllocateDRBDMinor([self.new_node
8653                                          for dev in self.instance.disks],
8654                                         self.instance.name)
8655     logging.debug("Allocated minors %r", minors)
8656
8657     iv_names = {}
8658     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8659       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8660                       (self.new_node, idx))
8661       # create new devices on new_node; note that we create two IDs:
8662       # one without port, so the drbd will be activated without
8663       # networking information on the new node at this stage, and one
8664       # with network, for the latter activation in step 4
8665       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8666       if self.instance.primary_node == o_node1:
8667         p_minor = o_minor1
8668       else:
8669         assert self.instance.primary_node == o_node2, "Three-node instance?"
8670         p_minor = o_minor2
8671
8672       new_alone_id = (self.instance.primary_node, self.new_node, None,
8673                       p_minor, new_minor, o_secret)
8674       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8675                     p_minor, new_minor, o_secret)
8676
8677       iv_names[idx] = (dev, dev.children, new_net_id)
8678       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8679                     new_net_id)
8680       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8681                               logical_id=new_alone_id,
8682                               children=dev.children,
8683                               size=dev.size)
8684       try:
8685         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8686                               _GetInstanceInfoText(self.instance), False)
8687       except errors.GenericError:
8688         self.cfg.ReleaseDRBDMinors(self.instance.name)
8689         raise
8690
8691     # We have new devices, shutdown the drbd on the old secondary
8692     for idx, dev in enumerate(self.instance.disks):
8693       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8694       self.cfg.SetDiskID(dev, self.target_node)
8695       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8696       if msg:
8697         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8698                            "node: %s" % (idx, msg),
8699                            hint=("Please cleanup this device manually as"
8700                                  " soon as possible"))
8701
8702     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8703     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8704                                                self.node_secondary_ip,
8705                                                self.instance.disks)\
8706                                               [self.instance.primary_node]
8707
8708     msg = result.fail_msg
8709     if msg:
8710       # detaches didn't succeed (unlikely)
8711       self.cfg.ReleaseDRBDMinors(self.instance.name)
8712       raise errors.OpExecError("Can't detach the disks from the network on"
8713                                " old node: %s" % (msg,))
8714
8715     # if we managed to detach at least one, we update all the disks of
8716     # the instance to point to the new secondary
8717     self.lu.LogInfo("Updating instance configuration")
8718     for dev, _, new_logical_id in iv_names.itervalues():
8719       dev.logical_id = new_logical_id
8720       self.cfg.SetDiskID(dev, self.instance.primary_node)
8721
8722     self.cfg.Update(self.instance, feedback_fn)
8723
8724     # and now perform the drbd attach
8725     self.lu.LogInfo("Attaching primary drbds to new secondary"
8726                     " (standalone => connected)")
8727     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8728                                             self.new_node],
8729                                            self.node_secondary_ip,
8730                                            self.instance.disks,
8731                                            self.instance.name,
8732                                            False)
8733     for to_node, to_result in result.items():
8734       msg = to_result.fail_msg
8735       if msg:
8736         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8737                            to_node, msg,
8738                            hint=("please do a gnt-instance info to see the"
8739                                  " status of disks"))
8740     cstep = 5
8741     if self.early_release:
8742       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8743       cstep += 1
8744       self._RemoveOldStorage(self.target_node, iv_names)
8745       # WARNING: we release all node locks here, do not do other RPCs
8746       # than WaitForSync to the primary node
8747       self._ReleaseNodeLock([self.instance.primary_node,
8748                              self.target_node,
8749                              self.new_node])
8750
8751     # Wait for sync
8752     # This can fail as the old devices are degraded and _WaitForSync
8753     # does a combined result over all disks, so we don't check its return value
8754     self.lu.LogStep(cstep, steps_total, "Sync devices")
8755     cstep += 1
8756     _WaitForSync(self.lu, self.instance)
8757
8758     # Check all devices manually
8759     self._CheckDevices(self.instance.primary_node, iv_names)
8760
8761     # Step: remove old storage
8762     if not self.early_release:
8763       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8764       self._RemoveOldStorage(self.target_node, iv_names)
8765
8766
8767 class LURepairNodeStorage(NoHooksLU):
8768   """Repairs the volume group on a node.
8769
8770   """
8771   REQ_BGL = False
8772
8773   def CheckArguments(self):
8774     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8775
8776     storage_type = self.op.storage_type
8777
8778     if (constants.SO_FIX_CONSISTENCY not in
8779         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8780       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8781                                  " repaired" % storage_type,
8782                                  errors.ECODE_INVAL)
8783
8784   def ExpandNames(self):
8785     self.needed_locks = {
8786       locking.LEVEL_NODE: [self.op.node_name],
8787       }
8788
8789   def _CheckFaultyDisks(self, instance, node_name):
8790     """Ensure faulty disks abort the opcode or at least warn."""
8791     try:
8792       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8793                                   node_name, True):
8794         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8795                                    " node '%s'" % (instance.name, node_name),
8796                                    errors.ECODE_STATE)
8797     except errors.OpPrereqError, err:
8798       if self.op.ignore_consistency:
8799         self.proc.LogWarning(str(err.args[0]))
8800       else:
8801         raise
8802
8803   def CheckPrereq(self):
8804     """Check prerequisites.
8805
8806     """
8807     # Check whether any instance on this node has faulty disks
8808     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8809       if not inst.admin_up:
8810         continue
8811       check_nodes = set(inst.all_nodes)
8812       check_nodes.discard(self.op.node_name)
8813       for inst_node_name in check_nodes:
8814         self._CheckFaultyDisks(inst, inst_node_name)
8815
8816   def Exec(self, feedback_fn):
8817     feedback_fn("Repairing storage unit '%s' on %s ..." %
8818                 (self.op.name, self.op.node_name))
8819
8820     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8821     result = self.rpc.call_storage_execute(self.op.node_name,
8822                                            self.op.storage_type, st_args,
8823                                            self.op.name,
8824                                            constants.SO_FIX_CONSISTENCY)
8825     result.Raise("Failed to repair storage unit '%s' on %s" %
8826                  (self.op.name, self.op.node_name))
8827
8828
class LUNodeEvacStrategy(NoHooksLU):
  """Logical unit computing where to move the secondaries of some nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the iallocator/remote node pair of the opcode.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names and compute the node locks.

    Without an explicit remote node all nodes are locked; otherwise only
    the evacuated nodes plus the remote node are.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the evacuation plan.

    @return: an empty list if the nodes hold no secondary instances; a
        list of C{[instance name, remote node]} pairs when a remote node
        was given; the iallocator result otherwise

    """
    secondaries = []
    for node_name in self.op.nodes:
      secondaries += _GetNodeSecondaryInstances(self.cfg, node_name)
    if not secondaries:
      return []

    target = self.op.remote_node
    if target is None:
      # no explicit target: delegate the decision to the iallocator
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    plan = []
    for inst in secondaries:
      if inst.primary_node == target:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (target, inst.name),
                                   errors.ECODE_INVAL)
      plan.append([inst.name, target])
    return plan
8874
8875
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit enlarging one disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master and on all nodes of the instance.

    """
    env = {}
    env["DISK"] = self.op.disk
    env["AMOUNT"] = self.op.amount
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    All nodes of the instance must be online, the disk template must be
    growable and, except for file-based disks, the nodes need enough
    free space for the growth.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    growable = inst.disk_template in constants.DTS_GROWABLE
    if not growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template == constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature
      # will be supported
      return

    needed = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, inst_nodes, needed)

  def Exec(self, feedback_fn):
    """Grow the disk on every node and record the new size.

    """
    inst = self.instance
    target_disk = self.disk

    (activated, _) = _AssembleInstanceDisks(self, self.instance,
                                            disks=[target_disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(target_disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, target_disk,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    target_disk.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)

    if not self.op.wait_for_sync:
      if not inst.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested.")
      return

    in_sync = _WaitForSync(self, inst, disks=[target_disk])
    if not in_sync:
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
    if not inst.admin_up:
      _SafeShutdownInstanceDisks(self, inst, disks=[target_disk])
8970
8971
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the instances to query and the locks to acquire.

    Locking is forced on whenever non-static data is requested. When
    locks are used they are all acquired in shared mode.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      # node locks are filled in later, in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the selected instances, if locking is in use.

    """
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      # no explicit list was given: use every instance we hold a lock on
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @param node: the node to query; a false value means no query is made
    @param instance_name: instance name, used only in the error message
    @param dev: the disk object to look up on the node
    @return: None for static queries, when no node is given, when the
        node is offline or when the node reports no status; otherwise a
        tuple of (dev_path, major, minor, sync_percent, estimated_time,
        is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Recurses into the children of the device.

    @param instance: the instance owning the device
    @param snode: secondary node to query; replaced by the peer found in
        the logical id when the device is a DRBD one
    @param dev: the disk object to describe
    @return: a dict describing the device, including its status on the
        primary ("pstatus") and secondary ("sstatus") node and the list
        of its children's dicts

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @return: a dict mapping each instance name to a dict of its
        configuration and, for non-static queries, runtime data

    """
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        # static queries do not contact the node
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
9131
9132
9133 class LUInstanceSetParams(LogicalUnit):
9134   """Modifies an instances's parameters.
9135
9136   """
9137   HPATH = "instance-modify"
9138   HTYPE = constants.HTYPE_INSTANCE
9139   REQ_BGL = False
9140
9141   def CheckArguments(self):
9142     if not (self.op.nics or self.op.disks or self.op.disk_template or
9143             self.op.hvparams or self.op.beparams or self.op.os_name):
9144       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9145
9146     if self.op.hvparams:
9147       _CheckGlobalHvParams(self.op.hvparams)
9148
9149     # Disk validation
9150     disk_addremove = 0
9151     for disk_op, disk_dict in self.op.disks:
9152       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9153       if disk_op == constants.DDM_REMOVE:
9154         disk_addremove += 1
9155         continue
9156       elif disk_op == constants.DDM_ADD:
9157         disk_addremove += 1
9158       else:
9159         if not isinstance(disk_op, int):
9160           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9161         if not isinstance(disk_dict, dict):
9162           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9163           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9164
9165       if disk_op == constants.DDM_ADD:
9166         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9167         if mode not in constants.DISK_ACCESS_SET:
9168           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9169                                      errors.ECODE_INVAL)
9170         size = disk_dict.get('size', None)
9171         if size is None:
9172           raise errors.OpPrereqError("Required disk parameter size missing",
9173                                      errors.ECODE_INVAL)
9174         try:
9175           size = int(size)
9176         except (TypeError, ValueError), err:
9177           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9178                                      str(err), errors.ECODE_INVAL)
9179         disk_dict['size'] = size
9180       else:
9181         # modification of disk
9182         if 'size' in disk_dict:
9183           raise errors.OpPrereqError("Disk size change not possible, use"
9184                                      " grow-disk", errors.ECODE_INVAL)
9185
9186     if disk_addremove > 1:
9187       raise errors.OpPrereqError("Only one disk add or remove operation"
9188                                  " supported at a time", errors.ECODE_INVAL)
9189
9190     if self.op.disks and self.op.disk_template is not None:
9191       raise errors.OpPrereqError("Disk template conversion and other disk"
9192                                  " changes not supported at the same time",
9193                                  errors.ECODE_INVAL)
9194
9195     if (self.op.disk_template and
9196         self.op.disk_template in constants.DTS_NET_MIRROR and
9197         self.op.remote_node is None):
9198       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9199                                  " one requires specifying a secondary node",
9200                                  errors.ECODE_INVAL)
9201
9202     # NIC validation
9203     nic_addremove = 0
9204     for nic_op, nic_dict in self.op.nics:
9205       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9206       if nic_op == constants.DDM_REMOVE:
9207         nic_addremove += 1
9208         continue
9209       elif nic_op == constants.DDM_ADD:
9210         nic_addremove += 1
9211       else:
9212         if not isinstance(nic_op, int):
9213           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9214         if not isinstance(nic_dict, dict):
9215           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9216           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9217
9218       # nic_dict should be a dict
9219       nic_ip = nic_dict.get('ip', None)
9220       if nic_ip is not None:
9221         if nic_ip.lower() == constants.VALUE_NONE:
9222           nic_dict['ip'] = None
9223         else:
9224           if not netutils.IPAddress.IsValid(nic_ip):
9225             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9226                                        errors.ECODE_INVAL)
9227
9228       nic_bridge = nic_dict.get('bridge', None)
9229       nic_link = nic_dict.get('link', None)
9230       if nic_bridge and nic_link:
9231         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9232                                    " at the same time", errors.ECODE_INVAL)
9233       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9234         nic_dict['bridge'] = None
9235       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9236         nic_dict['link'] = None
9237
9238       if nic_op == constants.DDM_ADD:
9239         nic_mac = nic_dict.get('mac', None)
9240         if nic_mac is None:
9241           nic_dict['mac'] = constants.VALUE_AUTO
9242
9243       if 'mac' in nic_dict:
9244         nic_mac = nic_dict['mac']
9245         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9246           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9247
9248         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9249           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9250                                      " modifying an existing nic",
9251                                      errors.ECODE_INVAL)
9252
9253     if nic_addremove > 1:
9254       raise errors.OpPrereqError("Only one NIC add or remove operation"
9255                                  " supported at a time", errors.ECODE_INVAL)
9256
9257   def ExpandNames(self):
9258     self._ExpandAndLockInstance()
9259     self.needed_locks[locking.LEVEL_NODE] = []
9260     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9261
9262   def DeclareLocks(self, level):
9263     if level == locking.LEVEL_NODE:
9264       self._LockInstancesNodes()
9265       if self.op.disk_template and self.op.remote_node:
9266         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9267         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9268
9269   def BuildHooksEnv(self):
9270     """Build hooks env.
9271
9272     This runs on the master, primary and secondaries.
9273
9274     """
9275     args = dict()
9276     if constants.BE_MEMORY in self.be_new:
9277       args['memory'] = self.be_new[constants.BE_MEMORY]
9278     if constants.BE_VCPUS in self.be_new:
9279       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9280     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9281     # information at all.
9282     if self.op.nics:
9283       args['nics'] = []
9284       nic_override = dict(self.op.nics)
9285       for idx, nic in enumerate(self.instance.nics):
9286         if idx in nic_override:
9287           this_nic_override = nic_override[idx]
9288         else:
9289           this_nic_override = {}
9290         if 'ip' in this_nic_override:
9291           ip = this_nic_override['ip']
9292         else:
9293           ip = nic.ip
9294         if 'mac' in this_nic_override:
9295           mac = this_nic_override['mac']
9296         else:
9297           mac = nic.mac
9298         if idx in self.nic_pnew:
9299           nicparams = self.nic_pnew[idx]
9300         else:
9301           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9302         mode = nicparams[constants.NIC_MODE]
9303         link = nicparams[constants.NIC_LINK]
9304         args['nics'].append((ip, mac, mode, link))
9305       if constants.DDM_ADD in nic_override:
9306         ip = nic_override[constants.DDM_ADD].get('ip', None)
9307         mac = nic_override[constants.DDM_ADD]['mac']
9308         nicparams = self.nic_pnew[constants.DDM_ADD]
9309         mode = nicparams[constants.NIC_MODE]
9310         link = nicparams[constants.NIC_LINK]
9311         args['nics'].append((ip, mac, mode, link))
9312       elif constants.DDM_REMOVE in nic_override:
9313         del args['nics'][-1]
9314
9315     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9316     if self.op.disk_template:
9317       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9318     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9319     return env, nl, nl
9320
9321   def CheckPrereq(self):
9322     """Check prerequisites.
9323
9324     This only checks the instance list against the existing names.
9325
9326     """
9327     # checking the new params on the primary/secondary nodes
9328
9329     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9330     cluster = self.cluster = self.cfg.GetClusterInfo()
9331     assert self.instance is not None, \
9332       "Cannot retrieve locked instance %s" % self.op.instance_name
9333     pnode = instance.primary_node
9334     nodelist = list(instance.all_nodes)
9335
9336     # OS change
9337     if self.op.os_name and not self.op.force:
9338       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9339                       self.op.force_variant)
9340       instance_os = self.op.os_name
9341     else:
9342       instance_os = instance.os
9343
9344     if self.op.disk_template:
9345       if instance.disk_template == self.op.disk_template:
9346         raise errors.OpPrereqError("Instance already has disk template %s" %
9347                                    instance.disk_template, errors.ECODE_INVAL)
9348
9349       if (instance.disk_template,
9350           self.op.disk_template) not in self._DISK_CONVERSIONS:
9351         raise errors.OpPrereqError("Unsupported disk template conversion from"
9352                                    " %s to %s" % (instance.disk_template,
9353                                                   self.op.disk_template),
9354                                    errors.ECODE_INVAL)
9355       _CheckInstanceDown(self, instance, "cannot change disk template")
9356       if self.op.disk_template in constants.DTS_NET_MIRROR:
9357         if self.op.remote_node == pnode:
9358           raise errors.OpPrereqError("Given new secondary node %s is the same"
9359                                      " as the primary node of the instance" %
9360                                      self.op.remote_node, errors.ECODE_STATE)
9361         _CheckNodeOnline(self, self.op.remote_node)
9362         _CheckNodeNotDrained(self, self.op.remote_node)
9363         # FIXME: here we assume that the old instance type is DT_PLAIN
9364         assert instance.disk_template == constants.DT_PLAIN
9365         disks = [{"size": d.size, "vg": d.logical_id[0]}
9366                  for d in instance.disks]
9367         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9368         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9369
9370     # hvparams processing
9371     if self.op.hvparams:
9372       hv_type = instance.hypervisor
9373       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9374       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9375       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9376
9377       # local check
9378       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9379       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9380       self.hv_new = hv_new # the new actual values
9381       self.hv_inst = i_hvdict # the new dict (without defaults)
9382     else:
9383       self.hv_new = self.hv_inst = {}
9384
9385     # beparams processing
9386     if self.op.beparams:
9387       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9388                                    use_none=True)
9389       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9390       be_new = cluster.SimpleFillBE(i_bedict)
9391       self.be_new = be_new # the new actual values
9392       self.be_inst = i_bedict # the new dict (without defaults)
9393     else:
9394       self.be_new = self.be_inst = {}
9395     be_old = cluster.FillBE(instance)
9396
9397     # osparams processing
9398     if self.op.osparams:
9399       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9400       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9401       self.os_inst = i_osdict # the new dict (without defaults)
9402     else:
9403       self.os_inst = {}
9404
9405     self.warn = []
9406
9407     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
9408         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
9409       mem_check_list = [pnode]
9410       if be_new[constants.BE_AUTO_BALANCE]:
9411         # either we changed auto_balance to yes or it was from before
9412         mem_check_list.extend(instance.secondary_nodes)
9413       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9414                                                   instance.hypervisor)
9415       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9416                                          instance.hypervisor)
9417       pninfo = nodeinfo[pnode]
9418       msg = pninfo.fail_msg
9419       if msg:
9420         # Assume the primary node is unreachable and go ahead
9421         self.warn.append("Can't get info from primary node %s: %s" %
9422                          (pnode,  msg))
9423       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9424         self.warn.append("Node data from primary node %s doesn't contain"
9425                          " free memory information" % pnode)
9426       elif instance_info.fail_msg:
9427         self.warn.append("Can't get instance runtime information: %s" %
9428                         instance_info.fail_msg)
9429       else:
9430         if instance_info.payload:
9431           current_mem = int(instance_info.payload['memory'])
9432         else:
9433           # Assume instance not running
9434           # (there is a slight race condition here, but it's not very probable,
9435           # and we have no other way to check)
9436           current_mem = 0
9437         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9438                     pninfo.payload['memory_free'])
9439         if miss_mem > 0:
9440           raise errors.OpPrereqError("This change will prevent the instance"
9441                                      " from starting, due to %d MB of memory"
9442                                      " missing on its primary node" % miss_mem,
9443                                      errors.ECODE_NORES)
9444
9445       if be_new[constants.BE_AUTO_BALANCE]:
9446         for node, nres in nodeinfo.items():
9447           if node not in instance.secondary_nodes:
9448             continue
9449           nres.Raise("Can't get info from secondary node %s" % node,
9450                      prereq=True, ecode=errors.ECODE_STATE)
9451           if not isinstance(nres.payload.get('memory_free', None), int):
9452             raise errors.OpPrereqError("Secondary node %s didn't return free"
9453                                        " memory information" % node,
9454                                        errors.ECODE_STATE)
9455           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9456             raise errors.OpPrereqError("This change will prevent the instance"
9457                                        " from failover to its secondary node"
9458                                        " %s, due to not enough memory" % node,
9459                                        errors.ECODE_STATE)
9460
9461     # NIC processing
9462     self.nic_pnew = {}
9463     self.nic_pinst = {}
9464     for nic_op, nic_dict in self.op.nics:
9465       if nic_op == constants.DDM_REMOVE:
9466         if not instance.nics:
9467           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9468                                      errors.ECODE_INVAL)
9469         continue
9470       if nic_op != constants.DDM_ADD:
9471         # an existing nic
9472         if not instance.nics:
9473           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9474                                      " no NICs" % nic_op,
9475                                      errors.ECODE_INVAL)
9476         if nic_op < 0 or nic_op >= len(instance.nics):
9477           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9478                                      " are 0 to %d" %
9479                                      (nic_op, len(instance.nics) - 1),
9480                                      errors.ECODE_INVAL)
9481         old_nic_params = instance.nics[nic_op].nicparams
9482         old_nic_ip = instance.nics[nic_op].ip
9483       else:
9484         old_nic_params = {}
9485         old_nic_ip = None
9486
9487       update_params_dict = dict([(key, nic_dict[key])
9488                                  for key in constants.NICS_PARAMETERS
9489                                  if key in nic_dict])
9490
9491       if 'bridge' in nic_dict:
9492         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9493
9494       new_nic_params = _GetUpdatedParams(old_nic_params,
9495                                          update_params_dict)
9496       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9497       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9498       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9499       self.nic_pinst[nic_op] = new_nic_params
9500       self.nic_pnew[nic_op] = new_filled_nic_params
9501       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9502
9503       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9504         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9505         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9506         if msg:
9507           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9508           if self.op.force:
9509             self.warn.append(msg)
9510           else:
9511             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9512       if new_nic_mode == constants.NIC_MODE_ROUTED:
9513         if 'ip' in nic_dict:
9514           nic_ip = nic_dict['ip']
9515         else:
9516           nic_ip = old_nic_ip
9517         if nic_ip is None:
9518           raise errors.OpPrereqError('Cannot set the nic ip to None'
9519                                      ' on a routed nic', errors.ECODE_INVAL)
9520       if 'mac' in nic_dict:
9521         nic_mac = nic_dict['mac']
9522         if nic_mac is None:
9523           raise errors.OpPrereqError('Cannot set the nic mac to None',
9524                                      errors.ECODE_INVAL)
9525         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9526           # otherwise generate the mac
9527           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9528         else:
9529           # or validate/reserve the current one
9530           try:
9531             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9532           except errors.ReservationError:
9533             raise errors.OpPrereqError("MAC address %s already in use"
9534                                        " in cluster" % nic_mac,
9535                                        errors.ECODE_NOTUNIQUE)
9536
9537     # DISK processing
9538     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9539       raise errors.OpPrereqError("Disk operations not supported for"
9540                                  " diskless instances",
9541                                  errors.ECODE_INVAL)
9542     for disk_op, _ in self.op.disks:
9543       if disk_op == constants.DDM_REMOVE:
9544         if len(instance.disks) == 1:
9545           raise errors.OpPrereqError("Cannot remove the last disk of"
9546                                      " an instance", errors.ECODE_INVAL)
9547         _CheckInstanceDown(self, instance, "cannot remove disks")
9548
9549       if (disk_op == constants.DDM_ADD and
9550           len(instance.disks) >= constants.MAX_DISKS):
9551         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9552                                    " add more" % constants.MAX_DISKS,
9553                                    errors.ECODE_STATE)
9554       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9555         # an existing disk
9556         if disk_op < 0 or disk_op >= len(instance.disks):
9557           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9558                                      " are 0 to %d" %
9559                                      (disk_op, len(instance.disks)),
9560                                      errors.ECODE_INVAL)
9561
9562     return
9563
9564   def _ConvertPlainToDrbd(self, feedback_fn):
9565     """Converts an instance from plain to drbd.
9566
9567     """
9568     feedback_fn("Converting template to drbd")
9569     instance = self.instance
9570     pnode = instance.primary_node
9571     snode = self.op.remote_node
9572
9573     # create a fake disk info for _GenerateDiskTemplate
9574     disk_info = [{"size": d.size, "mode": d.mode,
9575                   "vg": d.logical_id[0]} for d in instance.disks]
9576     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9577                                       instance.name, pnode, [snode],
9578                                       disk_info, None, None, 0, feedback_fn)
9579     info = _GetInstanceInfoText(instance)
9580     feedback_fn("Creating aditional volumes...")
9581     # first, create the missing data and meta devices
9582     for disk in new_disks:
9583       # unfortunately this is... not too nice
9584       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9585                             info, True)
9586       for child in disk.children:
9587         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9588     # at this stage, all new LVs have been created, we can rename the
9589     # old ones
9590     feedback_fn("Renaming original volumes...")
9591     rename_list = [(o, n.children[0].logical_id)
9592                    for (o, n) in zip(instance.disks, new_disks)]
9593     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9594     result.Raise("Failed to rename original LVs")
9595
9596     feedback_fn("Initializing DRBD devices...")
9597     # all child devices are in place, we can now create the DRBD devices
9598     for disk in new_disks:
9599       for node in [pnode, snode]:
9600         f_create = node == pnode
9601         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9602
9603     # at this point, the instance has been modified
9604     instance.disk_template = constants.DT_DRBD8
9605     instance.disks = new_disks
9606     self.cfg.Update(instance, feedback_fn)
9607
9608     # disks are created, waiting for sync
9609     disk_abort = not _WaitForSync(self, instance,
9610                                   oneshot=not self.op.wait_for_sync)
9611     if disk_abort:
9612       raise errors.OpExecError("There are some degraded disks for"
9613                                " this instance, please cleanup manually")
9614
9615   def _ConvertDrbdToPlain(self, feedback_fn):
9616     """Converts an instance from drbd to plain.
9617
9618     """
9619     instance = self.instance
9620     assert len(instance.secondary_nodes) == 1
9621     pnode = instance.primary_node
9622     snode = instance.secondary_nodes[0]
9623     feedback_fn("Converting template to plain")
9624
9625     old_disks = instance.disks
9626     new_disks = [d.children[0] for d in old_disks]
9627
9628     # copy over size and mode
9629     for parent, child in zip(old_disks, new_disks):
9630       child.size = parent.size
9631       child.mode = parent.mode
9632
9633     # update instance structure
9634     instance.disks = new_disks
9635     instance.disk_template = constants.DT_PLAIN
9636     self.cfg.Update(instance, feedback_fn)
9637
9638     feedback_fn("Removing volumes on the secondary node...")
9639     for disk in old_disks:
9640       self.cfg.SetDiskID(disk, snode)
9641       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9642       if msg:
9643         self.LogWarning("Could not remove block device %s on node %s,"
9644                         " continuing anyway: %s", disk.iv_name, snode, msg)
9645
9646     feedback_fn("Removing unneeded volumes on the primary node...")
9647     for idx, disk in enumerate(old_disks):
9648       meta = disk.children[1]
9649       self.cfg.SetDiskID(meta, pnode)
9650       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9651       if msg:
9652         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9653                         " continuing anyway: %s", idx, pnode, msg)
9654
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk additions/removals/mode
    changes, disk template conversion, NIC changes, hvparams, beparams,
    OS name and osparams. The modified instance object is written to the
    configuration at the end.

    @param feedback_fn: callable used to report progress and the warnings
        collected in C{self.warn}
    @rtype: list
    @return: list of (parameter name, new value) tuples, one for each
        change that was applied
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the list length equals the index of the removed disk
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            # removal is best-effort: failures are logged, not raised
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          # reuse the driver and directory of the first existing disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          # both creation flags are true only for the primary node
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures are only logged; the disk stays in the
            # instance's disk list
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        # (disk_op is the disk index here; only the mode is updated)
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    # disk template conversion
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence the explicit "self"
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # whatever went wrong, release the DRBD minors for this instance
        # before propagating the original exception
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        # after the deletion, the list length is the index of the removed nic
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        # parameters computed in CheckPrereq, stored under the DDM_ADD key
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing nic; nic_op is its index
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      # store the dict without defaults, report what was requested
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    # (note that no entry is added to the result list for it)
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # write all accumulated changes of the instance object to the config
    self.cfg.Update(instance, feedback_fn)

    return result
9786
  # Supported disk template conversions, keyed by the tuple (current
  # template, requested template). CheckPrereq rejects any pair that is not
  # a key of this dict. The values are the plain functions defined above
  # (they are referenced from within the class body, so they are not bound
  # methods), which is why Exec calls them with an explicit "self" argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
9791
9792
class LUBackupQuery(NoHooksLU):
  """Logical unit listing the exports found on a set of nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the (shared) node locks needed by the query.

    All nodes are locked when the opcode names no nodes, otherwise only
    the requested ones.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary mapping each node to the payload returned by
        its export list call (the list of instances exported on that
        node), or to C{False} if the call to that node failed

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    answers = self.rpc.call_export_list(self.nodes)

    exports = {}
    for node in answers:
      nres = answers[node]
      if nres.fail_msg:
        # a node we could not query is flagged with False
        exports[node] = False
        continue
      exports[node] = nres.payload

    return exports
9827
9828
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, requires its primary node to be online and
    reads the cluster domain secret for later use in L{Exec}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: callable used to report progress to the user
    @rtype: dict or None
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    inst = self.instance

    # only remote exports need any preparation
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % inst.primary_node)
    cert_result = self.rpc.call_x509_cert_create(inst.primary_node,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload

    pem_type = OpenSSL.crypto.FILETYPE_PEM
    cert = OpenSSL.crypto.load_certificate(pem_type, cert_pem)

    # computed in the same order as the entries of the returned dict
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
9878
9879
9880 class LUBackupExport(LogicalUnit):
9881   """Export an instance to an image in the cluster.
9882
9883   """
9884   HPATH = "instance-export"
9885   HTYPE = constants.HTYPE_INSTANCE
9886   REQ_BGL = False
9887
9888   def CheckArguments(self):
9889     """Check the arguments.
9890
9891     """
9892     self.x509_key_name = self.op.x509_key_name
9893     self.dest_x509_ca_pem = self.op.destination_x509_ca
9894
9895     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9896       if not self.x509_key_name:
9897         raise errors.OpPrereqError("Missing X509 key name for encryption",
9898                                    errors.ECODE_INVAL)
9899
9900       if not self.dest_x509_ca_pem:
9901         raise errors.OpPrereqError("Missing destination X509 CA",
9902                                    errors.ECODE_INVAL)
9903
9904   def ExpandNames(self):
9905     self._ExpandAndLockInstance()
9906
9907     # Lock all nodes for local exports
9908     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9909       # FIXME: lock only instance primary and destination node
9910       #
9911       # Sad but true, for now we have do lock all nodes, as we don't know where
9912       # the previous export might be, and in this LU we search for it and
9913       # remove it from its current node. In the future we could fix this by:
9914       #  - making a tasklet to search (share-lock all), then create the
9915       #    new one, then one to remove, after
9916       #  - removing the removal operation altogether
9917       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9918
  def DeclareLocks(self, level):
    """Last minute lock declaration.

    Intentionally does nothing.

    @param level: the locking level being declared (unused)

    """
    # All nodes are locked anyway, so nothing to do here.
9922
9923   def BuildHooksEnv(self):
9924     """Build hooks env.
9925
9926     This will run on the master, primary node and target node.
9927
9928     """
9929     env = {
9930       "EXPORT_MODE": self.op.mode,
9931       "EXPORT_NODE": self.op.target_node,
9932       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9933       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9934       # TODO: Generic function for boolean env variables
9935       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9936       }
9937
9938     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9939
9940     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9941
9942     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9943       nl.append(self.op.target_node)
9944
9945     return env, nl, nl
9946
9947   def CheckPrereq(self):
9948     """Check prerequisites.
9949
9950     This checks that the instance and node names are valid.
9951
9952     """
9953     instance_name = self.op.instance_name
9954
9955     self.instance = self.cfg.GetInstanceInfo(instance_name)
9956     assert self.instance is not None, \
9957           "Cannot retrieve locked instance %s" % self.op.instance_name
9958     _CheckNodeOnline(self, self.instance.primary_node)
9959
9960     if (self.op.remove_instance and self.instance.admin_up and
9961         not self.op.shutdown):
9962       raise errors.OpPrereqError("Can not remove instance without shutting it"
9963                                  " down before")
9964
9965     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9966       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9967       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9968       assert self.dst_node is not None
9969
9970       _CheckNodeOnline(self, self.dst_node.name)
9971       _CheckNodeNotDrained(self, self.dst_node.name)
9972
9973       self._cds = None
9974       self.dest_disk_info = None
9975       self.dest_x509_ca = None
9976
9977     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9978       self.dst_node = None
9979
9980       if len(self.op.target_node) != len(self.instance.disks):
9981         raise errors.OpPrereqError(("Received destination information for %s"
9982                                     " disks, but instance %s has %s disks") %
9983                                    (len(self.op.target_node), instance_name,
9984                                     len(self.instance.disks)),
9985                                    errors.ECODE_INVAL)
9986
9987       cds = _GetClusterDomainSecret()
9988
9989       # Check X509 key name
9990       try:
9991         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9992       except (TypeError, ValueError), err:
9993         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9994
9995       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9996         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9997                                    errors.ECODE_INVAL)
9998
9999       # Load and verify CA
10000       try:
10001         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10002       except OpenSSL.crypto.Error, err:
10003         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10004                                    (err, ), errors.ECODE_INVAL)
10005
10006       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10007       if errcode is not None:
10008         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10009                                    (msg, ), errors.ECODE_INVAL)
10010
10011       self.dest_x509_ca = cert
10012
10013       # Verify target information
10014       disk_info = []
10015       for idx, disk_data in enumerate(self.op.target_node):
10016         try:
10017           (host, port, magic) = \
10018             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10019         except errors.GenericError, err:
10020           raise errors.OpPrereqError("Target info for disk %s: %s" %
10021                                      (idx, err), errors.ECODE_INVAL)
10022
10023         disk_info.append((host, port, magic))
10024
10025       assert len(disk_info) == len(self.op.target_node)
10026       self.dest_disk_info = disk_info
10027
10028     else:
10029       raise errors.ProgrammerError("Unhandled export mode %r" %
10030                                    self.op.mode)
10031
10032     # instance disk type verification
10033     # TODO: Implement export support for file-based disks
10034     for disk in self.instance.disks:
10035       if disk.dev_type == constants.LD_FILE:
10036         raise errors.OpPrereqError("Export not supported for instances with"
10037                                    " file-based disks", errors.ECODE_INVAL)
10038
10039   def _CleanupExports(self, feedback_fn):
10040     """Removes exports of current instance from all other nodes.
10041
10042     If an instance in a cluster with nodes A..D was exported to node C, its
10043     exports will be removed from the nodes A, B and D.
10044
10045     """
10046     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10047
10048     nodelist = self.cfg.GetNodeList()
10049     nodelist.remove(self.dst_node.name)
10050
10051     # on one-node clusters nodelist will be empty after the removal
10052     # if we proceed the backup would be removed because OpBackupQuery
10053     # substitutes an empty list with the full cluster node list.
10054     iname = self.instance.name
10055     if nodelist:
10056       feedback_fn("Removing old exports for instance %s" % iname)
10057       exportlist = self.rpc.call_export_list(nodelist)
10058       for node in exportlist:
10059         if exportlist[node].fail_msg:
10060           continue
10061         if iname in exportlist[node].payload:
10062           msg = self.rpc.call_export_remove(node, iname).fail_msg
10063           if msg:
10064             self.LogWarning("Could not remove older export for instance %s"
10065                             " on node %s: %s", iname, node, msg)
10066
10067   def Exec(self, feedback_fn):
10068     """Export an instance to an image in the cluster.
10069
10070     """
10071     assert self.op.mode in constants.EXPORT_MODES
10072
10073     instance = self.instance
10074     src_node = instance.primary_node
10075
10076     if self.op.shutdown:
10077       # shutdown the instance, but not the disks
10078       feedback_fn("Shutting down instance %s" % instance.name)
10079       result = self.rpc.call_instance_shutdown(src_node, instance,
10080                                                self.op.shutdown_timeout)
10081       # TODO: Maybe ignore failures if ignore_remove_failures is set
10082       result.Raise("Could not shutdown instance %s on"
10083                    " node %s" % (instance.name, src_node))
10084
10085     # set the disks ID correctly since call_instance_start needs the
10086     # correct drbd minor to create the symlinks
10087     for disk in instance.disks:
10088       self.cfg.SetDiskID(disk, src_node)
10089
10090     activate_disks = (not instance.admin_up)
10091
10092     if activate_disks:
10093       # Activate the instance disks if we'exporting a stopped instance
10094       feedback_fn("Activating disks for %s" % instance.name)
10095       _StartInstanceDisks(self, instance, None)
10096
10097     try:
10098       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10099                                                      instance)
10100
10101       helper.CreateSnapshots()
10102       try:
10103         if (self.op.shutdown and instance.admin_up and
10104             not self.op.remove_instance):
10105           assert not activate_disks
10106           feedback_fn("Starting instance %s" % instance.name)
10107           result = self.rpc.call_instance_start(src_node, instance, None, None)
10108           msg = result.fail_msg
10109           if msg:
10110             feedback_fn("Failed to start instance: %s" % msg)
10111             _ShutdownInstanceDisks(self, instance)
10112             raise errors.OpExecError("Could not start instance: %s" % msg)
10113
10114         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10115           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10116         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10117           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10118           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10119
10120           (key_name, _, _) = self.x509_key_name
10121
10122           dest_ca_pem = \
10123             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10124                                             self.dest_x509_ca)
10125
10126           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10127                                                      key_name, dest_ca_pem,
10128                                                      timeouts)
10129       finally:
10130         helper.Cleanup()
10131
10132       # Check for backwards compatibility
10133       assert len(dresults) == len(instance.disks)
10134       assert compat.all(isinstance(i, bool) for i in dresults), \
10135              "Not all results are boolean: %r" % dresults
10136
10137     finally:
10138       if activate_disks:
10139         feedback_fn("Deactivating disks for %s" % instance.name)
10140         _ShutdownInstanceDisks(self, instance)
10141
10142     if not (compat.all(dresults) and fin_resu):
10143       failures = []
10144       if not fin_resu:
10145         failures.append("export finalization")
10146       if not compat.all(dresults):
10147         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10148                                if not dsk)
10149         failures.append("disk export: disk(s) %s" % fdsk)
10150
10151       raise errors.OpExecError("Export failed, errors in %s" %
10152                                utils.CommaJoin(failures))
10153
10154     # At this point, the export was successful, we can cleanup/finish
10155
10156     # Remove instance if requested
10157     if self.op.remove_instance:
10158       feedback_fn("Removing instance %s" % instance.name)
10159       _RemoveInstance(self, feedback_fn, instance,
10160                       self.op.ignore_remove_failures)
10161
10162     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10163       self._CleanupExports(feedback_fn)
10164
10165     return fin_resu, dresults
10166
10167
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes.

    All nodes have to be locked for the export removal to work. The
    instance itself is not locked, as nothing will happen to it (and
    exports can be removed also for an already removed instance).

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its export list; wherever the instance
    name shows up, the export is removed. Nodes that cannot be queried are
    skipped with a warning.

    @param feedback_fn: callable used to report messages to the user

    """
    requested_name = self.op.instance_name
    instance_name = self.cfg.ExpandInstanceName(requested_name)

    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    name_unresolved = not instance_name
    if name_unresolved:
      instance_name = requested_name

    node_names = self.acquired_locks[locking.LEVEL_NODE]
    per_node = self.rpc.call_export_list(node_names)

    found = False
    for (node_name, node_result) in per_node.items():
      query_err = node_result.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node_name, query_err)
        continue

      if instance_name not in node_result.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node_name,
                                               instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node_name,
                      remove_err)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10213
10214
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generate the UUID of the new group and request its lock.

    The UUID is needed already here so that the corresponding lock can be
    created and acquired. Later on, Exec() tells cfg.AddNodeGroup not to
    check whether the UUID exists in the configuration.

    """
    ec_id = self.proc.GetECId()
    self.group_uuid = self.cfg.GenerateUniqueID(ec_id)
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that no node group with the requested name exists yet and
    enforces the types of the node parameters, if any were given.

    @raise errors.OpPrereqError: if the group name is already in use

    """
    wanted_name = self.op.group_name

    try:
      clashing_uuid = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      # The lookup failing is the good case: the name is still free
      pass
    else:
      clash_msg = ("Desired group name '%s' already exists as a"
                   " node group (UUID: %s)" % (wanted_name, clashing_uuid))
      raise errors.OpPrereqError(clash_msg, errors.ECODE_EXISTS)

    ndparams = self.op.ndparams
    if ndparams:
      utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
      both node lists contain only the master node

    """
    hook_env = {"GROUP_NAME": self.op.group_name}
    master = self.cfg.GetMasterNode()
    return hook_env, [master], [master]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    new_group = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    # The UUID was generated by ExpandNames, hence no existence check
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10272
10273
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group and node names and declare the initial locks.

    All affected nodes and groups are to be locked. The nodes and the
    *destination* group are known right away; the "source" groups are
    added in L{DeclareLocks}.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    group_locks = set([self.group_uuid])
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: group_locks,
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    """Add the current groups of the affected nodes to the group locks.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # The group membership is read without holding the group or node locks
    # yet, which is why CheckPrereq verifies it again later.
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the acquired group locks still match the groups the
    nodes belong to, that the target group can be retrieved, and that the
    assignment splits no instance unless C{force} was given.

    @raise errors.OpExecError: if nodes changed groups in the meantime,
      the group cannot be retrieved, or instances would be split without
      C{force}

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.acquired_locks[locking.LEVEL_NODE]) ==
            frozenset(self.op.nodes))

    node_names = self.op.nodes

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(node_names))
    locked_groups = self.acquired_locks[locking.LEVEL_NODEGROUP]
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(name, self.group_uuid) for name in node_names]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    # Already-split instances are only mentioned together with new splits
    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    target_uuid = self.group_uuid
    for node_name in self.op.nodes:
      self.node_data[node_name].group = target_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The whole series of node assignments is treated as one atomic
    operation; the result describes which instances are split across
    groups once all changes are applied.

    Two kinds of instances are reported: those which become split only
    because of the change, and those which were split before and still
    are afterwards.

    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Keep only the assignments which actually move a node
    new_group_of = {}
    for (node_name, new_group) in changes:
      if node_data[node_name].group != new_group:
        new_group_of[node_name] = new_group

    all_split_instances = set()
    previously_split_instances = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue

      inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)

      groups_before = set(node_data[name].group for name in inst_nodes)
      groups_after = set(new_group_of.get(name, node_data[name].group)
                         for name in inst_nodes)

      if len(groups_before) > 1:
        previously_split_instances.add(inst.name)

      if len(groups_after) > 1:
        all_split_instances.add(inst.name)

    newly_split = all_split_instances - previously_split_instances
    still_split = previously_split_instances & all_split_instances
    return (list(newly_split), list(still_split))
10409
10410
class _GroupQuery(_QueryBase):
  """Query helper collecting data about node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups; no locks are requested.

    Without requested names, all groups are wanted, ordered by name.
    Otherwise every requested entry may be either a group UUID or a group
    name.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested group does not exist

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    uuid_by_name = dict((grp.name, grp.uuid)
                        for grp in self._all_groups.values())

    if not self.names:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]
      return

    # Accept names to be either names or UUIDs.
    unknown = []
    self.wanted = wanted = []

    for entry in self.names:
      if entry in self._all_groups:
        wanted.append(entry)
      elif entry in uuid_by_name:
        wanted.append(uuid_by_name[entry])
      else:
        unknown.append(entry)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Nothing to declare, this query requests no locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} built from the wanted groups, the
      group to node names mapping (None unless node data was requested)
      and the group to instance names mapping (None unless instance data
      was requested)

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs the mapping group->[nodes], GQ_INST the mapping
    # group->[instances]. The first one only takes GetAllNodesInfo(); the
    # second one has to go through instance->node, hence nodes are
    # processed even if only instance information was asked for.
    if want_nodes or want_instances:
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in lu.cfg.GetAllNodesInfo().values():
        if node.group not in nodes_by_group:
          continue
        nodes_by_group[node.group].append(node.name)
        group_of_node[node.name] = node.group

      if want_instances:
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        # An instance is attributed to the group of its primary node
        for inst in lu.cfg.GetAllInstancesInfo().values():
          pnode = inst.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst.name)

      if not want_nodes:
        # Do not pass on node information if it was not requested.
        nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]
    return query.GroupQueryData(wanted_groups, nodes_by_group,
                                instances_by_group)
10487
10488
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} helper.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the opcode parameters.

    """
    group_names = self.op.names
    fields = self.op.output_fields
    # NOTE(review): the meaning of the third positional argument (False) is
    # defined by _QueryBase, which is not visible here
    self.gq = _GroupQuery(group_names, fields, False)

  def ExpandNames(self):
    """Let the query helper expand the names.

    """
    helper = self.gq
    helper.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return the helper's old-style result.

    """
    helper = self.gq
    return helper.OldStyleQuery(self)
10503
10504
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Ensure that at least one modification was requested.

    @raise errors.OpPrereqError: if neither C{ndparams} nor
      C{alloc_policy} was passed

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Retrieves the group and, if node parameters were passed, computes and
    validates the resulting set of node parameters.

    @raise errors.OpExecError: if the group cannot be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce the types on the merged dict, as this is the one Exec
      # stores in the group; checking only the raw opcode input would leave
      # the stored values unchecked
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
      both node lists contain only the master node

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @param feedback_fn: callable passed on to the configuration update
    @rtype: list
    @return: (parameter name, new value) pairs, one for each modification
      which was applied

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy
      # Report this change too, so that the result lists every modification
      result.append(("alloc_policy", str(self.group.alloc_policy)))

    self.cfg.Update(self.group, feedback_fn)
    return result
10572
10573
10574
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing node groups.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and request the lock of the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The group must be empty (i.e. contain no nodes) and it must not be the
    last group of the cluster.

    @raise errors.OpPrereqError: if one of the two conditions is not met

    """
    group_name = self.op.group_name

    # Verify that the group is empty.
    member_names = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        member_names.append(node.name)

    if member_names:
      fmt_members = utils.CommaJoin(utils.NiceSort(member_names))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" % (group_name, fmt_members),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" % group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
      both node lists contain only the master node

    """
    hook_env = {"GROUP_NAME": self.op.group_name}
    master = self.cfg.GetMasterNode()
    return hook_env, [master], [master]

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
10635
10636
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming node groups.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the old group name and request the lock of the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)

    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The existence of old_name was verified by ExpandNames; here it is made
    sure that new_name is not taken by another node group.

    @raise errors.OpPrereqError: if new_name is already in use

    """
    wanted_name = self.op.new_name

    try:
      clashing_uuid = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      # The lookup failing is the good case: the name is still free
      pass
    else:
      clash_msg = ("Desired new name '%s' clashes with existing"
                   " node group (UUID: %s)" % (wanted_name, clashing_uuid))
      raise errors.OpPrereqError(clash_msg, errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
      the hooks run on the master node and on all nodes of the group

    """
    hook_env = {
      "OLD_NAME": self.op.old_name,
      "NEW_NAME": self.op.new_name,
      }

    master = self.cfg.GetMasterNode()
    other_nodes = self.cfg.GetAllNodesInfo()
    run_nodes = [master]
    # The master is listed first already; drop it to avoid a duplicate
    other_nodes.pop(master, None)

    run_nodes.extend([node.name for node in other_nodes.values()
                      if node.group == self.group_uuid])

    return hook_env, run_nodes, run_nodes

  def Exec(self, feedback_fn):
    """Rename the node group.

    @param feedback_fn: callable passed on to the configuration update
    @return: the new name of the group
    @raise errors.OpExecError: if the group cannot be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.old_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
10701
10702
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  Abstract parent of all the other tags LUs; it takes care of the locking
  and of looking up the object the tags belong to.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object, for nodes and instances.

    """
    self.needed_locks = {}
    kind = self.op.kind

    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the tagged object (cluster, node or instance) in C{self.target}.

    @raise errors.OpPrereqError: if the requested tag kind is unknown

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
10735
10736
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as the parent class does, but share all locks.

    """
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    shared = {}
    for level in locking.LEVELS:
      shared[level] = 1
    self.share_locks = shared

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
10754
10755
10756 class LUTagsSearch(NoHooksLU):
10757   """Searches the tags for a given pattern.
10758
10759   """
10760   REQ_BGL = False
10761
10762   def ExpandNames(self):
10763     self.needed_locks = {}
10764
10765   def CheckPrereq(self):
10766     """Check prerequisites.
10767
10768     This checks the pattern passed for validity by compiling it.
10769
10770     """
10771     try:
10772       self.re = re.compile(self.op.pattern)
10773     except re.error, err:
10774       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10775                                  (self.op.pattern, err), errors.ECODE_INVAL)
10776
10777   def Exec(self, feedback_fn):
10778     """Returns the tag list.
10779
10780     """
10781     cfg = self.cfg
10782     tgts = [("/cluster", cfg.GetClusterInfo())]
10783     ilist = cfg.GetAllInstancesInfo().values()
10784     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10785     nlist = cfg.GetAllNodesInfo().values()
10786     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10787     results = []
10788     for path, target in tgts:
10789       for tag in target.GetTags():
10790         if self.re.search(tag):
10791           results.append((path, tag))
10792     return results
10793
10794
10795 class LUTagsSet(TagsLU):
10796   """Sets a tag on a given object.
10797
10798   """
10799   REQ_BGL = False
10800
10801   def CheckPrereq(self):
10802     """Check prerequisites.
10803
10804     This checks the type and length of the tag name and value.
10805
10806     """
10807     TagsLU.CheckPrereq(self)
10808     for tag in self.op.tags:
10809       objects.TaggableObject.ValidateTag(tag)
10810
10811   def Exec(self, feedback_fn):
10812     """Sets the tag.
10813
10814     """
10815     try:
10816       for tag in self.op.tags:
10817         self.target.AddTag(tag)
10818     except errors.TagError, err:
10819       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10820     self.cfg.Update(self.target, feedback_fn)
10821
10822
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the given tags and makes sure that the target object
    currently has all of them.

    @raise errors.OpPrereqError: if at least one tag is not set on the
      object

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()

    missing = wanted - present
    if missing:
      quoted = ["'%s'" % name for name in sorted(missing)]
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(quoted), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    @param feedback_fn: callable passed on to the configuration update

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
10855
10856
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  Depending on the opcode, the sleep takes place on the master, on a list
  of nodes, or on both.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    node_names = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = node_names
    self.needed_locks[locking.LEVEL_NODE] = node_names

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      per_node = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node_name, node_result) in per_node.items():
        node_result.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero means a single delay without iteration logging.

    """
    repetitions = self.op.repeat

    if repetitions == 0:
      self._TestDelay()
      return

    last_index = repetitions - 1
    for index in range(repetitions):
      self.LogInfo("Test delay iteration %d/%d" % (index, last_index))
      self._TestDelay()
10903
10904
10905 class LUTestJqueue(NoHooksLU):
10906   """Utility LU to test some aspects of the job queue.
10907
10908   """
10909   REQ_BGL = False
10910
10911   # Must be lower than default timeout for WaitForJobChange to see whether it
10912   # notices changed jobs
10913   _CLIENT_CONNECT_TIMEOUT = 20.0
10914   _CLIENT_CONFIRM_TIMEOUT = 60.0
10915
10916   @classmethod
10917   def _NotifyUsingSocket(cls, cb, errcls):
10918     """Opens a Unix socket and waits for another program to connect.
10919
10920     @type cb: callable
10921     @param cb: Callback to send socket name to client
10922     @type errcls: class
10923     @param errcls: Exception class to use for errors
10924
10925     """
10926     # Using a temporary directory as there's no easy way to create temporary
10927     # sockets without writing a custom loop around tempfile.mktemp and
10928     # socket.bind
10929     tmpdir = tempfile.mkdtemp()
10930     try:
10931       tmpsock = utils.PathJoin(tmpdir, "sock")
10932
10933       logging.debug("Creating temporary socket at %s", tmpsock)
10934       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10935       try:
10936         sock.bind(tmpsock)
10937         sock.listen(1)
10938
10939         # Send details to client
10940         cb(tmpsock)
10941
10942         # Wait for client to connect before continuing
10943         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10944         try:
10945           (conn, _) = sock.accept()
10946         except socket.error, err:
10947           raise errcls("Client didn't connect in time (%s)" % err)
10948       finally:
10949         sock.close()
10950     finally:
10951       # Remove as soon as client is connected
10952       shutil.rmtree(tmpdir)
10953
10954     # Wait for client to close
10955     try:
10956       try:
10957         # pylint: disable-msg=E1101
10958         # Instance of '_socketobject' has no ... member
10959         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10960         conn.recv(1)
10961       except socket.error, err:
10962         raise errcls("Client failed to confirm notification (%s)" % err)
10963     finally:
10964       conn.close()
10965
10966   def _SendNotification(self, test, arg, sockname):
10967     """Sends a notification to the client.
10968
10969     @type test: string
10970     @param test: Test name
10971     @param arg: Test argument (depends on test)
10972     @type sockname: string
10973     @param sockname: Socket path
10974
10975     """
10976     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10977
10978   def _Notify(self, prereq, test, arg):
10979     """Notifies the client of a test.
10980
10981     @type prereq: bool
10982     @param prereq: Whether this is a prereq-phase test
10983     @type test: string
10984     @param test: Test name
10985     @param arg: Test argument (depends on test)
10986
10987     """
10988     if prereq:
10989       errcls = errors.OpPrereqError
10990     else:
10991       errcls = errors.OpExecError
10992
10993     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10994                                                   test, arg),
10995                                    errcls)
10996
10997   def CheckArguments(self):
10998     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10999     self.expandnames_calls = 0
11000
11001   def ExpandNames(self):
11002     checkargs_calls = getattr(self, "checkargs_calls", 0)
11003     if checkargs_calls < 1:
11004       raise errors.ProgrammerError("CheckArguments was not called")
11005
11006     self.expandnames_calls += 1
11007
11008     if self.op.notify_waitlock:
11009       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11010
11011     self.LogInfo("Expanding names")
11012
11013     # Get lock on master node (just to get a lock, not for a particular reason)
11014     self.needed_locks = {
11015       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11016       }
11017
11018   def Exec(self, feedback_fn):
11019     if self.expandnames_calls < 1:
11020       raise errors.ProgrammerError("ExpandNames was not called")
11021
11022     if self.op.notify_exec:
11023       self._Notify(False, constants.JQT_EXEC, None)
11024
11025     self.LogInfo("Executing")
11026
11027     if self.op.log_messages:
11028       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11029       for idx, msg in enumerate(self.op.log_messages):
11030         self.LogInfo("Sending log message %s", idx + 1)
11031         feedback_fn(constants.JQT_MSGPREFIX + msg)
11032         # Report how many test messages have been sent
11033         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11034
11035     if self.op.fail:
11036       raise errors.OpExecError("Opcode failure was requested")
11037
11038     return True
11039
11040
11041 class IAllocator(object):
11042   """IAllocator framework.
11043
11044   An IAllocator instance has three sets of attributes:
11045     - cfg that is needed to query the cluster
11046     - input data (all members of the _KEYS class attribute are required)
11047     - four buffer attributes (in|out_data|text), that represent the
11048       input (to the external script) in text and data structure format,
11049       and the output from it, again in two formats
11050     - the result variables from the script (success, info, nodes) for
11051       easy usage
11052
11053   """
11054   # pylint: disable-msg=R0902
11055   # lots of instance attributes
11056   _ALLO_KEYS = [
11057     "name", "mem_size", "disks", "disk_template",
11058     "os", "tags", "nics", "vcpus", "hypervisor",
11059     ]
11060   _RELO_KEYS = [
11061     "name", "relocate_from",
11062     ]
11063   _EVAC_KEYS = [
11064     "evac_nodes",
11065     ]
11066
11067   def __init__(self, cfg, rpc, mode, **kwargs):
11068     self.cfg = cfg
11069     self.rpc = rpc
11070     # init buffer variables
11071     self.in_text = self.out_text = self.in_data = self.out_data = None
11072     # init all input fields so that pylint is happy
11073     self.mode = mode
11074     self.mem_size = self.disks = self.disk_template = None
11075     self.os = self.tags = self.nics = self.vcpus = None
11076     self.hypervisor = None
11077     self.relocate_from = None
11078     self.name = None
11079     self.evac_nodes = None
11080     # computed fields
11081     self.required_nodes = None
11082     # init result fields
11083     self.success = self.info = self.result = None
11084     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11085       keyset = self._ALLO_KEYS
11086       fn = self._AddNewInstance
11087     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11088       keyset = self._RELO_KEYS
11089       fn = self._AddRelocateInstance
11090     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11091       keyset = self._EVAC_KEYS
11092       fn = self._AddEvacuateNodes
11093     else:
11094       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11095                                    " IAllocator" % self.mode)
11096     for key in kwargs:
11097       if key not in keyset:
11098         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11099                                      " IAllocator" % key)
11100       setattr(self, key, kwargs[key])
11101
11102     for key in keyset:
11103       if key not in kwargs:
11104         raise errors.ProgrammerError("Missing input parameter '%s' to"
11105                                      " IAllocator" % key)
11106     self._BuildInputData(fn)
11107
11108   def _ComputeClusterData(self):
11109     """Compute the generic allocator input data.
11110
11111     This is the data that is independent of the actual operation.
11112
11113     """
11114     cfg = self.cfg
11115     cluster_info = cfg.GetClusterInfo()
11116     # cluster data
11117     data = {
11118       "version": constants.IALLOCATOR_VERSION,
11119       "cluster_name": cfg.GetClusterName(),
11120       "cluster_tags": list(cluster_info.GetTags()),
11121       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11122       # we don't have job IDs
11123       }
11124     ninfo = cfg.GetAllNodesInfo()
11125     iinfo = cfg.GetAllInstancesInfo().values()
11126     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11127
11128     # node data
11129     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11130
11131     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11132       hypervisor_name = self.hypervisor
11133     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11134       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11135     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11136       hypervisor_name = cluster_info.enabled_hypervisors[0]
11137
11138     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11139                                         hypervisor_name)
11140     node_iinfo = \
11141       self.rpc.call_all_instances_info(node_list,
11142                                        cluster_info.enabled_hypervisors)
11143
11144     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11145
11146     config_ndata = self._ComputeBasicNodeData(ninfo)
11147     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11148                                                  i_list, config_ndata)
11149     assert len(data["nodes"]) == len(ninfo), \
11150         "Incomplete node data computed"
11151
11152     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11153
11154     self.in_data = data
11155
11156   @staticmethod
11157   def _ComputeNodeGroupData(cfg):
11158     """Compute node groups data.
11159
11160     """
11161     ng = {}
11162     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11163       ng[guuid] = {
11164         "name": gdata.name,
11165         "alloc_policy": gdata.alloc_policy,
11166         }
11167     return ng
11168
11169   @staticmethod
11170   def _ComputeBasicNodeData(node_cfg):
11171     """Compute global node data.
11172
11173     @rtype: dict
11174     @returns: a dict of name: (node dict, node config)
11175
11176     """
11177     node_results = {}
11178     for ninfo in node_cfg.values():
11179       # fill in static (config-based) values
11180       pnr = {
11181         "tags": list(ninfo.GetTags()),
11182         "primary_ip": ninfo.primary_ip,
11183         "secondary_ip": ninfo.secondary_ip,
11184         "offline": ninfo.offline,
11185         "drained": ninfo.drained,
11186         "master_candidate": ninfo.master_candidate,
11187         "group": ninfo.group,
11188         "master_capable": ninfo.master_capable,
11189         "vm_capable": ninfo.vm_capable,
11190         }
11191
11192       node_results[ninfo.name] = pnr
11193
11194     return node_results
11195
11196   @staticmethod
11197   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11198                               node_results):
11199     """Compute global node data.
11200
11201     @param node_results: the basic node structures as filled from the config
11202
11203     """
11204     # make a copy of the current dict
11205     node_results = dict(node_results)
11206     for nname, nresult in node_data.items():
11207       assert nname in node_results, "Missing basic data for node %s" % nname
11208       ninfo = node_cfg[nname]
11209
11210       if not (ninfo.offline or ninfo.drained):
11211         nresult.Raise("Can't get data for node %s" % nname)
11212         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11213                                 nname)
11214         remote_info = nresult.payload
11215
11216         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11217                      'vg_size', 'vg_free', 'cpu_total']:
11218           if attr not in remote_info:
11219             raise errors.OpExecError("Node '%s' didn't return attribute"
11220                                      " '%s'" % (nname, attr))
11221           if not isinstance(remote_info[attr], int):
11222             raise errors.OpExecError("Node '%s' returned invalid value"
11223                                      " for '%s': %s" %
11224                                      (nname, attr, remote_info[attr]))
11225         # compute memory used by primary instances
11226         i_p_mem = i_p_up_mem = 0
11227         for iinfo, beinfo in i_list:
11228           if iinfo.primary_node == nname:
11229             i_p_mem += beinfo[constants.BE_MEMORY]
11230             if iinfo.name not in node_iinfo[nname].payload:
11231               i_used_mem = 0
11232             else:
11233               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11234             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11235             remote_info['memory_free'] -= max(0, i_mem_diff)
11236
11237             if iinfo.admin_up:
11238               i_p_up_mem += beinfo[constants.BE_MEMORY]
11239
11240         # compute memory used by instances
11241         pnr_dyn = {
11242           "total_memory": remote_info['memory_total'],
11243           "reserved_memory": remote_info['memory_dom0'],
11244           "free_memory": remote_info['memory_free'],
11245           "total_disk": remote_info['vg_size'],
11246           "free_disk": remote_info['vg_free'],
11247           "total_cpus": remote_info['cpu_total'],
11248           "i_pri_memory": i_p_mem,
11249           "i_pri_up_memory": i_p_up_mem,
11250           }
11251         pnr_dyn.update(node_results[nname])
11252         node_results[nname] = pnr_dyn
11253
11254     return node_results
11255
11256   @staticmethod
11257   def _ComputeInstanceData(cluster_info, i_list):
11258     """Compute global instance data.
11259
11260     """
11261     instance_data = {}
11262     for iinfo, beinfo in i_list:
11263       nic_data = []
11264       for nic in iinfo.nics:
11265         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11266         nic_dict = {"mac": nic.mac,
11267                     "ip": nic.ip,
11268                     "mode": filled_params[constants.NIC_MODE],
11269                     "link": filled_params[constants.NIC_LINK],
11270                    }
11271         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11272           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11273         nic_data.append(nic_dict)
11274       pir = {
11275         "tags": list(iinfo.GetTags()),
11276         "admin_up": iinfo.admin_up,
11277         "vcpus": beinfo[constants.BE_VCPUS],
11278         "memory": beinfo[constants.BE_MEMORY],
11279         "os": iinfo.os,
11280         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11281         "nics": nic_data,
11282         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11283         "disk_template": iinfo.disk_template,
11284         "hypervisor": iinfo.hypervisor,
11285         }
11286       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11287                                                  pir["disks"])
11288       instance_data[iinfo.name] = pir
11289
11290     return instance_data
11291
11292   def _AddNewInstance(self):
11293     """Add new instance data to allocator structure.
11294
11295     This in combination with _AllocatorGetClusterData will create the
11296     correct structure needed as input for the allocator.
11297
11298     The checks for the completeness of the opcode must have already been
11299     done.
11300
11301     """
11302     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11303
11304     if self.disk_template in constants.DTS_NET_MIRROR:
11305       self.required_nodes = 2
11306     else:
11307       self.required_nodes = 1
11308     request = {
11309       "name": self.name,
11310       "disk_template": self.disk_template,
11311       "tags": self.tags,
11312       "os": self.os,
11313       "vcpus": self.vcpus,
11314       "memory": self.mem_size,
11315       "disks": self.disks,
11316       "disk_space_total": disk_space,
11317       "nics": self.nics,
11318       "required_nodes": self.required_nodes,
11319       }
11320     return request
11321
11322   def _AddRelocateInstance(self):
11323     """Add relocate instance data to allocator structure.
11324
11325     This in combination with _IAllocatorGetClusterData will create the
11326     correct structure needed as input for the allocator.
11327
11328     The checks for the completeness of the opcode must have already been
11329     done.
11330
11331     """
11332     instance = self.cfg.GetInstanceInfo(self.name)
11333     if instance is None:
11334       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11335                                    " IAllocator" % self.name)
11336
11337     if instance.disk_template not in constants.DTS_NET_MIRROR:
11338       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11339                                  errors.ECODE_INVAL)
11340
11341     if len(instance.secondary_nodes) != 1:
11342       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11343                                  errors.ECODE_STATE)
11344
11345     self.required_nodes = 1
11346     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11347     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11348
11349     request = {
11350       "name": self.name,
11351       "disk_space_total": disk_space,
11352       "required_nodes": self.required_nodes,
11353       "relocate_from": self.relocate_from,
11354       }
11355     return request
11356
11357   def _AddEvacuateNodes(self):
11358     """Add evacuate nodes data to allocator structure.
11359
11360     """
11361     request = {
11362       "evac_nodes": self.evac_nodes
11363       }
11364     return request
11365
11366   def _BuildInputData(self, fn):
11367     """Build input data structures.
11368
11369     """
11370     self._ComputeClusterData()
11371
11372     request = fn()
11373     request["type"] = self.mode
11374     self.in_data["request"] = request
11375
11376     self.in_text = serializer.Dump(self.in_data)
11377
11378   def Run(self, name, validate=True, call_fn=None):
11379     """Run an instance allocator and return the results.
11380
11381     """
11382     if call_fn is None:
11383       call_fn = self.rpc.call_iallocator_runner
11384
11385     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11386     result.Raise("Failure while running the iallocator script")
11387
11388     self.out_text = result.payload
11389     if validate:
11390       self._ValidateResult()
11391
11392   def _ValidateResult(self):
11393     """Process the allocator results.
11394
11395     This will process and if successful save the result in
11396     self.out_data and the other parameters.
11397
11398     """
11399     try:
11400       rdict = serializer.Load(self.out_text)
11401     except Exception, err:
11402       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11403
11404     if not isinstance(rdict, dict):
11405       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11406
11407     # TODO: remove backwards compatiblity in later versions
11408     if "nodes" in rdict and "result" not in rdict:
11409       rdict["result"] = rdict["nodes"]
11410       del rdict["nodes"]
11411
11412     for key in "success", "info", "result":
11413       if key not in rdict:
11414         raise errors.OpExecError("Can't parse iallocator results:"
11415                                  " missing key '%s'" % key)
11416       setattr(self, key, rdict[key])
11417
11418     if not isinstance(rdict["result"], list):
11419       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11420                                " is not a list")
11421     self.out_data = rdict
11422
11423
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction it
  either returns the input generated for the allocator or runs the
  allocator and returns its unvalidated output.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid for
        the requested mode or direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must use a name not present in the cluster
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance's current secondary nodes are what is relocated from
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # the allocator name is only needed when the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function (not used by this LU)
    @return: the allocator input text for the "in" direction, otherwise the
        raw output text of the allocator
    @raise errors.ProgrammerError: if the mode is not one of the modes
        handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # The mode has to be interpolated into the message; passing it as a
      # separate argument would leave the "%s" placeholder unexpanded
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the output is returned as-is, hence no validation is requested
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
11519
11520
#: Maps each supported query resource type to the class implementing
#: queries for that resource
_QUERY_IMPL = dict([
  (constants.QR_INSTANCE, _InstanceQuery),
  (constants.QR_NODE, _NodeQuery),
  (constants.QR_GROUP, _GroupQuery),
  ])
11527
11528
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
  @return: the entry registered for C{name} in L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if no implementation is registered for
      C{name}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]