code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, filter_, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.use_locking = use_locking
 461
 462     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 463                              namefield="name")
 464     self.requested_data = self.query.RequestedData()
 465     self.names = self.query.RequestedNames()
 466
 467     self.do_locking = None
 468     self.wanted = None
 469
 470   def _GetNames(self, lu, all_names, lock_level):
 471     """Helper function to determine names asked for in the query.
 472
 473     """
 474     if self.do_locking:
 475       names = lu.acquired_locks[lock_level]
 476     else:
 477       names = all_names
 478
 479     if self.wanted == locking.ALL_SET:
 480       assert not self.names
 481       # caller didn't specify names, so ordering is not important
 482       return utils.NiceSort(names)
 483
 484     # caller specified names and we must keep the same order
 485     assert self.names
 486     assert not self.do_locking or lu.acquired_locks[lock_level]
 487
 488     missing = set(self.wanted).difference(names)
 489     if missing:
 490       raise errors.OpExecError("Some items were removed before retrieving"
 491                                " their data: %s" % missing)
 492
 493     # Return expanded names
 494     return self.wanted
 495
 496   @classmethod
 497   def FieldsQuery(cls, fields):
 498     """Returns list of available fields.
 499
 500     @return: List of L{objects.QueryFieldDefinition}
 501
 502     """
 503     return query.QueryFields(cls.FIELDS, fields)
 504
 505   def ExpandNames(self, lu):
 506     """Expand names for this query.
 507
 508     See L{LogicalUnit.ExpandNames}.
 509
 510     """
 511     raise NotImplementedError()
 512
 513   def DeclareLocks(self, lu, level):
 514     """Declare locks for this query.
 515
 516     See L{LogicalUnit.DeclareLocks}.
 517
 518     """
 519     raise NotImplementedError()
 520
 521   def _GetQueryData(self, lu):
 522     """Collects all data for this query.
 523
 524     @return: Query data object
 525
 526     """
 527     raise NotImplementedError()
 528
 529   def NewStyleQuery(self, lu):
 530     """Collect data and execute query.
 531
 532     """
 533     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 534
 535   def OldStyleQuery(self, lu):
 536     """Collect data and execute query.
 537
 538     """
 539     return self.query.OldStyleQuery(self._GetQueryData(lu))
 540
 541
 542 def _GetWantedNodes(lu, nodes):
 543   """Returns list of checked and expanded node names.
 544
 545   @type lu: L{LogicalUnit}
 546   @param lu: the logical unit on whose behalf we execute
 547   @type nodes: list
 548   @param nodes: list of node names or None for all nodes
 549   @rtype: list
 550   @return: the list of nodes, sorted
 551   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 552
 553   """
 554   if nodes:
 555     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 556
 557   return utils.NiceSort(lu.cfg.GetNodeList())
 558
 559
 560 def _GetWantedInstances(lu, instances):
 561   """Returns list of checked and expanded instance names.
 562
 563   @type lu: L{LogicalUnit}
 564   @param lu: the logical unit on whose behalf we execute
 565   @type instances: list
 566   @param instances: list of instance names or None for all instances
 567   @rtype: list
 568   @return: the list of instances, sorted
 569   @raise errors.OpPrereqError: if the instances parameter is wrong type
 570   @raise errors.OpPrereqError: if any of the passed instances is not found
 571
 572   """
 573   if instances:
 574     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 575   else:
 576     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 577   return wanted
 578
 579
 580 def _GetUpdatedParams(old_params, update_dict,
 581                       use_default=True, use_none=False):
 582   """Return the new version of a parameter dictionary.
 583
 584   @type old_params: dict
 585   @param old_params: old parameters
 586   @type update_dict: dict
 587   @param update_dict: dict containing new parameter values, or
 588       constants.VALUE_DEFAULT to reset the parameter to its default
 589       value
 590   @param use_default: boolean
 591   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 592       values as 'to be deleted' values
 593   @param use_none: boolean
 594   @type use_none: whether to recognise C{None} values as 'to be
 595       deleted' values
 596   @rtype: dict
 597   @return: the new parameter dictionary
 598
 599   """
 600   params_copy = copy.deepcopy(old_params)
 601   for key, val in update_dict.iteritems():
 602     if ((use_default and val == constants.VALUE_DEFAULT) or
 603         (use_none and val is None)):
 604       try:
 605         del params_copy[key]
 606       except KeyError:
 607         pass
 608     else:
 609       params_copy[key] = val
 610   return params_copy
 611
 612
 613 def _CheckOutputFields(static, dynamic, selected):
 614   """Checks whether all selected fields are valid.
 615
 616   @type static: L{utils.FieldSet}
 617   @param static: static fields set
 618   @type dynamic: L{utils.FieldSet}
 619   @param dynamic: dynamic fields set
 620
 621   """
 622   f = utils.FieldSet()
 623   f.Extend(static)
 624   f.Extend(dynamic)
 625
 626   delta = f.NonMatching(selected)
 627   if delta:
 628     raise errors.OpPrereqError("Unknown output fields selected: %s"
 629                                % ",".join(delta), errors.ECODE_INVAL)
 630
 631
 632 def _CheckGlobalHvParams(params):
 633   """Validates that given hypervisor params are not global ones.
 634
 635   This will ensure that instances don't get customised versions of
 636   global params.
 637
 638   """
 639   used_globals = constants.HVC_GLOBALS.intersection(params)
 640   if used_globals:
 641     msg = ("The following hypervisor parameters are global and cannot"
 642            " be customized at instance level, please modify them at"
 643            " cluster level: %s" % utils.CommaJoin(used_globals))
 644     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 645
 646
 647 def _CheckNodeOnline(lu, node, msg=None):
 648   """Ensure that a given node is online.
 649
 650   @param lu: the LU on behalf of which we make the check
 651   @param node: the node to check
 652   @param msg: if passed, should be a message to replace the default one
 653   @raise errors.OpPrereqError: if the node is offline
 654
 655   """
 656   if msg is None:
 657     msg = "Can't use offline node"
 658   if lu.cfg.GetNodeInfo(node).offline:
 659     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 660
 661
 662 def _CheckNodeNotDrained(lu, node):
 663   """Ensure that a given node is not drained.
 664
 665   @param lu: the LU on behalf of which we make the check
 666   @param node: the node to check
 667   @raise errors.OpPrereqError: if the node is drained
 668
 669   """
 670   if lu.cfg.GetNodeInfo(node).drained:
 671     raise errors.OpPrereqError("Can't use drained node %s" % node,
 672                                errors.ECODE_STATE)
 673
 674
 675 def _CheckNodeVmCapable(lu, node):
 676   """Ensure that a given node is vm capable.
 677
 678   @param lu: the LU on behalf of which we make the check
 679   @param node: the node to check
 680   @raise errors.OpPrereqError: if the node is not vm capable
 681
 682   """
 683   if not lu.cfg.GetNodeInfo(node).vm_capable:
 684     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 685                                errors.ECODE_STATE)
 686
 687
 688 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 689   """Ensure that a node supports a given OS.
 690
 691   @param lu: the LU on behalf of which we make the check
 692   @param node: the node to check
 693   @param os_name: the OS to query about
 694   @param force_variant: whether to ignore variant errors
 695   @raise errors.OpPrereqError: if the node is not supporting the OS
 696
 697   """
 698   result = lu.rpc.call_os_get(node, os_name)
 699   result.Raise("OS '%s' not in supported OS list for node %s" %
 700                (os_name, node),
 701                prereq=True, ecode=errors.ECODE_INVAL)
 702   if not force_variant:
 703     _CheckOSVariant(result.payload, os_name)
 704
 705
 706 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 707   """Ensure that a node has the given secondary ip.
 708
 709   @type lu: L{LogicalUnit}
 710   @param lu: the LU on behalf of which we make the check
 711   @type node: string
 712   @param node: the node to check
 713   @type secondary_ip: string
 714   @param secondary_ip: the ip to check
 715   @type prereq: boolean
 716   @param prereq: whether to throw a prerequisite or an execute error
 717   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 718   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 719
 720   """
 721   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 722   result.Raise("Failure checking secondary ip on node %s" % node,
 723                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 724   if not result.payload:
 725     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 726            " please fix and re-run this command" % secondary_ip)
 727     if prereq:
 728       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 729     else:
 730       raise errors.OpExecError(msg)
 731
 732
 733 def _GetClusterDomainSecret():
 734   """Reads the cluster domain secret.
 735
 736   """
 737   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 738                                strict=True)
 739
 740
 741 def _CheckInstanceDown(lu, instance, reason):
 742   """Ensure that an instance is not running."""
 743   if instance.admin_up:
 744     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 745                                (instance.name, reason), errors.ECODE_STATE)
 746
 747   pnode = instance.primary_node
 748   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 749   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 750               prereq=True, ecode=errors.ECODE_ENVIRON)
 751
 752   if instance.name in ins_l.payload:
 753     raise errors.OpPrereqError("Instance %s is running, %s" %
 754                                (instance.name, reason), errors.ECODE_STATE)
 755
 756
 757 def _ExpandItemName(fn, name, kind):
 758   """Expand an item name.
 759
 760   @param fn: the function to use for expansion
 761   @param name: requested item name
 762   @param kind: text description ('Node' or 'Instance')
 763   @return: the resolved (full) name
 764   @raise errors.OpPrereqError: if the item is not found
 765
 766   """
 767   full_name = fn(name)
 768   if full_name is None:
 769     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 770                                errors.ECODE_NOENT)
 771   return full_name
 772
 773
 774 def _ExpandNodeName(cfg, name):
 775   """Wrapper over L{_ExpandItemName} for nodes."""
 776   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 777
 778
 779 def _ExpandInstanceName(cfg, name):
 780   """Wrapper over L{_ExpandItemName} for instance."""
 781   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 782
 783
 784 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 785                           memory, vcpus, nics, disk_template, disks,
 786                           bep, hvp, hypervisor_name):
 787   """Builds instance related env variables for hooks
 788
 789   This builds the hook environment from individual variables.
 790
 791   @type name: string
 792   @param name: the name of the instance
 793   @type primary_node: string
 794   @param primary_node: the name of the instance's primary node
 795   @type secondary_nodes: list
 796   @param secondary_nodes: list of secondary nodes as strings
 797   @type os_type: string
 798   @param os_type: the name of the instance's OS
 799   @type status: boolean
 800   @param status: the should_run status of the instance
 801   @type memory: string
 802   @param memory: the memory size of the instance
 803   @type vcpus: string
 804   @param vcpus: the count of VCPUs the instance has
 805   @type nics: list
 806   @param nics: list of tuples (ip, mac, mode, link) representing
 807       the NICs the instance has
 808   @type disk_template: string
 809   @param disk_template: the disk template of the instance
 810   @type disks: list
 811   @param disks: the list of (size, mode) pairs
 812   @type bep: dict
 813   @param bep: the backend parameters for the instance
 814   @type hvp: dict
 815   @param hvp: the hypervisor parameters for the instance
 816   @type hypervisor_name: string
 817   @param hypervisor_name: the hypervisor for the instance
 818   @rtype: dict
 819   @return: the hook environment for this instance
 820
 821   """
 822   if status:
 823     str_status = "up"
 824   else:
 825     str_status = "down"
 826   env = {
 827     "OP_TARGET": name,
 828     "INSTANCE_NAME": name,
 829     "INSTANCE_PRIMARY": primary_node,
 830     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 831     "INSTANCE_OS_TYPE": os_type,
 832     "INSTANCE_STATUS": str_status,
 833     "INSTANCE_MEMORY": memory,
 834     "INSTANCE_VCPUS": vcpus,
 835     "INSTANCE_DISK_TEMPLATE": disk_template,
 836     "INSTANCE_HYPERVISOR": hypervisor_name,
 837   }
 838
 839   if nics:
 840     nic_count = len(nics)
 841     for idx, (ip, mac, mode, link) in enumerate(nics):
 842       if ip is None:
 843         ip = ""
 844       env["INSTANCE_NIC%d_IP" % idx] = ip
 845       env["INSTANCE_NIC%d_MAC" % idx] = mac
 846       env["INSTANCE_NIC%d_MODE" % idx] = mode
 847       env["INSTANCE_NIC%d_LINK" % idx] = link
 848       if mode == constants.NIC_MODE_BRIDGED:
 849         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 850   else:
 851     nic_count = 0
 852
 853   env["INSTANCE_NIC_COUNT"] = nic_count
 854
 855   if disks:
 856     disk_count = len(disks)
 857     for idx, (size, mode) in enumerate(disks):
 858       env["INSTANCE_DISK%d_SIZE" % idx] = size
 859       env["INSTANCE_DISK%d_MODE" % idx] = mode
 860   else:
 861     disk_count = 0
 862
 863   env["INSTANCE_DISK_COUNT"] = disk_count
 864
 865   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 866     for key, value in source.items():
 867       env["INSTANCE_%s_%s" % (kind, key)] = value
 868
 869   return env
 870
 871
 872 def _NICListToTuple(lu, nics):
 873   """Build a list of nic information tuples.
 874
 875   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 876   value in LUInstanceQueryData.
 877
 878   @type lu:  L{LogicalUnit}
 879   @param lu: the logical unit on whose behalf we execute
 880   @type nics: list of L{objects.NIC}
 881   @param nics: list of nics to convert to hooks tuples
 882
 883   """
 884   hooks_nics = []
 885   cluster = lu.cfg.GetClusterInfo()
 886   for nic in nics:
 887     ip = nic.ip
 888     mac = nic.mac
 889     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 890     mode = filled_params[constants.NIC_MODE]
 891     link = filled_params[constants.NIC_LINK]
 892     hooks_nics.append((ip, mac, mode, link))
 893   return hooks_nics
 894
 895
 896 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 897   """Builds instance related env variables for hooks from an object.
 898
 899   @type lu: L{LogicalUnit}
 900   @param lu: the logical unit on whose behalf we execute
 901   @type instance: L{objects.Instance}
 902   @param instance: the instance for which we should build the
 903       environment
 904   @type override: dict
 905   @param override: dictionary with key/values that will override
 906       our values
 907   @rtype: dict
 908   @return: the hook environment dictionary
 909
 910   """
 911   cluster = lu.cfg.GetClusterInfo()
 912   bep = cluster.FillBE(instance)
 913   hvp = cluster.FillHV(instance)
 914   args = {
 915     'name': instance.name,
 916     'primary_node': instance.primary_node,
 917     'secondary_nodes': instance.secondary_nodes,
 918     'os_type': instance.os,
 919     'status': instance.admin_up,
 920     'memory': bep[constants.BE_MEMORY],
 921     'vcpus': bep[constants.BE_VCPUS],
 922     'nics': _NICListToTuple(lu, instance.nics),
 923     'disk_template': instance.disk_template,
 924     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 925     'bep': bep,
 926     'hvp': hvp,
 927     'hypervisor_name': instance.hypervisor,
 928   }
 929   if override:
 930     args.update(override)
 931   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 932
 933
 934 def _AdjustCandidatePool(lu, exceptions):
 935   """Adjust the candidate pool after node operations.
 936
 937   """
 938   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 939   if mod_list:
 940     lu.LogInfo("Promoted nodes to master candidate role: %s",
 941                utils.CommaJoin(node.name for node in mod_list))
 942     for name in mod_list:
 943       lu.context.ReaddNode(name)
 944   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 945   if mc_now > mc_max:
 946     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 947                (mc_now, mc_max))
 948
 949
 950 def _DecideSelfPromotion(lu, exceptions=None):
 951   """Decide whether I should promote myself as a master candidate.
 952
 953   """
 954   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 955   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 956   # the new node will increase mc_max with one, so:
 957   mc_should = min(mc_should + 1, cp_size)
 958   return mc_now < mc_should
 959
 960
 961 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 962   """Check that the brigdes needed by a list of nics exist.
 963
 964   """
 965   cluster = lu.cfg.GetClusterInfo()
 966   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 967   brlist = [params[constants.NIC_LINK] for params in paramslist
 968             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 969   if brlist:
 970     result = lu.rpc.call_bridges_exist(target_node, brlist)
 971     result.Raise("Error checking bridges on destination node '%s'" %
 972                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 973
 974
 975 def _CheckInstanceBridgesExist(lu, instance, node=None):
 976   """Check that the brigdes needed by an instance exist.
 977
 978   """
 979   if node is None:
 980     node = instance.primary_node
 981   _CheckNicsBridgesExist(lu, instance.nics, node)
 982
 983
 984 def _CheckOSVariant(os_obj, name):
 985   """Check whether an OS name conforms to the os variants specification.
 986
 987   @type os_obj: L{objects.OS}
 988   @param os_obj: OS object to check
 989   @type name: string
 990   @param name: OS name passed by the user, to check for validity
 991
 992   """
 993   if not os_obj.supported_variants:
 994     return
 995   variant = objects.OS.GetVariant(name)
 996   if not variant:
 997     raise errors.OpPrereqError("OS name must include a variant",
 998                                errors.ECODE_INVAL)
 999
1000   if variant not in os_obj.supported_variants:
1001     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1002
1003
1004 def _GetNodeInstancesInner(cfg, fn):
1005   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1006
1007
1008 def _GetNodeInstances(cfg, node_name):
1009   """Returns a list of all primary and secondary instances on a node.
1010
1011   """
1012
1013   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1014
1015
1016 def _GetNodePrimaryInstances(cfg, node_name):
1017   """Returns primary instances on a node.
1018
1019   """
1020   return _GetNodeInstancesInner(cfg,
1021                                 lambda inst: node_name == inst.primary_node)
1022
1023
1024 def _GetNodeSecondaryInstances(cfg, node_name):
1025   """Returns secondary instances on a node.
1026
1027   """
1028   return _GetNodeInstancesInner(cfg,
1029                                 lambda inst: node_name in inst.secondary_nodes)
1030
1031
1032 def _GetStorageTypeArgs(cfg, storage_type):
1033   """Returns the arguments for a storage type.
1034
1035   """
1036   # Special case for file storage
1037   if storage_type == constants.ST_FILE:
1038     # storage.FileStorage wants a list of storage directories
1039     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1040
1041   return []
1042
1043
1044 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1045   faulty = []
1046
1047   for dev in instance.disks:
1048     cfg.SetDiskID(dev, node_name)
1049
1050   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1051   result.Raise("Failed to get disk status from node %s" % node_name,
1052                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1053
1054   for idx, bdev_status in enumerate(result.payload):
1055     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1056       faulty.append(idx)
1057
1058   return faulty
1059
1060
1061 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1062   """Check the sanity of iallocator and node arguments and use the
1063   cluster-wide iallocator if appropriate.
1064
1065   Check that at most one of (iallocator, node) is specified. If none is
1066   specified, then the LU's opcode's iallocator slot is filled with the
1067   cluster-wide default iallocator.
1068
1069   @type iallocator_slot: string
1070   @param iallocator_slot: the name of the opcode iallocator slot
1071   @type node_slot: string
1072   @param node_slot: the name of the opcode target node slot
1073
1074   """
1075   node = getattr(lu.op, node_slot, None)
1076   iallocator = getattr(lu.op, iallocator_slot, None)
1077
1078   if node is not None and iallocator is not None:
1079     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1080                                errors.ECODE_INVAL)
1081   elif node is None and iallocator is None:
1082     default_iallocator = lu.cfg.GetDefaultIAllocator()
1083     if default_iallocator:
1084       setattr(lu.op, iallocator_slot, default_iallocator)
1085     else:
1086       raise errors.OpPrereqError("No iallocator or node given and no"
1087                                  " cluster-wide default iallocator found."
1088                                  " Please specify either an iallocator or a"
1089                                  " node, or set a cluster-wide default"
1090                                  " iallocator.")
1091
1092
1093 class LUClusterPostInit(LogicalUnit):
1094   """Logical unit for running hooks after cluster initialization.
1095
1096   """
1097   HPATH = "cluster-init"
1098   HTYPE = constants.HTYPE_CLUSTER
1099
1100   def BuildHooksEnv(self):
1101     """Build hooks env.
1102
1103     """
1104     env = {"OP_TARGET": self.cfg.GetClusterName()}
1105     mn = self.cfg.GetMasterNode()
1106     return env, [], [mn]
1107
1108   def Exec(self, feedback_fn):
1109     """Nothing to do.
1110
1111     """
1112     return True
1113
1114
1115 class LUClusterDestroy(LogicalUnit):
1116   """Logical unit for destroying the cluster.
1117
1118   """
1119   HPATH = "cluster-destroy"
1120   HTYPE = constants.HTYPE_CLUSTER
1121
1122   def BuildHooksEnv(self):
1123     """Build hooks env.
1124
1125     """
1126     env = {"OP_TARGET": self.cfg.GetClusterName()}
1127     return env, [], []
1128
1129   def CheckPrereq(self):
1130     """Check prerequisites.
1131
1132     This checks whether the cluster is empty.
1133
1134     Any errors are signaled by raising errors.OpPrereqError.
1135
1136     """
1137     master = self.cfg.GetMasterNode()
1138
1139     nodelist = self.cfg.GetNodeList()
1140     if len(nodelist) != 1 or nodelist[0] != master:
1141       raise errors.OpPrereqError("There are still %d node(s) in"
1142                                  " this cluster." % (len(nodelist) - 1),
1143                                  errors.ECODE_INVAL)
1144     instancelist = self.cfg.GetInstanceList()
1145     if instancelist:
1146       raise errors.OpPrereqError("There are still %d instance(s) in"
1147                                  " this cluster." % len(instancelist),
1148                                  errors.ECODE_INVAL)
1149
1150   def Exec(self, feedback_fn):
1151     """Destroys the cluster.
1152
1153     """
1154     master = self.cfg.GetMasterNode()
1155
1156     # Run post hooks on master node before it's removed
1157     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1158     try:
1159       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1160     except:
1161       # pylint: disable-msg=W0702
1162       self.LogWarning("Errors occurred running hooks on %s" % master)
1163
1164     result = self.rpc.call_node_stop_master(master, False)
1165     result.Raise("Could not disable the master role")
1166
1167     return master
1168
1169
1170 def _VerifyCertificate(filename):
1171   """Verifies a certificate for LUClusterVerify.
1172
1173   @type filename: string
1174   @param filename: Path to PEM file
1175
1176   """
1177   try:
1178     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1179                                            utils.ReadFile(filename))
1180   except Exception, err: # pylint: disable-msg=W0703
1181     return (LUClusterVerify.ETYPE_ERROR,
1182             "Failed to load X509 certificate %s: %s" % (filename, err))
1183
1184   (errcode, msg) = \
1185     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1186                                 constants.SSL_CERT_EXPIRATION_ERROR)
1187
1188   if msg:
1189     fnamemsg = "While verifying %s: %s" % (filename, msg)
1190   else:
1191     fnamemsg = None
1192
1193   if errcode is None:
1194     return (None, fnamemsg)
1195   elif errcode == utils.CERT_WARNING:
1196     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1197   elif errcode == utils.CERT_ERROR:
1198     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1199
1200   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1201
1202
1203 class LUClusterVerify(LogicalUnit):
1204   """Verifies the cluster status.
1205
1206   """
1207   HPATH = "cluster-verify"
1208   HTYPE = constants.HTYPE_CLUSTER
1209   REQ_BGL = False
1210
1211   TCLUSTER = "cluster"
1212   TNODE = "node"
1213   TINSTANCE = "instance"
1214
1215   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1216   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1217   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1218   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1219   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1220   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1221   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1222   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1223   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1224   ENODEDRBD = (TNODE, "ENODEDRBD")
1225   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1226   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1227   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1228   ENODEHV = (TNODE, "ENODEHV")
1229   ENODELVM = (TNODE, "ENODELVM")
1230   ENODEN1 = (TNODE, "ENODEN1")
1231   ENODENET = (TNODE, "ENODENET")
1232   ENODEOS = (TNODE, "ENODEOS")
1233   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1234   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1235   ENODERPC = (TNODE, "ENODERPC")
1236   ENODESSH = (TNODE, "ENODESSH")
1237   ENODEVERSION = (TNODE, "ENODEVERSION")
1238   ENODESETUP = (TNODE, "ENODESETUP")
1239   ENODETIME = (TNODE, "ENODETIME")
1240   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1241
1242   ETYPE_FIELD = "code"
1243   ETYPE_ERROR = "ERROR"
1244   ETYPE_WARNING = "WARNING"
1245
1246   _HOOKS_INDENT_RE = re.compile("^", re.M)
1247
1248   class NodeImage(object):
1249     """A class representing the logical and physical status of a node.
1250
1251     @type name: string
1252     @ivar name: the node name to which this object refers
1253     @ivar volumes: a structure as returned from
1254         L{ganeti.backend.GetVolumeList} (runtime)
1255     @ivar instances: a list of running instances (runtime)
1256     @ivar pinst: list of configured primary instances (config)
1257     @ivar sinst: list of configured secondary instances (config)
1258     @ivar sbp: dictionary of {primary-node: list of instances} for all
1259         instances for which this node is secondary (config)
1260     @ivar mfree: free memory, as reported by hypervisor (runtime)
1261     @ivar dfree: free disk, as reported by the node (runtime)
1262     @ivar offline: the offline status (config)
1263     @type rpc_fail: boolean
1264     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1265         not whether the individual keys were correct) (runtime)
1266     @type lvm_fail: boolean
1267     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1268     @type hyp_fail: boolean
1269     @ivar hyp_fail: whether the RPC call didn't return the instance list
1270     @type ghost: boolean
1271     @ivar ghost: whether this is a known node or not (config)
1272     @type os_fail: boolean
1273     @ivar os_fail: whether the RPC call didn't return valid OS data
1274     @type oslist: list
1275     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1276     @type vm_capable: boolean
1277     @ivar vm_capable: whether the node can host instances
1278
1279     """
1280     def __init__(self, offline=False, name=None, vm_capable=True):
1281       self.name = name
1282       self.volumes = {}
1283       self.instances = []
1284       self.pinst = []
1285       self.sinst = []
1286       self.sbp = {}
1287       self.mfree = 0
1288       self.dfree = 0
1289       self.offline = offline
1290       self.vm_capable = vm_capable
1291       self.rpc_fail = False
1292       self.lvm_fail = False
1293       self.hyp_fail = False
1294       self.ghost = False
1295       self.os_fail = False
1296       self.oslist = {}
1297
1298   def ExpandNames(self):
1299     self.needed_locks = {
1300       locking.LEVEL_NODE: locking.ALL_SET,
1301       locking.LEVEL_INSTANCE: locking.ALL_SET,
1302     }
1303     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1304
1305   def _Error(self, ecode, item, msg, *args, **kwargs):
1306     """Format an error message.
1307
1308     Based on the opcode's error_codes parameter, either format a
1309     parseable error code, or a simpler error string.
1310
1311     This must be called only from Exec and functions called from Exec.
1312
1313     """
1314     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1315     itype, etxt = ecode
1316     # first complete the msg
1317     if args:
1318       msg = msg % args
1319     # then format the whole message
1320     if self.op.error_codes:
1321       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1322     else:
1323       if item:
1324         item = " " + item
1325       else:
1326         item = ""
1327       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1328     # and finally report it via the feedback_fn
1329     self._feedback_fn("  - %s" % msg)
1330
1331   def _ErrorIf(self, cond, *args, **kwargs):
1332     """Log an error message if the passed condition is True.
1333
1334     """
1335     cond = bool(cond) or self.op.debug_simulate_errors
1336     if cond:
1337       self._Error(*args, **kwargs)
1338     # do not mark the operation as failed for WARN cases only
1339     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1340       self.bad = self.bad or cond
1341
1342   def _VerifyNode(self, ninfo, nresult):
1343     """Perform some basic validation on data returned from a node.
1344
1345       - check the result data structure is well formed and has all the
1346         mandatory fields
1347       - check ganeti version
1348
1349     @type ninfo: L{objects.Node}
1350     @param ninfo: the node to check
1351     @param nresult: the results from the node
1352     @rtype: boolean
1353     @return: whether overall this call was successful (and we can expect
1354          reasonable values in the respose)
1355
1356     """
1357     node = ninfo.name
1358     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1359
1360     # main result, nresult should be a non-empty dict
1361     test = not nresult or not isinstance(nresult, dict)
1362     _ErrorIf(test, self.ENODERPC, node,
1363                   "unable to verify node: no data returned")
1364     if test:
1365       return False
1366
1367     # compares ganeti version
1368     local_version = constants.PROTOCOL_VERSION
1369     remote_version = nresult.get("version", None)
1370     test = not (remote_version and
1371                 isinstance(remote_version, (list, tuple)) and
1372                 len(remote_version) == 2)
1373     _ErrorIf(test, self.ENODERPC, node,
1374              "connection to node returned invalid data")
1375     if test:
1376       return False
1377
1378     test = local_version != remote_version[0]
1379     _ErrorIf(test, self.ENODEVERSION, node,
1380              "incompatible protocol versions: master %s,"
1381              " node %s", local_version, remote_version[0])
1382     if test:
1383       return False
1384
1385     # node seems compatible, we can actually try to look into its results
1386
1387     # full package version
1388     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1389                   self.ENODEVERSION, node,
1390                   "software version mismatch: master %s, node %s",
1391                   constants.RELEASE_VERSION, remote_version[1],
1392                   code=self.ETYPE_WARNING)
1393
1394     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1395     if ninfo.vm_capable and isinstance(hyp_result, dict):
1396       for hv_name, hv_result in hyp_result.iteritems():
1397         test = hv_result is not None
1398         _ErrorIf(test, self.ENODEHV, node,
1399                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1400
1401     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1402     if ninfo.vm_capable and isinstance(hvp_result, list):
1403       for item, hv_name, hv_result in hvp_result:
1404         _ErrorIf(True, self.ENODEHV, node,
1405                  "hypervisor %s parameter verify failure (source %s): %s",
1406                  hv_name, item, hv_result)
1407
1408     test = nresult.get(constants.NV_NODESETUP,
1409                            ["Missing NODESETUP results"])
1410     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1411              "; ".join(test))
1412
1413     return True
1414
1415   def _VerifyNodeTime(self, ninfo, nresult,
1416                       nvinfo_starttime, nvinfo_endtime):
1417     """Check the node time.
1418
1419     @type ninfo: L{objects.Node}
1420     @param ninfo: the node to check
1421     @param nresult: the remote results for the node
1422     @param nvinfo_starttime: the start time of the RPC call
1423     @param nvinfo_endtime: the end time of the RPC call
1424
1425     """
1426     node = ninfo.name
1427     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1428
1429     ntime = nresult.get(constants.NV_TIME, None)
1430     try:
1431       ntime_merged = utils.MergeTime(ntime)
1432     except (ValueError, TypeError):
1433       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1434       return
1435
1436     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1437       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1438     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1439       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1440     else:
1441       ntime_diff = None
1442
1443     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1444              "Node time diverges by at least %s from master node time",
1445              ntime_diff)
1446
1447   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1448     """Check the node time.
1449
1450     @type ninfo: L{objects.Node}
1451     @param ninfo: the node to check
1452     @param nresult: the remote results for the node
1453     @param vg_name: the configured VG name
1454
1455     """
1456     if vg_name is None:
1457       return
1458
1459     node = ninfo.name
1460     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1461
1462     # checks vg existence and size > 20G
1463     vglist = nresult.get(constants.NV_VGLIST, None)
1464     test = not vglist
1465     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1466     if not test:
1467       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1468                                             constants.MIN_VG_SIZE)
1469       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1470
1471     # check pv names
1472     pvlist = nresult.get(constants.NV_PVLIST, None)
1473     test = pvlist is None
1474     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1475     if not test:
1476       # check that ':' is not present in PV names, since it's a
1477       # special character for lvcreate (denotes the range of PEs to
1478       # use on the PV)
1479       for _, pvname, owner_vg in pvlist:
1480         test = ":" in pvname
1481         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1482                  " '%s' of VG '%s'", pvname, owner_vg)
1483
1484   def _VerifyNodeNetwork(self, ninfo, nresult):
1485     """Check the node time.
1486
1487     @type ninfo: L{objects.Node}
1488     @param ninfo: the node to check
1489     @param nresult: the remote results for the node
1490
1491     """
1492     node = ninfo.name
1493     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1494
1495     test = constants.NV_NODELIST not in nresult
1496     _ErrorIf(test, self.ENODESSH, node,
1497              "node hasn't returned node ssh connectivity data")
1498     if not test:
1499       if nresult[constants.NV_NODELIST]:
1500         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1501           _ErrorIf(True, self.ENODESSH, node,
1502                    "ssh communication with node '%s': %s", a_node, a_msg)
1503
1504     test = constants.NV_NODENETTEST not in nresult
1505     _ErrorIf(test, self.ENODENET, node,
1506              "node hasn't returned node tcp connectivity data")
1507     if not test:
1508       if nresult[constants.NV_NODENETTEST]:
1509         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1510         for anode in nlist:
1511           _ErrorIf(True, self.ENODENET, node,
1512                    "tcp communication with node '%s': %s",
1513                    anode, nresult[constants.NV_NODENETTEST][anode])
1514
1515     test = constants.NV_MASTERIP not in nresult
1516     _ErrorIf(test, self.ENODENET, node,
1517              "node hasn't returned node master IP reachability data")
1518     if not test:
1519       if not nresult[constants.NV_MASTERIP]:
1520         if node == self.master_node:
1521           msg = "the master node cannot reach the master IP (not configured?)"
1522         else:
1523           msg = "cannot reach the master IP"
1524         _ErrorIf(True, self.ENODENET, node, msg)
1525
1526   def _VerifyInstance(self, instance, instanceconfig, node_image,
1527                       diskstatus):
1528     """Verify an instance.
1529
1530     This function checks to see if the required block devices are
1531     available on the instance's node.
1532
1533     """
1534     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1535     node_current = instanceconfig.primary_node
1536
1537     node_vol_should = {}
1538     instanceconfig.MapLVsByNode(node_vol_should)
1539
1540     for node in node_vol_should:
1541       n_img = node_image[node]
1542       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1543         # ignore missing volumes on offline or broken nodes
1544         continue
1545       for volume in node_vol_should[node]:
1546         test = volume not in n_img.volumes
1547         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1548                  "volume %s missing on node %s", volume, node)
1549
1550     if instanceconfig.admin_up:
1551       pri_img = node_image[node_current]
1552       test = instance not in pri_img.instances and not pri_img.offline
1553       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1554                "instance not running on its primary node %s",
1555                node_current)
1556
1557     for node, n_img in node_image.items():
1558       if node != node_current:
1559         test = instance in n_img.instances
1560         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1561                  "instance should not run on node %s", node)
1562
1563     diskdata = [(nname, success, status, idx)
1564                 for (nname, disks) in diskstatus.items()
1565                 for idx, (success, status) in enumerate(disks)]
1566
1567     for nname, success, bdev_status, idx in diskdata:
1568       # the 'ghost node' construction in Exec() ensures that we have a
1569       # node here
1570       snode = node_image[nname]
1571       bad_snode = snode.ghost or snode.offline
1572       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1573                self.EINSTANCEFAULTYDISK, instance,
1574                "couldn't retrieve status for disk/%s on %s: %s",
1575                idx, nname, bdev_status)
1576       _ErrorIf((instanceconfig.admin_up and success and
1577                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1578                self.EINSTANCEFAULTYDISK, instance,
1579                "disk/%s on %s is faulty", idx, nname)
1580
1581   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1582     """Verify if there are any unknown volumes in the cluster.
1583
1584     The .os, .swap and backup volumes are ignored. All other volumes are
1585     reported as unknown.
1586
1587     @type reserved: L{ganeti.utils.FieldSet}
1588     @param reserved: a FieldSet of reserved volume names
1589
1590     """
1591     for node, n_img in node_image.items():
1592       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1593         # skip non-healthy nodes
1594         continue
1595       for volume in n_img.volumes:
1596         test = ((node not in node_vol_should or
1597                 volume not in node_vol_should[node]) and
1598                 not reserved.Matches(volume))
1599         self._ErrorIf(test, self.ENODEORPHANLV, node,
1600                       "volume %s is unknown", volume)
1601
1602   def _VerifyOrphanInstances(self, instancelist, node_image):
1603     """Verify the list of running instances.
1604
1605     This checks what instances are running but unknown to the cluster.
1606
1607     """
1608     for node, n_img in node_image.items():
1609       for o_inst in n_img.instances:
1610         test = o_inst not in instancelist
1611         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1612                       "instance %s on node %s should not exist", o_inst, node)
1613
1614   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1615     """Verify N+1 Memory Resilience.
1616
1617     Check that if one single node dies we can still start all the
1618     instances it was primary for.
1619
1620     """
1621     cluster_info = self.cfg.GetClusterInfo()
1622     for node, n_img in node_image.items():
1623       # This code checks that every node which is now listed as
1624       # secondary has enough memory to host all instances it is
1625       # supposed to should a single other node in the cluster fail.
1626       # FIXME: not ready for failover to an arbitrary node
1627       # FIXME: does not support file-backed instances
1628       # WARNING: we currently take into account down instances as well
1629       # as up ones, considering that even if they're down someone
1630       # might want to start them even in the event of a node failure.
1631       if n_img.offline:
1632         # we're skipping offline nodes from the N+1 warning, since
1633         # most likely we don't have good memory infromation from them;
1634         # we already list instances living on such nodes, and that's
1635         # enough warning
1636         continue
1637       for prinode, instances in n_img.sbp.items():
1638         needed_mem = 0
1639         for instance in instances:
1640           bep = cluster_info.FillBE(instance_cfg[instance])
1641           if bep[constants.BE_AUTO_BALANCE]:
1642             needed_mem += bep[constants.BE_MEMORY]
1643         test = n_img.mfree < needed_mem
1644         self._ErrorIf(test, self.ENODEN1, node,
1645                       "not enough memory to accomodate instance failovers"
1646                       " should node %s fail", prinode)
1647
1648   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1649                        master_files):
1650     """Verifies and computes the node required file checksums.
1651
1652     @type ninfo: L{objects.Node}
1653     @param ninfo: the node to check
1654     @param nresult: the remote results for the node
1655     @param file_list: required list of files
1656     @param local_cksum: dictionary of local files and their checksums
1657     @param master_files: list of files that only masters should have
1658
1659     """
1660     node = ninfo.name
1661     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1662
1663     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1664     test = not isinstance(remote_cksum, dict)
1665     _ErrorIf(test, self.ENODEFILECHECK, node,
1666              "node hasn't returned file checksum data")
1667     if test:
1668       return
1669
1670     for file_name in file_list:
1671       node_is_mc = ninfo.master_candidate
1672       must_have = (file_name not in master_files) or node_is_mc
1673       # missing
1674       test1 = file_name not in remote_cksum
1675       # invalid checksum
1676       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1677       # existing and good
1678       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1679       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1680                "file '%s' missing", file_name)
1681       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1682                "file '%s' has wrong checksum", file_name)
1683       # not candidate and this is not a must-have file
1684       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1685                "file '%s' should not exist on non master"
1686                " candidates (and the file is outdated)", file_name)
1687       # all good, except non-master/non-must have combination
1688       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1689                "file '%s' should not exist"
1690                " on non master candidates", file_name)
1691
1692   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1693                       drbd_map):
1694     """Verifies and the node DRBD status.
1695
1696     @type ninfo: L{objects.Node}
1697     @param ninfo: the node to check
1698     @param nresult: the remote results for the node
1699     @param instanceinfo: the dict of instances
1700     @param drbd_helper: the configured DRBD usermode helper
1701     @param drbd_map: the DRBD map as returned by
1702         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1703
1704     """
1705     node = ninfo.name
1706     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1707
1708     if drbd_helper:
1709       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1710       test = (helper_result == None)
1711       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1712                "no drbd usermode helper returned")
1713       if helper_result:
1714         status, payload = helper_result
1715         test = not status
1716         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1717                  "drbd usermode helper check unsuccessful: %s", payload)
1718         test = status and (payload != drbd_helper)
1719         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1720                  "wrong drbd usermode helper: %s", payload)
1721
1722     # compute the DRBD minors
1723     node_drbd = {}
1724     for minor, instance in drbd_map[node].items():
1725       test = instance not in instanceinfo
1726       _ErrorIf(test, self.ECLUSTERCFG, None,
1727                "ghost instance '%s' in temporary DRBD map", instance)
1728         # ghost instance should not be running, but otherwise we
1729         # don't give double warnings (both ghost instance and
1730         # unallocated minor in use)
1731       if test:
1732         node_drbd[minor] = (instance, False)
1733       else:
1734         instance = instanceinfo[instance]
1735         node_drbd[minor] = (instance.name, instance.admin_up)
1736
1737     # and now check them
1738     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1739     test = not isinstance(used_minors, (tuple, list))
1740     _ErrorIf(test, self.ENODEDRBD, node,
1741              "cannot parse drbd status file: %s", str(used_minors))
1742     if test:
1743       # we cannot check drbd status
1744       return
1745
1746     for minor, (iname, must_exist) in node_drbd.items():
1747       test = minor not in used_minors and must_exist
1748       _ErrorIf(test, self.ENODEDRBD, node,
1749                "drbd minor %d of instance %s is not active", minor, iname)
1750     for minor in used_minors:
1751       test = minor not in node_drbd
1752       _ErrorIf(test, self.ENODEDRBD, node,
1753                "unallocated drbd minor %d is in use", minor)
1754
1755   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1756     """Builds the node OS structures.
1757
1758     @type ninfo: L{objects.Node}
1759     @param ninfo: the node to check
1760     @param nresult: the remote results for the node
1761     @param nimg: the node image object
1762
1763     """
1764     node = ninfo.name
1765     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1766
1767     remote_os = nresult.get(constants.NV_OSLIST, None)
1768     test = (not isinstance(remote_os, list) or
1769             not compat.all(isinstance(v, list) and len(v) == 7
1770                            for v in remote_os))
1771
1772     _ErrorIf(test, self.ENODEOS, node,
1773              "node hasn't returned valid OS data")
1774
1775     nimg.os_fail = test
1776
1777     if test:
1778       return
1779
1780     os_dict = {}
1781
1782     for (name, os_path, status, diagnose,
1783          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1784
1785       if name not in os_dict:
1786         os_dict[name] = []
1787
1788       # parameters is a list of lists instead of list of tuples due to
1789       # JSON lacking a real tuple type, fix it:
1790       parameters = [tuple(v) for v in parameters]
1791       os_dict[name].append((os_path, status, diagnose,
1792                             set(variants), set(parameters), set(api_ver)))
1793
1794     nimg.oslist = os_dict
1795
1796   def _VerifyNodeOS(self, ninfo, nimg, base):
1797     """Verifies the node OS list.
1798
1799     @type ninfo: L{objects.Node}
1800     @param ninfo: the node to check
1801     @param nimg: the node image object
1802     @param base: the 'template' node we match against (e.g. from the master)
1803
1804     """
1805     node = ninfo.name
1806     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1807
1808     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1809
1810     for os_name, os_data in nimg.oslist.items():
1811       assert os_data, "Empty OS status for OS %s?!" % os_name
1812       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1813       _ErrorIf(not f_status, self.ENODEOS, node,
1814                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1815       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1816                "OS '%s' has multiple entries (first one shadows the rest): %s",
1817                os_name, utils.CommaJoin([v[0] for v in os_data]))
1818       # this will catched in backend too
1819       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1820                and not f_var, self.ENODEOS, node,
1821                "OS %s with API at least %d does not declare any variant",
1822                os_name, constants.OS_API_V15)
1823       # comparisons with the 'base' image
1824       test = os_name not in base.oslist
1825       _ErrorIf(test, self.ENODEOS, node,
1826                "Extra OS %s not present on reference node (%s)",
1827                os_name, base.name)
1828       if test:
1829         continue
1830       assert base.oslist[os_name], "Base node has empty OS status?"
1831       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1832       if not b_status:
1833         # base OS is invalid, skipping
1834         continue
1835       for kind, a, b in [("API version", f_api, b_api),
1836                          ("variants list", f_var, b_var),
1837                          ("parameters", f_param, b_param)]:
1838         _ErrorIf(a != b, self.ENODEOS, node,
1839                  "OS %s %s differs from reference node %s: %s vs. %s",
1840                  kind, os_name, base.name,
1841                  utils.CommaJoin(a), utils.CommaJoin(b))
1842
1843     # check any missing OSes
1844     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1845     _ErrorIf(missing, self.ENODEOS, node,
1846              "OSes present on reference node %s but missing on this node: %s",
1847              base.name, utils.CommaJoin(missing))
1848
1849   def _VerifyOob(self, ninfo, nresult):
1850     """Verifies out of band functionality of a node.
1851
1852     @type ninfo: L{objects.Node}
1853     @param ninfo: the node to check
1854     @param nresult: the remote results for the node
1855
1856     """
1857     node = ninfo.name
1858     # We just have to verify the paths on master and/or master candidates
1859     # as the oob helper is invoked on the master
1860     if ((ninfo.master_candidate or ninfo.master_capable) and
1861         constants.NV_OOB_PATHS in nresult):
1862       for path_result in nresult[constants.NV_OOB_PATHS]:
1863         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1864
1865   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1866     """Verifies and updates the node volume data.
1867
1868     This function will update a L{NodeImage}'s internal structures
1869     with data from the remote call.
1870
1871     @type ninfo: L{objects.Node}
1872     @param ninfo: the node to check
1873     @param nresult: the remote results for the node
1874     @param nimg: the node image object
1875     @param vg_name: the configured VG name
1876
1877     """
1878     node = ninfo.name
1879     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1880
1881     nimg.lvm_fail = True
1882     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1883     if vg_name is None:
1884       pass
1885     elif isinstance(lvdata, basestring):
1886       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1887                utils.SafeEncode(lvdata))
1888     elif not isinstance(lvdata, dict):
1889       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1890     else:
1891       nimg.volumes = lvdata
1892       nimg.lvm_fail = False
1893
1894   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1895     """Verifies and updates the node instance list.
1896
1897     If the listing was successful, then updates this node's instance
1898     list. Otherwise, it marks the RPC call as failed for the instance
1899     list key.
1900
1901     @type ninfo: L{objects.Node}
1902     @param ninfo: the node to check
1903     @param nresult: the remote results for the node
1904     @param nimg: the node image object
1905
1906     """
1907     idata = nresult.get(constants.NV_INSTANCELIST, None)
1908     test = not isinstance(idata, list)
1909     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1910                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1911     if test:
1912       nimg.hyp_fail = True
1913     else:
1914       nimg.instances = idata
1915
1916   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1917     """Verifies and computes a node information map
1918
1919     @type ninfo: L{objects.Node}
1920     @param ninfo: the node to check
1921     @param nresult: the remote results for the node
1922     @param nimg: the node image object
1923     @param vg_name: the configured VG name
1924
1925     """
1926     node = ninfo.name
1927     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1928
1929     # try to read free memory (from the hypervisor)
1930     hv_info = nresult.get(constants.NV_HVINFO, None)
1931     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1932     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1933     if not test:
1934       try:
1935         nimg.mfree = int(hv_info["memory_free"])
1936       except (ValueError, TypeError):
1937         _ErrorIf(True, self.ENODERPC, node,
1938                  "node returned invalid nodeinfo, check hypervisor")
1939
1940     # FIXME: devise a free space model for file based instances as well
1941     if vg_name is not None:
1942       test = (constants.NV_VGLIST not in nresult or
1943               vg_name not in nresult[constants.NV_VGLIST])
1944       _ErrorIf(test, self.ENODELVM, node,
1945                "node didn't return data for the volume group '%s'"
1946                " - it is either missing or broken", vg_name)
1947       if not test:
1948         try:
1949           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1950         except (ValueError, TypeError):
1951           _ErrorIf(True, self.ENODERPC, node,
1952                    "node returned invalid LVM info, check LVM status")
1953
1954   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1955     """Gets per-disk status information for all instances.
1956
1957     @type nodelist: list of strings
1958     @param nodelist: Node names
1959     @type node_image: dict of (name, L{objects.Node})
1960     @param node_image: Node objects
1961     @type instanceinfo: dict of (name, L{objects.Instance})
1962     @param instanceinfo: Instance objects
1963     @rtype: {instance: {node: [(succes, payload)]}}
1964     @return: a dictionary of per-instance dictionaries with nodes as
1965         keys and disk information as values; the disk information is a
1966         list of tuples (success, payload)
1967
1968     """
1969     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1970
1971     node_disks = {}
1972     node_disks_devonly = {}
1973     diskless_instances = set()
1974     diskless = constants.DT_DISKLESS
1975
1976     for nname in nodelist:
1977       node_instances = list(itertools.chain(node_image[nname].pinst,
1978                                             node_image[nname].sinst))
1979       diskless_instances.update(inst for inst in node_instances
1980                                 if instanceinfo[inst].disk_template == diskless)
1981       disks = [(inst, disk)
1982                for inst in node_instances
1983                for disk in instanceinfo[inst].disks]
1984
1985       if not disks:
1986         # No need to collect data
1987         continue
1988
1989       node_disks[nname] = disks
1990
1991       # Creating copies as SetDiskID below will modify the objects and that can
1992       # lead to incorrect data returned from nodes
1993       devonly = [dev.Copy() for (_, dev) in disks]
1994
1995       for dev in devonly:
1996         self.cfg.SetDiskID(dev, nname)
1997
1998       node_disks_devonly[nname] = devonly
1999
2000     assert len(node_disks) == len(node_disks_devonly)
2001
2002     # Collect data from all nodes with disks
2003     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2004                                                           node_disks_devonly)
2005
2006     assert len(result) == len(node_disks)
2007
2008     instdisk = {}
2009
2010     for (nname, nres) in result.items():
2011       disks = node_disks[nname]
2012
2013       if nres.offline:
2014         # No data from this node
2015         data = len(disks) * [(False, "node offline")]
2016       else:
2017         msg = nres.fail_msg
2018         _ErrorIf(msg, self.ENODERPC, nname,
2019                  "while getting disk information: %s", msg)
2020         if msg:
2021           # No data from this node
2022           data = len(disks) * [(False, msg)]
2023         else:
2024           data = []
2025           for idx, i in enumerate(nres.payload):
2026             if isinstance(i, (tuple, list)) and len(i) == 2:
2027               data.append(i)
2028             else:
2029               logging.warning("Invalid result from node %s, entry %d: %s",
2030                               nname, idx, i)
2031               data.append((False, "Invalid result from the remote node"))
2032
2033       for ((inst, _), status) in zip(disks, data):
2034         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2035
2036     # Add empty entries for diskless instances.
2037     for inst in diskless_instances:
2038       assert inst not in instdisk
2039       instdisk[inst] = {}
2040
2041     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2042                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2043                       compat.all(isinstance(s, (tuple, list)) and
2044                                  len(s) == 2 for s in statuses)
2045                       for inst, nnames in instdisk.items()
2046                       for nname, statuses in nnames.items())
2047     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2048
2049     return instdisk
2050
2051   def _VerifyHVP(self, hvp_data):
2052     """Verifies locally the syntax of the hypervisor parameters.
2053
2054     """
2055     for item, hv_name, hv_params in hvp_data:
2056       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2057              (item, hv_name))
2058       try:
2059         hv_class = hypervisor.GetHypervisor(hv_name)
2060         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2061         hv_class.CheckParameterSyntax(hv_params)
2062       except errors.GenericError, err:
2063         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2064
2065
2066   def BuildHooksEnv(self):
2067     """Build hooks env.
2068
2069     Cluster-Verify hooks just ran in the post phase and their failure makes
2070     the output be logged in the verify output and the verification to fail.
2071
2072     """
2073     all_nodes = self.cfg.GetNodeList()
2074     env = {
2075       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2076       }
2077     for node in self.cfg.GetAllNodesInfo().values():
2078       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2079
2080     return env, [], all_nodes
2081
2082   def Exec(self, feedback_fn):
2083     """Verify integrity of cluster, performing various test on nodes.
2084
2085     """
2086     # This method has too many local variables. pylint: disable-msg=R0914
2087     self.bad = False
2088     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2089     verbose = self.op.verbose
2090     self._feedback_fn = feedback_fn
2091     feedback_fn("* Verifying global settings")
2092     for msg in self.cfg.VerifyConfig():
2093       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2094
2095     # Check the cluster certificates
2096     for cert_filename in constants.ALL_CERT_FILES:
2097       (errcode, msg) = _VerifyCertificate(cert_filename)
2098       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2099
2100     vg_name = self.cfg.GetVGName()
2101     drbd_helper = self.cfg.GetDRBDHelper()
2102     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2103     cluster = self.cfg.GetClusterInfo()
2104     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2105     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2106     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2107     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2108     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2109                         for iname in instancelist)
2110     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2111     i_non_redundant = [] # Non redundant instances
2112     i_non_a_balanced = [] # Non auto-balanced instances
2113     n_offline = 0 # Count of offline nodes
2114     n_drained = 0 # Count of nodes being drained
2115     node_vol_should = {}
2116
2117     # FIXME: verify OS list
2118     # do local checksums
2119     master_files = [constants.CLUSTER_CONF_FILE]
2120     master_node = self.master_node = self.cfg.GetMasterNode()
2121     master_ip = self.cfg.GetMasterIP()
2122
2123     file_names = ssconf.SimpleStore().GetFileList()
2124     file_names.extend(constants.ALL_CERT_FILES)
2125     file_names.extend(master_files)
2126     if cluster.modify_etc_hosts:
2127       file_names.append(constants.ETC_HOSTS)
2128
2129     local_checksums = utils.FingerprintFiles(file_names)
2130
2131     # Compute the set of hypervisor parameters
2132     hvp_data = []
2133     for hv_name in hypervisors:
2134       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2135     for os_name, os_hvp in cluster.os_hvp.items():
2136       for hv_name, hv_params in os_hvp.items():
2137         if not hv_params:
2138           continue
2139         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2140         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2141     # TODO: collapse identical parameter values in a single one
2142     for instance in instanceinfo.values():
2143       if not instance.hvparams:
2144         continue
2145       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2146                        cluster.FillHV(instance)))
2147     # and verify them locally
2148     self._VerifyHVP(hvp_data)
2149
2150     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2151     node_verify_param = {
2152       constants.NV_FILELIST: file_names,
2153       constants.NV_NODELIST: [node.name for node in nodeinfo
2154                               if not node.offline],
2155       constants.NV_HYPERVISOR: hypervisors,
2156       constants.NV_HVPARAMS: hvp_data,
2157       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2158                                   node.secondary_ip) for node in nodeinfo
2159                                  if not node.offline],
2160       constants.NV_INSTANCELIST: hypervisors,
2161       constants.NV_VERSION: None,
2162       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2163       constants.NV_NODESETUP: None,
2164       constants.NV_TIME: None,
2165       constants.NV_MASTERIP: (master_node, master_ip),
2166       constants.NV_OSLIST: None,
2167       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2168       }
2169
2170     if vg_name is not None:
2171       node_verify_param[constants.NV_VGLIST] = None
2172       node_verify_param[constants.NV_LVLIST] = vg_name
2173       node_verify_param[constants.NV_PVLIST] = [vg_name]
2174       node_verify_param[constants.NV_DRBDLIST] = None
2175
2176     if drbd_helper:
2177       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2178
2179     # Build our expected cluster state
2180     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2181                                                  name=node.name,
2182                                                  vm_capable=node.vm_capable))
2183                       for node in nodeinfo)
2184
2185     # Gather OOB paths
2186     oob_paths = []
2187     for node in nodeinfo:
2188       path = _SupportsOob(self.cfg, node)
2189       if path and path not in oob_paths:
2190         oob_paths.append(path)
2191
2192     if oob_paths:
2193       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2194
2195     for instance in instancelist:
2196       inst_config = instanceinfo[instance]
2197
2198       for nname in inst_config.all_nodes:
2199         if nname not in node_image:
2200           # ghost node
2201           gnode = self.NodeImage(name=nname)
2202           gnode.ghost = True
2203           node_image[nname] = gnode
2204
2205       inst_config.MapLVsByNode(node_vol_should)
2206
2207       pnode = inst_config.primary_node
2208       node_image[pnode].pinst.append(instance)
2209
2210       for snode in inst_config.secondary_nodes:
2211         nimg = node_image[snode]
2212         nimg.sinst.append(instance)
2213         if pnode not in nimg.sbp:
2214           nimg.sbp[pnode] = []
2215         nimg.sbp[pnode].append(instance)
2216
2217     # At this point, we have the in-memory data structures complete,
2218     # except for the runtime information, which we'll gather next
2219
2220     # Due to the way our RPC system works, exact response times cannot be
2221     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2222     # time before and after executing the request, we can at least have a time
2223     # window.
2224     nvinfo_starttime = time.time()
2225     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2226                                            self.cfg.GetClusterName())
2227     nvinfo_endtime = time.time()
2228
2229     all_drbd_map = self.cfg.ComputeDRBDMap()
2230
2231     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2232     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2233
2234     feedback_fn("* Verifying node status")
2235
2236     refos_img = None
2237
2238     for node_i in nodeinfo:
2239       node = node_i.name
2240       nimg = node_image[node]
2241
2242       if node_i.offline:
2243         if verbose:
2244           feedback_fn("* Skipping offline node %s" % (node,))
2245         n_offline += 1
2246         continue
2247
2248       if node == master_node:
2249         ntype = "master"
2250       elif node_i.master_candidate:
2251         ntype = "master candidate"
2252       elif node_i.drained:
2253         ntype = "drained"
2254         n_drained += 1
2255       else:
2256         ntype = "regular"
2257       if verbose:
2258         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2259
2260       msg = all_nvinfo[node].fail_msg
2261       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2262       if msg:
2263         nimg.rpc_fail = True
2264         continue
2265
2266       nresult = all_nvinfo[node].payload
2267
2268       nimg.call_ok = self._VerifyNode(node_i, nresult)
2269       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2270       self._VerifyNodeNetwork(node_i, nresult)
2271       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2272                             master_files)
2273
2274       self._VerifyOob(node_i, nresult)
2275
2276       if nimg.vm_capable:
2277         self._VerifyNodeLVM(node_i, nresult, vg_name)
2278         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2279                              all_drbd_map)
2280
2281         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2282         self._UpdateNodeInstances(node_i, nresult, nimg)
2283         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2284         self._UpdateNodeOS(node_i, nresult, nimg)
2285         if not nimg.os_fail:
2286           if refos_img is None:
2287             refos_img = nimg
2288           self._VerifyNodeOS(node_i, nimg, refos_img)
2289
2290     feedback_fn("* Verifying instance status")
2291     for instance in instancelist:
2292       if verbose:
2293         feedback_fn("* Verifying instance %s" % instance)
2294       inst_config = instanceinfo[instance]
2295       self._VerifyInstance(instance, inst_config, node_image,
2296                            instdisk[instance])
2297       inst_nodes_offline = []
2298
2299       pnode = inst_config.primary_node
2300       pnode_img = node_image[pnode]
2301       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2302                self.ENODERPC, pnode, "instance %s, connection to"
2303                " primary node failed", instance)
2304
2305       _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2306                "instance lives on offline node %s", inst_config.primary_node)
2307
2308       # If the instance is non-redundant we cannot survive losing its primary
2309       # node, so we are not N+1 compliant. On the other hand we have no disk
2310       # templates with more than one secondary so that situation is not well
2311       # supported either.
2312       # FIXME: does not support file-backed instances
2313       if not inst_config.secondary_nodes:
2314         i_non_redundant.append(instance)
2315
2316       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2317                instance, "instance has multiple secondary nodes: %s",
2318                utils.CommaJoin(inst_config.secondary_nodes),
2319                code=self.ETYPE_WARNING)
2320
2321       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2322         pnode = inst_config.primary_node
2323         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2324         instance_groups = {}
2325
2326         for node in instance_nodes:
2327           instance_groups.setdefault(nodeinfo_byname[node].group,
2328                                      []).append(node)
2329
2330         pretty_list = [
2331           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2332           # Sort so that we always list the primary node first.
2333           for group, nodes in sorted(instance_groups.items(),
2334                                      key=lambda (_, nodes): pnode in nodes,
2335                                      reverse=True)]
2336
2337         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2338                       instance, "instance has primary and secondary nodes in"
2339                       " different groups: %s", utils.CommaJoin(pretty_list),
2340                       code=self.ETYPE_WARNING)
2341
2342       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2343         i_non_a_balanced.append(instance)
2344
2345       for snode in inst_config.secondary_nodes:
2346         s_img = node_image[snode]
2347         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2348                  "instance %s, connection to secondary node failed", instance)
2349
2350         if s_img.offline:
2351           inst_nodes_offline.append(snode)
2352
2353       # warn that the instance lives on offline nodes
2354       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2355                "instance has offline secondary node(s) %s",
2356                utils.CommaJoin(inst_nodes_offline))
2357       # ... or ghost/non-vm_capable nodes
2358       for node in inst_config.all_nodes:
2359         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2360                  "instance lives on ghost node %s", node)
2361         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2362                  instance, "instance lives on non-vm_capable node %s", node)
2363
2364     feedback_fn("* Verifying orphan volumes")
2365     reserved = utils.FieldSet(*cluster.reserved_lvs)
2366     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2367
2368     feedback_fn("* Verifying orphan instances")
2369     self._VerifyOrphanInstances(instancelist, node_image)
2370
2371     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2372       feedback_fn("* Verifying N+1 Memory redundancy")
2373       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2374
2375     feedback_fn("* Other Notes")
2376     if i_non_redundant:
2377       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2378                   % len(i_non_redundant))
2379
2380     if i_non_a_balanced:
2381       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2382                   % len(i_non_a_balanced))
2383
2384     if n_offline:
2385       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2386
2387     if n_drained:
2388       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2389
2390     return not self.bad
2391
2392   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2393     """Analyze the post-hooks' result
2394
2395     This method analyses the hook result, handles it, and sends some
2396     nicely-formatted feedback back to the user.
2397
2398     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2399         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2400     @param hooks_results: the results of the multi-node hooks rpc call
2401     @param feedback_fn: function used send feedback back to the caller
2402     @param lu_result: previous Exec result
2403     @return: the new Exec result, based on the previous result
2404         and hook results
2405
2406     """
2407     # We only really run POST phase hooks, and are only interested in
2408     # their results
2409     if phase == constants.HOOKS_PHASE_POST:
2410       # Used to change hooks' output to proper indentation
2411       feedback_fn("* Hooks Results")
2412       assert hooks_results, "invalid result from hooks"
2413
2414       for node_name in hooks_results:
2415         res = hooks_results[node_name]
2416         msg = res.fail_msg
2417         test = msg and not res.offline
2418         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2419                       "Communication failure in hooks execution: %s", msg)
2420         if res.offline or msg:
2421           # No need to investigate payload if node is offline or gave an error.
2422           # override manually lu_result here as _ErrorIf only
2423           # overrides self.bad
2424           lu_result = 1
2425           continue
2426         for script, hkr, output in res.payload:
2427           test = hkr == constants.HKR_FAIL
2428           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2429                         "Script %s failed, output:", script)
2430           if test:
2431             output = self._HOOKS_INDENT_RE.sub('      ', output)
2432             feedback_fn("%s" % output)
2433             lu_result = 0
2434
2435       return lu_result
2436
2437
2438 class LUClusterVerifyDisks(NoHooksLU):
2439   """Verifies the cluster disks status.
2440
2441   """
2442   REQ_BGL = False
2443
2444   def ExpandNames(self):
2445     self.needed_locks = {
2446       locking.LEVEL_NODE: locking.ALL_SET,
2447       locking.LEVEL_INSTANCE: locking.ALL_SET,
2448     }
2449     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2450
2451   def Exec(self, feedback_fn):
2452     """Verify integrity of cluster disks.
2453
2454     @rtype: tuple of three items
2455     @return: a tuple of (dict of node-to-node_error, list of instances
2456         which need activate-disks, dict of instance: (node, volume) for
2457         missing volumes
2458
2459     """
2460     result = res_nodes, res_instances, res_missing = {}, [], {}
2461
2462     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2463     instances = self.cfg.GetAllInstancesInfo().values()
2464
2465     nv_dict = {}
2466     for inst in instances:
2467       inst_lvs = {}
2468       if not inst.admin_up:
2469         continue
2470       inst.MapLVsByNode(inst_lvs)
2471       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2472       for node, vol_list in inst_lvs.iteritems():
2473         for vol in vol_list:
2474           nv_dict[(node, vol)] = inst
2475
2476     if not nv_dict:
2477       return result
2478
2479     node_lvs = self.rpc.call_lv_list(nodes, [])
2480     for node, node_res in node_lvs.items():
2481       if node_res.offline:
2482         continue
2483       msg = node_res.fail_msg
2484       if msg:
2485         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2486         res_nodes[node] = msg
2487         continue
2488
2489       lvs = node_res.payload
2490       for lv_name, (_, _, lv_online) in lvs.items():
2491         inst = nv_dict.pop((node, lv_name), None)
2492         if (not lv_online and inst is not None
2493             and inst.name not in res_instances):
2494           res_instances.append(inst.name)
2495
2496     # any leftover items in nv_dict are missing LVs, let's arrange the
2497     # data better
2498     for key, inst in nv_dict.iteritems():
2499       if inst.name not in res_missing:
2500         res_missing[inst.name] = []
2501       res_missing[inst.name].append(key)
2502
2503     return result
2504
2505
2506 class LUClusterRepairDiskSizes(NoHooksLU):
2507   """Verifies the cluster disks sizes.
2508
2509   """
2510   REQ_BGL = False
2511
2512   def ExpandNames(self):
2513     if self.op.instances:
2514       self.wanted_names = []
2515       for name in self.op.instances:
2516         full_name = _ExpandInstanceName(self.cfg, name)
2517         self.wanted_names.append(full_name)
2518       self.needed_locks = {
2519         locking.LEVEL_NODE: [],
2520         locking.LEVEL_INSTANCE: self.wanted_names,
2521         }
2522       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2523     else:
2524       self.wanted_names = None
2525       self.needed_locks = {
2526         locking.LEVEL_NODE: locking.ALL_SET,
2527         locking.LEVEL_INSTANCE: locking.ALL_SET,
2528         }
2529     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2530
2531   def DeclareLocks(self, level):
2532     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2533       self._LockInstancesNodes(primary_only=True)
2534
2535   def CheckPrereq(self):
2536     """Check prerequisites.
2537
2538     This only checks the optional instance list against the existing names.
2539
2540     """
2541     if self.wanted_names is None:
2542       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2543
2544     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2545                              in self.wanted_names]
2546
2547   def _EnsureChildSizes(self, disk):
2548     """Ensure children of the disk have the needed disk size.
2549
2550     This is valid mainly for DRBD8 and fixes an issue where the
2551     children have smaller disk size.
2552
2553     @param disk: an L{ganeti.objects.Disk} object
2554
2555     """
2556     if disk.dev_type == constants.LD_DRBD8:
2557       assert disk.children, "Empty children for DRBD8?"
2558       fchild = disk.children[0]
2559       mismatch = fchild.size < disk.size
2560       if mismatch:
2561         self.LogInfo("Child disk has size %d, parent %d, fixing",
2562                      fchild.size, disk.size)
2563         fchild.size = disk.size
2564
2565       # and we recurse on this child only, not on the metadev
2566       return self._EnsureChildSizes(fchild) or mismatch
2567     else:
2568       return False
2569
2570   def Exec(self, feedback_fn):
2571     """Verify the size of cluster disks.
2572
2573     """
2574     # TODO: check child disks too
2575     # TODO: check differences in size between primary/secondary nodes
2576     per_node_disks = {}
2577     for instance in self.wanted_instances:
2578       pnode = instance.primary_node
2579       if pnode not in per_node_disks:
2580         per_node_disks[pnode] = []
2581       for idx, disk in enumerate(instance.disks):
2582         per_node_disks[pnode].append((instance, idx, disk))
2583
2584     changed = []
2585     for node, dskl in per_node_disks.items():
2586       newl = [v[2].Copy() for v in dskl]
2587       for dsk in newl:
2588         self.cfg.SetDiskID(dsk, node)
2589       result = self.rpc.call_blockdev_getsize(node, newl)
2590       if result.fail_msg:
2591         self.LogWarning("Failure in blockdev_getsize call to node"
2592                         " %s, ignoring", node)
2593         continue
2594       if len(result.payload) != len(dskl):
2595         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2596                         " result.payload=%s", node, len(dskl), result.payload)
2597         self.LogWarning("Invalid result from node %s, ignoring node results",
2598                         node)
2599         continue
2600       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2601         if size is None:
2602           self.LogWarning("Disk %d of instance %s did not return size"
2603                           " information, ignoring", idx, instance.name)
2604           continue
2605         if not isinstance(size, (int, long)):
2606           self.LogWarning("Disk %d of instance %s did not return valid"
2607                           " size information, ignoring", idx, instance.name)
2608           continue
2609         size = size >> 20
2610         if size != disk.size:
2611           self.LogInfo("Disk %d of instance %s has mismatched size,"
2612                        " correcting: recorded %d, actual %d", idx,
2613                        instance.name, disk.size, size)
2614           disk.size = size
2615           self.cfg.Update(instance, feedback_fn)
2616           changed.append((instance.name, idx, size))
2617         if self._EnsureChildSizes(disk):
2618           self.cfg.Update(instance, feedback_fn)
2619           changed.append((instance.name, idx, disk.size))
2620     return changed
2621
2622
2623 class LUClusterRename(LogicalUnit):
2624   """Rename the cluster.
2625
2626   """
2627   HPATH = "cluster-rename"
2628   HTYPE = constants.HTYPE_CLUSTER
2629
2630   def BuildHooksEnv(self):
2631     """Build hooks env.
2632
2633     """
2634     env = {
2635       "OP_TARGET": self.cfg.GetClusterName(),
2636       "NEW_NAME": self.op.name,
2637       }
2638     mn = self.cfg.GetMasterNode()
2639     all_nodes = self.cfg.GetNodeList()
2640     return env, [mn], all_nodes
2641
2642   def CheckPrereq(self):
2643     """Verify that the passed name is a valid one.
2644
2645     """
2646     hostname = netutils.GetHostname(name=self.op.name,
2647                                     family=self.cfg.GetPrimaryIPFamily())
2648
2649     new_name = hostname.name
2650     self.ip = new_ip = hostname.ip
2651     old_name = self.cfg.GetClusterName()
2652     old_ip = self.cfg.GetMasterIP()
2653     if new_name == old_name and new_ip == old_ip:
2654       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2655                                  " cluster has changed",
2656                                  errors.ECODE_INVAL)
2657     if new_ip != old_ip:
2658       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2659         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2660                                    " reachable on the network" %
2661                                    new_ip, errors.ECODE_NOTUNIQUE)
2662
2663     self.op.name = new_name
2664
2665   def Exec(self, feedback_fn):
2666     """Rename the cluster.
2667
2668     """
2669     clustername = self.op.name
2670     ip = self.ip
2671
2672     # shutdown the master IP
2673     master = self.cfg.GetMasterNode()
2674     result = self.rpc.call_node_stop_master(master, False)
2675     result.Raise("Could not disable the master role")
2676
2677     try:
2678       cluster = self.cfg.GetClusterInfo()
2679       cluster.cluster_name = clustername
2680       cluster.master_ip = ip
2681       self.cfg.Update(cluster, feedback_fn)
2682
2683       # update the known hosts file
2684       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2685       node_list = self.cfg.GetOnlineNodeList()
2686       try:
2687         node_list.remove(master)
2688       except ValueError:
2689         pass
2690       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2691     finally:
2692       result = self.rpc.call_node_start_master(master, False, False)
2693       msg = result.fail_msg
2694       if msg:
2695         self.LogWarning("Could not re-enable the master role on"
2696                         " the master, please restart manually: %s", msg)
2697
2698     return clustername
2699
2700
2701 class LUClusterSetParams(LogicalUnit):
2702   """Change the parameters of the cluster.
2703
2704   """
2705   HPATH = "cluster-modify"
2706   HTYPE = constants.HTYPE_CLUSTER
2707   REQ_BGL = False
2708
2709   def CheckArguments(self):
2710     """Check parameters
2711
2712     """
2713     if self.op.uid_pool:
2714       uidpool.CheckUidPool(self.op.uid_pool)
2715
2716     if self.op.add_uids:
2717       uidpool.CheckUidPool(self.op.add_uids)
2718
2719     if self.op.remove_uids:
2720       uidpool.CheckUidPool(self.op.remove_uids)
2721
2722   def ExpandNames(self):
2723     # FIXME: in the future maybe other cluster params won't require checking on
2724     # all nodes to be modified.
2725     self.needed_locks = {
2726       locking.LEVEL_NODE: locking.ALL_SET,
2727     }
2728     self.share_locks[locking.LEVEL_NODE] = 1
2729
2730   def BuildHooksEnv(self):
2731     """Build hooks env.
2732
2733     """
2734     env = {
2735       "OP_TARGET": self.cfg.GetClusterName(),
2736       "NEW_VG_NAME": self.op.vg_name,
2737       }
2738     mn = self.cfg.GetMasterNode()
2739     return env, [mn], [mn]
2740
2741   def CheckPrereq(self):
2742     """Check prerequisites.
2743
2744     This checks whether the given params don't conflict and
2745     if the given volume group is valid.
2746
2747     """
2748     if self.op.vg_name is not None and not self.op.vg_name:
2749       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2750         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2751                                    " instances exist", errors.ECODE_INVAL)
2752
2753     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2754       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2755         raise errors.OpPrereqError("Cannot disable drbd helper while"
2756                                    " drbd-based instances exist",
2757                                    errors.ECODE_INVAL)
2758
2759     node_list = self.acquired_locks[locking.LEVEL_NODE]
2760
2761     # if vg_name not None, checks given volume group on all nodes
2762     if self.op.vg_name:
2763       vglist = self.rpc.call_vg_list(node_list)
2764       for node in node_list:
2765         msg = vglist[node].fail_msg
2766         if msg:
2767           # ignoring down node
2768           self.LogWarning("Error while gathering data on node %s"
2769                           " (ignoring node): %s", node, msg)
2770           continue
2771         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2772                                               self.op.vg_name,
2773                                               constants.MIN_VG_SIZE)
2774         if vgstatus:
2775           raise errors.OpPrereqError("Error on node '%s': %s" %
2776                                      (node, vgstatus), errors.ECODE_ENVIRON)
2777
2778     if self.op.drbd_helper:
2779       # checks given drbd helper on all nodes
2780       helpers = self.rpc.call_drbd_helper(node_list)
2781       for node in node_list:
2782         ninfo = self.cfg.GetNodeInfo(node)
2783         if ninfo.offline:
2784           self.LogInfo("Not checking drbd helper on offline node %s", node)
2785           continue
2786         msg = helpers[node].fail_msg
2787         if msg:
2788           raise errors.OpPrereqError("Error checking drbd helper on node"
2789                                      " '%s': %s" % (node, msg),
2790                                      errors.ECODE_ENVIRON)
2791         node_helper = helpers[node].payload
2792         if node_helper != self.op.drbd_helper:
2793           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2794                                      (node, node_helper), errors.ECODE_ENVIRON)
2795
2796     self.cluster = cluster = self.cfg.GetClusterInfo()
2797     # validate params changes
2798     if self.op.beparams:
2799       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2800       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2801
2802     if self.op.ndparams:
2803       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2804       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2805
2806     if self.op.nicparams:
2807       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2808       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2809       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2810       nic_errors = []
2811
2812       # check all instances for consistency
2813       for instance in self.cfg.GetAllInstancesInfo().values():
2814         for nic_idx, nic in enumerate(instance.nics):
2815           params_copy = copy.deepcopy(nic.nicparams)
2816           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2817
2818           # check parameter syntax
2819           try:
2820             objects.NIC.CheckParameterSyntax(params_filled)
2821           except errors.ConfigurationError, err:
2822             nic_errors.append("Instance %s, nic/%d: %s" %
2823                               (instance.name, nic_idx, err))
2824
2825           # if we're moving instances to routed, check that they have an ip
2826           target_mode = params_filled[constants.NIC_MODE]
2827           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2828             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2829                               (instance.name, nic_idx))
2830       if nic_errors:
2831         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2832                                    "\n".join(nic_errors))
2833
2834     # hypervisor list/parameters
2835     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2836     if self.op.hvparams:
2837       for hv_name, hv_dict in self.op.hvparams.items():
2838         if hv_name not in self.new_hvparams:
2839           self.new_hvparams[hv_name] = hv_dict
2840         else:
2841           self.new_hvparams[hv_name].update(hv_dict)
2842
2843     # os hypervisor parameters
2844     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2845     if self.op.os_hvp:
2846       for os_name, hvs in self.op.os_hvp.items():
2847         if os_name not in self.new_os_hvp:
2848           self.new_os_hvp[os_name] = hvs
2849         else:
2850           for hv_name, hv_dict in hvs.items():
2851             if hv_name not in self.new_os_hvp[os_name]:
2852               self.new_os_hvp[os_name][hv_name] = hv_dict
2853             else:
2854               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2855
2856     # os parameters
2857     self.new_osp = objects.FillDict(cluster.osparams, {})
2858     if self.op.osparams:
2859       for os_name, osp in self.op.osparams.items():
2860         if os_name not in self.new_osp:
2861           self.new_osp[os_name] = {}
2862
2863         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2864                                                   use_none=True)
2865
2866         if not self.new_osp[os_name]:
2867           # we removed all parameters
2868           del self.new_osp[os_name]
2869         else:
2870           # check the parameter validity (remote check)
2871           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2872                          os_name, self.new_osp[os_name])
2873
2874     # changes to the hypervisor list
2875     if self.op.enabled_hypervisors is not None:
2876       self.hv_list = self.op.enabled_hypervisors
2877       for hv in self.hv_list:
2878         # if the hypervisor doesn't already exist in the cluster
2879         # hvparams, we initialize it to empty, and then (in both
2880         # cases) we make sure to fill the defaults, as we might not
2881         # have a complete defaults list if the hypervisor wasn't
2882         # enabled before
2883         if hv not in new_hvp:
2884           new_hvp[hv] = {}
2885         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2886         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2887     else:
2888       self.hv_list = cluster.enabled_hypervisors
2889
2890     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2891       # either the enabled list has changed, or the parameters have, validate
2892       for hv_name, hv_params in self.new_hvparams.items():
2893         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2894             (self.op.enabled_hypervisors and
2895              hv_name in self.op.enabled_hypervisors)):
2896           # either this is a new hypervisor, or its parameters have changed
2897           hv_class = hypervisor.GetHypervisor(hv_name)
2898           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2899           hv_class.CheckParameterSyntax(hv_params)
2900           _CheckHVParams(self, node_list, hv_name, hv_params)
2901
2902     if self.op.os_hvp:
2903       # no need to check any newly-enabled hypervisors, since the
2904       # defaults have already been checked in the above code-block
2905       for os_name, os_hvp in self.new_os_hvp.items():
2906         for hv_name, hv_params in os_hvp.items():
2907           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2908           # we need to fill in the new os_hvp on top of the actual hv_p
2909           cluster_defaults = self.new_hvparams.get(hv_name, {})
2910           new_osp = objects.FillDict(cluster_defaults, hv_params)
2911           hv_class = hypervisor.GetHypervisor(hv_name)
2912           hv_class.CheckParameterSyntax(new_osp)
2913           _CheckHVParams(self, node_list, hv_name, new_osp)
2914
2915     if self.op.default_iallocator:
2916       alloc_script = utils.FindFile(self.op.default_iallocator,
2917                                     constants.IALLOCATOR_SEARCH_PATH,
2918                                     os.path.isfile)
2919       if alloc_script is None:
2920         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2921                                    " specified" % self.op.default_iallocator,
2922                                    errors.ECODE_INVAL)
2923
2924   def Exec(self, feedback_fn):
2925     """Change the parameters of the cluster.
2926
2927     """
2928     if self.op.vg_name is not None:
2929       new_volume = self.op.vg_name
2930       if not new_volume:
2931         new_volume = None
2932       if new_volume != self.cfg.GetVGName():
2933         self.cfg.SetVGName(new_volume)
2934       else:
2935         feedback_fn("Cluster LVM configuration already in desired"
2936                     " state, not changing")
2937     if self.op.drbd_helper is not None:
2938       new_helper = self.op.drbd_helper
2939       if not new_helper:
2940         new_helper = None
2941       if new_helper != self.cfg.GetDRBDHelper():
2942         self.cfg.SetDRBDHelper(new_helper)
2943       else:
2944         feedback_fn("Cluster DRBD helper already in desired state,"
2945                     " not changing")
2946     if self.op.hvparams:
2947       self.cluster.hvparams = self.new_hvparams
2948     if self.op.os_hvp:
2949       self.cluster.os_hvp = self.new_os_hvp
2950     if self.op.enabled_hypervisors is not None:
2951       self.cluster.hvparams = self.new_hvparams
2952       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2953     if self.op.beparams:
2954       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2955     if self.op.nicparams:
2956       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2957     if self.op.osparams:
2958       self.cluster.osparams = self.new_osp
2959     if self.op.ndparams:
2960       self.cluster.ndparams = self.new_ndparams
2961
2962     if self.op.candidate_pool_size is not None:
2963       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2964       # we need to update the pool size here, otherwise the save will fail
2965       _AdjustCandidatePool(self, [])
2966
2967     if self.op.maintain_node_health is not None:
2968       self.cluster.maintain_node_health = self.op.maintain_node_health
2969
2970     if self.op.prealloc_wipe_disks is not None:
2971       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2972
2973     if self.op.add_uids is not None:
2974       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2975
2976     if self.op.remove_uids is not None:
2977       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2978
2979     if self.op.uid_pool is not None:
2980       self.cluster.uid_pool = self.op.uid_pool
2981
2982     if self.op.default_iallocator is not None:
2983       self.cluster.default_iallocator = self.op.default_iallocator
2984
2985     if self.op.reserved_lvs is not None:
2986       self.cluster.reserved_lvs = self.op.reserved_lvs
2987
2988     def helper_os(aname, mods, desc):
2989       desc += " OS list"
2990       lst = getattr(self.cluster, aname)
2991       for key, val in mods:
2992         if key == constants.DDM_ADD:
2993           if val in lst:
2994             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2995           else:
2996             lst.append(val)
2997         elif key == constants.DDM_REMOVE:
2998           if val in lst:
2999             lst.remove(val)
3000           else:
3001             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3002         else:
3003           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3004
3005     if self.op.hidden_os:
3006       helper_os("hidden_os", self.op.hidden_os, "hidden")
3007
3008     if self.op.blacklisted_os:
3009       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3010
3011     if self.op.master_netdev:
3012       master = self.cfg.GetMasterNode()
3013       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3014                   self.cluster.master_netdev)
3015       result = self.rpc.call_node_stop_master(master, False)
3016       result.Raise("Could not disable the master ip")
3017       feedback_fn("Changing master_netdev from %s to %s" %
3018                   (self.cluster.master_netdev, self.op.master_netdev))
3019       self.cluster.master_netdev = self.op.master_netdev
3020
3021     self.cfg.Update(self.cluster, feedback_fn)
3022
3023     if self.op.master_netdev:
3024       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3025                   self.op.master_netdev)
3026       result = self.rpc.call_node_start_master(master, False, False)
3027       if result.fail_msg:
3028         self.LogWarning("Could not re-enable the master ip on"
3029                         " the master, please restart manually: %s",
3030                         result.fail_msg)
3031
3032
3033 def _UploadHelper(lu, nodes, fname):
3034   """Helper for uploading a file and showing warnings.
3035
3036   """
3037   if os.path.exists(fname):
3038     result = lu.rpc.call_upload_file(nodes, fname)
3039     for to_node, to_result in result.items():
3040       msg = to_result.fail_msg
3041       if msg:
3042         msg = ("Copy of file %s to node %s failed: %s" %
3043                (fname, to_node, msg))
3044         lu.proc.LogWarning(msg)
3045
3046
3047 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3048   """Distribute additional files which are part of the cluster configuration.
3049
3050   ConfigWriter takes care of distributing the config and ssconf files, but
3051   there are more files which should be distributed to all nodes. This function
3052   makes sure those are copied.
3053
3054   @param lu: calling logical unit
3055   @param additional_nodes: list of nodes not in the config to distribute to
3056   @type additional_vm: boolean
3057   @param additional_vm: whether the additional nodes are vm-capable or not
3058
3059   """
3060   # 1. Gather target nodes
3061   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3062   dist_nodes = lu.cfg.GetOnlineNodeList()
3063   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3064   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3065   if additional_nodes is not None:
3066     dist_nodes.extend(additional_nodes)
3067     if additional_vm:
3068       vm_nodes.extend(additional_nodes)
3069   if myself.name in dist_nodes:
3070     dist_nodes.remove(myself.name)
3071   if myself.name in vm_nodes:
3072     vm_nodes.remove(myself.name)
3073
3074   # 2. Gather files to distribute
3075   dist_files = set([constants.ETC_HOSTS,
3076                     constants.SSH_KNOWN_HOSTS_FILE,
3077                     constants.RAPI_CERT_FILE,
3078                     constants.RAPI_USERS_FILE,
3079                     constants.CONFD_HMAC_KEY,
3080                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3081                    ])
3082
3083   vm_files = set()
3084   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3085   for hv_name in enabled_hypervisors:
3086     hv_class = hypervisor.GetHypervisor(hv_name)
3087     vm_files.update(hv_class.GetAncillaryFiles())
3088
3089   # 3. Perform the files upload
3090   for fname in dist_files:
3091     _UploadHelper(lu, dist_nodes, fname)
3092   for fname in vm_files:
3093     _UploadHelper(lu, vm_nodes, fname)
3094
3095
3096 class LUClusterRedistConf(NoHooksLU):
3097   """Force the redistribution of cluster configuration.
3098
3099   This is a very simple LU.
3100
3101   """
3102   REQ_BGL = False
3103
3104   def ExpandNames(self):
3105     self.needed_locks = {
3106       locking.LEVEL_NODE: locking.ALL_SET,
3107     }
3108     self.share_locks[locking.LEVEL_NODE] = 1
3109
3110   def Exec(self, feedback_fn):
3111     """Redistribute the configuration.
3112
3113     """
3114     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3115     _RedistributeAncillaryFiles(self)
3116
3117
3118 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3119   """Sleep and poll for an instance's disk to sync.
3120
3121   """
3122   if not instance.disks or disks is not None and not disks:
3123     return True
3124
3125   disks = _ExpandCheckDisks(instance, disks)
3126
3127   if not oneshot:
3128     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3129
3130   node = instance.primary_node
3131
3132   for dev in disks:
3133     lu.cfg.SetDiskID(dev, node)
3134
3135   # TODO: Convert to utils.Retry
3136
3137   retries = 0
3138   degr_retries = 10 # in seconds, as we sleep 1 second each time
3139   while True:
3140     max_time = 0
3141     done = True
3142     cumul_degraded = False
3143     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3144     msg = rstats.fail_msg
3145     if msg:
3146       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3147       retries += 1
3148       if retries >= 10:
3149         raise errors.RemoteError("Can't contact node %s for mirror data,"
3150                                  " aborting." % node)
3151       time.sleep(6)
3152       continue
3153     rstats = rstats.payload
3154     retries = 0
3155     for i, mstat in enumerate(rstats):
3156       if mstat is None:
3157         lu.LogWarning("Can't compute data for node %s/%s",
3158                            node, disks[i].iv_name)
3159         continue
3160
3161       cumul_degraded = (cumul_degraded or
3162                         (mstat.is_degraded and mstat.sync_percent is None))
3163       if mstat.sync_percent is not None:
3164         done = False
3165         if mstat.estimated_time is not None:
3166           rem_time = ("%s remaining (estimated)" %
3167                       utils.FormatSeconds(mstat.estimated_time))
3168           max_time = mstat.estimated_time
3169         else:
3170           rem_time = "no time estimate"
3171         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3172                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3173
3174     # if we're done but degraded, let's do a few small retries, to
3175     # make sure we see a stable and not transient situation; therefore
3176     # we force restart of the loop
3177     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3178       logging.info("Degraded disks found, %d retries left", degr_retries)
3179       degr_retries -= 1
3180       time.sleep(1)
3181       continue
3182
3183     if done or oneshot:
3184       break
3185
3186     time.sleep(min(60, max_time))
3187
3188   if done:
3189     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3190   return not cumul_degraded
3191
3192
3193 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3194   """Check that mirrors are not degraded.
3195
3196   The ldisk parameter, if True, will change the test from the
3197   is_degraded attribute (which represents overall non-ok status for
3198   the device(s)) to the ldisk (representing the local storage status).
3199
3200   """
3201   lu.cfg.SetDiskID(dev, node)
3202
3203   result = True
3204
3205   if on_primary or dev.AssembleOnSecondary():
3206     rstats = lu.rpc.call_blockdev_find(node, dev)
3207     msg = rstats.fail_msg
3208     if msg:
3209       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3210       result = False
3211     elif not rstats.payload:
3212       lu.LogWarning("Can't find disk on node %s", node)
3213       result = False
3214     else:
3215       if ldisk:
3216         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3217       else:
3218         result = result and not rstats.payload.is_degraded
3219
3220   if dev.children:
3221     for child in dev.children:
3222       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3223
3224   return result
3225
3226
3227 class LUOobCommand(NoHooksLU):
3228   """Logical unit for OOB handling.
3229
3230   """
3231   REG_BGL = False
3232   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3233
3234   def CheckPrereq(self):
3235     """Check prerequisites.
3236
3237     This checks:
3238      - the node exists in the configuration
3239      - OOB is supported
3240
3241     Any errors are signaled by raising errors.OpPrereqError.
3242
3243     """
3244     self.nodes = []
3245     self.master_node = self.cfg.GetMasterNode()
3246
3247     if self.op.node_names:
3248       if self.op.command in self._SKIP_MASTER:
3249         if self.master_node in self.op.node_names:
3250           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3251           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3252
3253           if master_oob_handler:
3254             additional_text = ("Run '%s %s %s' if you want to operate on the"
3255                                " master regardless") % (master_oob_handler,
3256                                                         self.op.command,
3257                                                         self.master_node)
3258           else:
3259             additional_text = "The master node does not support out-of-band"
3260
3261           raise errors.OpPrereqError(("Operating on the master node %s is not"
3262                                       " allowed for %s\n%s") %
3263                                      (self.master_node, self.op.command,
3264                                       additional_text), errors.ECODE_INVAL)
3265     else:
3266       self.op.node_names = self.cfg.GetNodeList()
3267       if self.op.command in self._SKIP_MASTER:
3268         self.op.node_names.remove(self.master_node)
3269
3270     if self.op.command in self._SKIP_MASTER:
3271       assert self.master_node not in self.op.node_names
3272
3273     for node_name in self.op.node_names:
3274       node = self.cfg.GetNodeInfo(node_name)
3275
3276       if node is None:
3277         raise errors.OpPrereqError("Node %s not found" % node_name,
3278                                    errors.ECODE_NOENT)
3279       else:
3280         self.nodes.append(node)
3281
3282       if (not self.op.ignore_status and
3283           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3284         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3285                                     " not marked offline") % node_name,
3286                                    errors.ECODE_STATE)
3287
3288   def ExpandNames(self):
3289     """Gather locks we need.
3290
3291     """
3292     if self.op.node_names:
3293       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3294                             for name in self.op.node_names]
3295       lock_names = self.op.node_names
3296     else:
3297       lock_names = locking.ALL_SET
3298
3299     self.needed_locks = {
3300       locking.LEVEL_NODE: lock_names,
3301       }
3302
3303   def Exec(self, feedback_fn):
3304     """Execute OOB and return result if we expect any.
3305
3306     """
3307     master_node = self.master_node
3308     ret = []
3309
3310     for node in self.nodes:
3311       node_entry = [(constants.RS_NORMAL, node.name)]
3312       ret.append(node_entry)
3313
3314       oob_program = _SupportsOob(self.cfg, node)
3315
3316       if not oob_program:
3317         node_entry.append((constants.RS_UNAVAIL, None))
3318         continue
3319
3320       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3321                    self.op.command, oob_program, node.name)
3322       result = self.rpc.call_run_oob(master_node, oob_program,
3323                                      self.op.command, node.name,
3324                                      self.op.timeout)
3325
3326       if result.fail_msg:
3327         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3328                         node.name, result.fail_msg)
3329         node_entry.append((constants.RS_NODATA, None))
3330       else:
3331         try:
3332           self._CheckPayload(result)
3333         except errors.OpExecError, err:
3334           self.LogWarning("The payload returned by '%s' is not valid: %s",
3335                           node.name, err)
3336           node_entry.append((constants.RS_NODATA, None))
3337         else:
3338           if self.op.command == constants.OOB_HEALTH:
3339             # For health we should log important events
3340             for item, status in result.payload:
3341               if status in [constants.OOB_STATUS_WARNING,
3342                             constants.OOB_STATUS_CRITICAL]:
3343                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3344                                 node.name, item, status)
3345
3346           if self.op.command == constants.OOB_POWER_ON:
3347             node.powered = True
3348           elif self.op.command == constants.OOB_POWER_OFF:
3349             node.powered = False
3350           elif self.op.command == constants.OOB_POWER_STATUS:
3351             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3352             if powered != node.powered:
3353               logging.warning(("Recorded power state (%s) of node '%s' does not"
3354                                " match actual power state (%s)"), node.powered,
3355                               node.name, powered)
3356
3357           # For configuration changing commands we should update the node
3358           if self.op.command in (constants.OOB_POWER_ON,
3359                                  constants.OOB_POWER_OFF):
3360             self.cfg.Update(node, feedback_fn)
3361
3362           node_entry.append((constants.RS_NORMAL, result.payload))
3363
3364     return ret
3365
3366   def _CheckPayload(self, result):
3367     """Checks if the payload is valid.
3368
3369     @param result: RPC result
3370     @raises errors.OpExecError: If payload is not valid
3371
3372     """
3373     errs = []
3374     if self.op.command == constants.OOB_HEALTH:
3375       if not isinstance(result.payload, list):
3376         errs.append("command 'health' is expected to return a list but got %s" %
3377                     type(result.payload))
3378       else:
3379         for item, status in result.payload:
3380           if status not in constants.OOB_STATUSES:
3381             errs.append("health item '%s' has invalid status '%s'" %
3382                         (item, status))
3383
3384     if self.op.command == constants.OOB_POWER_STATUS:
3385       if not isinstance(result.payload, dict):
3386         errs.append("power-status is expected to return a dict but got %s" %
3387                     type(result.payload))
3388
3389     if self.op.command in [
3390         constants.OOB_POWER_ON,
3391         constants.OOB_POWER_OFF,
3392         constants.OOB_POWER_CYCLE,
3393         ]:
3394       if result.payload is not None:
3395         errs.append("%s is expected to not return payload but got '%s'" %
3396                     (self.op.command, result.payload))
3397
3398     if errs:
3399       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3400                                utils.CommaJoin(errs))
3401
3402
3403
3404 class LUOsDiagnose(NoHooksLU):
3405   """Logical unit for OS diagnose/query.
3406
3407   """
3408   REQ_BGL = False
3409   _HID = "hidden"
3410   _BLK = "blacklisted"
3411   _VLD = "valid"
3412   _FIELDS_STATIC = utils.FieldSet()
3413   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3414                                    "parameters", "api_versions", _HID, _BLK)
3415
3416   def CheckArguments(self):
3417     if self.op.names:
3418       raise errors.OpPrereqError("Selective OS query not supported",
3419                                  errors.ECODE_INVAL)
3420
3421     _CheckOutputFields(static=self._FIELDS_STATIC,
3422                        dynamic=self._FIELDS_DYNAMIC,
3423                        selected=self.op.output_fields)
3424
3425   def ExpandNames(self):
3426     # Lock all nodes, in shared mode
3427     # Temporary removal of locks, should be reverted later
3428     # TODO: reintroduce locks when they are lighter-weight
3429     self.needed_locks = {}
3430     #self.share_locks[locking.LEVEL_NODE] = 1
3431     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3432
3433   @staticmethod
3434   def _DiagnoseByOS(rlist):
3435     """Remaps a per-node return list into an a per-os per-node dictionary
3436
3437     @param rlist: a map with node names as keys and OS objects as values
3438
3439     @rtype: dict
3440     @return: a dictionary with osnames as keys and as value another
3441         map, with nodes as keys and tuples of (path, status, diagnose,
3442         variants, parameters, api_versions) as values, eg::
3443
3444           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3445                                      (/srv/..., False, "invalid api")],
3446                            "node2": [(/srv/..., True, "", [], [])]}
3447           }
3448
3449     """
3450     all_os = {}
3451     # we build here the list of nodes that didn't fail the RPC (at RPC
3452     # level), so that nodes with a non-responding node daemon don't
3453     # make all OSes invalid
3454     good_nodes = [node_name for node_name in rlist
3455                   if not rlist[node_name].fail_msg]
3456     for node_name, nr in rlist.items():
3457       if nr.fail_msg or not nr.payload:
3458         continue
3459       for (name, path, status, diagnose, variants,
3460            params, api_versions) in nr.payload:
3461         if name not in all_os:
3462           # build a list of nodes for this os containing empty lists
3463           # for each node in node_list
3464           all_os[name] = {}
3465           for nname in good_nodes:
3466             all_os[name][nname] = []
3467         # convert params from [name, help] to (name, help)
3468         params = [tuple(v) for v in params]
3469         all_os[name][node_name].append((path, status, diagnose,
3470                                         variants, params, api_versions))
3471     return all_os
3472
3473   def Exec(self, feedback_fn):
3474     """Compute the list of OSes.
3475
3476     """
3477     valid_nodes = [node.name
3478                    for node in self.cfg.GetAllNodesInfo().values()
3479                    if not node.offline and node.vm_capable]
3480     node_data = self.rpc.call_os_diagnose(valid_nodes)
3481     pol = self._DiagnoseByOS(node_data)
3482     output = []
3483     cluster = self.cfg.GetClusterInfo()
3484
3485     for os_name in utils.NiceSort(pol.keys()):
3486       os_data = pol[os_name]
3487       row = []
3488       valid = True
3489       (variants, params, api_versions) = null_state = (set(), set(), set())
3490       for idx, osl in enumerate(os_data.values()):
3491         valid = bool(valid and osl and osl[0][1])
3492         if not valid:
3493           (variants, params, api_versions) = null_state
3494           break
3495         node_variants, node_params, node_api = osl[0][3:6]
3496         if idx == 0: # first entry
3497           variants = set(node_variants)
3498           params = set(node_params)
3499           api_versions = set(node_api)
3500         else: # keep consistency
3501           variants.intersection_update(node_variants)
3502           params.intersection_update(node_params)
3503           api_versions.intersection_update(node_api)
3504
3505       is_hid = os_name in cluster.hidden_os
3506       is_blk = os_name in cluster.blacklisted_os
3507       if ((self._HID not in self.op.output_fields and is_hid) or
3508           (self._BLK not in self.op.output_fields and is_blk) or
3509           (self._VLD not in self.op.output_fields and not valid)):
3510         continue
3511
3512       for field in self.op.output_fields:
3513         if field == "name":
3514           val = os_name
3515         elif field == self._VLD:
3516           val = valid
3517         elif field == "node_status":
3518           # this is just a copy of the dict
3519           val = {}
3520           for node_name, nos_list in os_data.items():
3521             val[node_name] = nos_list
3522         elif field == "variants":
3523           val = utils.NiceSort(list(variants))
3524         elif field == "parameters":
3525           val = list(params)
3526         elif field == "api_versions":
3527           val = list(api_versions)
3528         elif field == self._HID:
3529           val = is_hid
3530         elif field == self._BLK:
3531           val = is_blk
3532         else:
3533           raise errors.ParameterError(field)
3534         row.append(val)
3535       output.append(row)
3536
3537     return output
3538
3539
3540 class LUNodeRemove(LogicalUnit):
3541   """Logical unit for removing a node.
3542
3543   """
3544   HPATH = "node-remove"
3545   HTYPE = constants.HTYPE_NODE
3546
3547   def BuildHooksEnv(self):
3548     """Build hooks env.
3549
3550     This doesn't run on the target node in the pre phase as a failed
3551     node would then be impossible to remove.
3552
3553     """
3554     env = {
3555       "OP_TARGET": self.op.node_name,
3556       "NODE_NAME": self.op.node_name,
3557       }
3558     all_nodes = self.cfg.GetNodeList()
3559     try:
3560       all_nodes.remove(self.op.node_name)
3561     except ValueError:
3562       logging.warning("Node %s which is about to be removed not found"
3563                       " in the all nodes list", self.op.node_name)
3564     return env, all_nodes, all_nodes
3565
3566   def CheckPrereq(self):
3567     """Check prerequisites.
3568
3569     This checks:
3570      - the node exists in the configuration
3571      - it does not have primary or secondary instances
3572      - it's not the master
3573
3574     Any errors are signaled by raising errors.OpPrereqError.
3575
3576     """
3577     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3578     node = self.cfg.GetNodeInfo(self.op.node_name)
3579     assert node is not None
3580
3581     instance_list = self.cfg.GetInstanceList()
3582
3583     masternode = self.cfg.GetMasterNode()
3584     if node.name == masternode:
3585       raise errors.OpPrereqError("Node is the master node,"
3586                                  " you need to failover first.",
3587                                  errors.ECODE_INVAL)
3588
3589     for instance_name in instance_list:
3590       instance = self.cfg.GetInstanceInfo(instance_name)
3591       if node.name in instance.all_nodes:
3592         raise errors.OpPrereqError("Instance %s is still running on the node,"
3593                                    " please remove first." % instance_name,
3594                                    errors.ECODE_INVAL)
3595     self.op.node_name = node.name
3596     self.node = node
3597
3598   def Exec(self, feedback_fn):
3599     """Removes the node from the cluster.
3600
3601     """
3602     node = self.node
3603     logging.info("Stopping the node daemon and removing configs from node %s",
3604                  node.name)
3605
3606     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3607
3608     # Promote nodes to master candidate as needed
3609     _AdjustCandidatePool(self, exceptions=[node.name])
3610     self.context.RemoveNode(node.name)
3611
3612     # Run post hooks on the node before it's removed
3613     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3614     try:
3615       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3616     except:
3617       # pylint: disable-msg=W0702
3618       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3619
3620     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3621     msg = result.fail_msg
3622     if msg:
3623       self.LogWarning("Errors encountered on the remote node while leaving"
3624                       " the cluster: %s", msg)
3625
3626     # Remove node from our /etc/hosts
3627     if self.cfg.GetClusterInfo().modify_etc_hosts:
3628       master_node = self.cfg.GetMasterNode()
3629       result = self.rpc.call_etc_hosts_modify(master_node,
3630                                               constants.ETC_HOSTS_REMOVE,
3631                                               node.name, None)
3632       result.Raise("Can't update hosts file with new host data")
3633       _RedistributeAncillaryFiles(self)
3634
3635
3636 class _NodeQuery(_QueryBase):
3637   FIELDS = query.NODE_FIELDS
3638
3639   def ExpandNames(self, lu):
3640     lu.needed_locks = {}
3641     lu.share_locks[locking.LEVEL_NODE] = 1
3642
3643     if self.names:
3644       self.wanted = _GetWantedNodes(lu, self.names)
3645     else:
3646       self.wanted = locking.ALL_SET
3647
3648     self.do_locking = (self.use_locking and
3649                        query.NQ_LIVE in self.requested_data)
3650
3651     if self.do_locking:
3652       # if we don't request only static fields, we need to lock the nodes
3653       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3654
3655   def DeclareLocks(self, lu, level):
3656     pass
3657
3658   def _GetQueryData(self, lu):
3659     """Computes the list of nodes and their attributes.
3660
3661     """
3662     all_info = lu.cfg.GetAllNodesInfo()
3663
3664     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3665
3666     # Gather data as requested
3667     if query.NQ_LIVE in self.requested_data:
3668       # filter out non-vm_capable nodes
3669       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3670
3671       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3672                                         lu.cfg.GetHypervisorType())
3673       live_data = dict((name, nresult.payload)
3674                        for (name, nresult) in node_data.items()
3675                        if not nresult.fail_msg and nresult.payload)
3676     else:
3677       live_data = None
3678
3679     if query.NQ_INST in self.requested_data:
3680       node_to_primary = dict([(name, set()) for name in nodenames])
3681       node_to_secondary = dict([(name, set()) for name in nodenames])
3682
3683       inst_data = lu.cfg.GetAllInstancesInfo()
3684
3685       for inst in inst_data.values():
3686         if inst.primary_node in node_to_primary:
3687           node_to_primary[inst.primary_node].add(inst.name)
3688         for secnode in inst.secondary_nodes:
3689           if secnode in node_to_secondary:
3690             node_to_secondary[secnode].add(inst.name)
3691     else:
3692       node_to_primary = None
3693       node_to_secondary = None
3694
3695     if query.NQ_OOB in self.requested_data:
3696       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3697                          for name, node in all_info.iteritems())
3698     else:
3699       oob_support = None
3700
3701     if query.NQ_GROUP in self.requested_data:
3702       groups = lu.cfg.GetAllNodeGroupsInfo()
3703     else:
3704       groups = {}
3705
3706     return query.NodeQueryData([all_info[name] for name in nodenames],
3707                                live_data, lu.cfg.GetMasterNode(),
3708                                node_to_primary, node_to_secondary, groups,
3709                                oob_support, lu.cfg.GetClusterInfo())
3710
3711
3712 class LUNodeQuery(NoHooksLU):
3713   """Logical unit for querying nodes.
3714
3715   """
3716   # pylint: disable-msg=W0142
3717   REQ_BGL = False
3718
3719   def CheckArguments(self):
3720     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3721                          self.op.output_fields, self.op.use_locking)
3722
3723   def ExpandNames(self):
3724     self.nq.ExpandNames(self)
3725
3726   def Exec(self, feedback_fn):
3727     return self.nq.OldStyleQuery(self)
3728
3729
3730 class LUNodeQueryvols(NoHooksLU):
3731   """Logical unit for getting volumes on node(s).
3732
3733   """
3734   REQ_BGL = False
3735   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3736   _FIELDS_STATIC = utils.FieldSet("node")
3737
3738   def CheckArguments(self):
3739     _CheckOutputFields(static=self._FIELDS_STATIC,
3740                        dynamic=self._FIELDS_DYNAMIC,
3741                        selected=self.op.output_fields)
3742
3743   def ExpandNames(self):
3744     self.needed_locks = {}
3745     self.share_locks[locking.LEVEL_NODE] = 1
3746     if not self.op.nodes:
3747       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3748     else:
3749       self.needed_locks[locking.LEVEL_NODE] = \
3750         _GetWantedNodes(self, self.op.nodes)
3751
3752   def Exec(self, feedback_fn):
3753     """Computes the list of nodes and their attributes.
3754
3755     """
3756     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3757     volumes = self.rpc.call_node_volumes(nodenames)
3758
3759     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3760              in self.cfg.GetInstanceList()]
3761
3762     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3763
3764     output = []
3765     for node in nodenames:
3766       nresult = volumes[node]
3767       if nresult.offline:
3768         continue
3769       msg = nresult.fail_msg
3770       if msg:
3771         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3772         continue
3773
3774       node_vols = nresult.payload[:]
3775       node_vols.sort(key=lambda vol: vol['dev'])
3776
3777       for vol in node_vols:
3778         node_output = []
3779         for field in self.op.output_fields:
3780           if field == "node":
3781             val = node
3782           elif field == "phys":
3783             val = vol['dev']
3784           elif field == "vg":
3785             val = vol['vg']
3786           elif field == "name":
3787             val = vol['name']
3788           elif field == "size":
3789             val = int(float(vol['size']))
3790           elif field == "instance":
3791             for inst in ilist:
3792               if node not in lv_by_node[inst]:
3793                 continue
3794               if vol['name'] in lv_by_node[inst][node]:
3795                 val = inst.name
3796                 break
3797             else:
3798               val = '-'
3799           else:
3800             raise errors.ParameterError(field)
3801           node_output.append(str(val))
3802
3803         output.append(node_output)
3804
3805     return output
3806
3807
3808 class LUNodeQueryStorage(NoHooksLU):
3809   """Logical unit for getting information on storage units on node(s).
3810
3811   """
3812   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3813   REQ_BGL = False
3814
3815   def CheckArguments(self):
3816     _CheckOutputFields(static=self._FIELDS_STATIC,
3817                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3818                        selected=self.op.output_fields)
3819
3820   def ExpandNames(self):
3821     self.needed_locks = {}
3822     self.share_locks[locking.LEVEL_NODE] = 1
3823
3824     if self.op.nodes:
3825       self.needed_locks[locking.LEVEL_NODE] = \
3826         _GetWantedNodes(self, self.op.nodes)
3827     else:
3828       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3829
3830   def Exec(self, feedback_fn):
3831     """Computes the list of nodes and their attributes.
3832
3833     """
3834     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3835
3836     # Always get name to sort by
3837     if constants.SF_NAME in self.op.output_fields:
3838       fields = self.op.output_fields[:]
3839     else:
3840       fields = [constants.SF_NAME] + self.op.output_fields
3841
3842     # Never ask for node or type as it's only known to the LU
3843     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3844       while extra in fields:
3845         fields.remove(extra)
3846
3847     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3848     name_idx = field_idx[constants.SF_NAME]
3849
3850     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3851     data = self.rpc.call_storage_list(self.nodes,
3852                                       self.op.storage_type, st_args,
3853                                       self.op.name, fields)
3854
3855     result = []
3856
3857     for node in utils.NiceSort(self.nodes):
3858       nresult = data[node]
3859       if nresult.offline:
3860         continue
3861
3862       msg = nresult.fail_msg
3863       if msg:
3864         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3865         continue
3866
3867       rows = dict([(row[name_idx], row) for row in nresult.payload])
3868
3869       for name in utils.NiceSort(rows.keys()):
3870         row = rows[name]
3871
3872         out = []
3873
3874         for field in self.op.output_fields:
3875           if field == constants.SF_NODE:
3876             val = node
3877           elif field == constants.SF_TYPE:
3878             val = self.op.storage_type
3879           elif field in field_idx:
3880             val = row[field_idx[field]]
3881           else:
3882             raise errors.ParameterError(field)
3883
3884           out.append(val)
3885
3886         result.append(out)
3887
3888     return result
3889
3890
3891 class _InstanceQuery(_QueryBase):
3892   FIELDS = query.INSTANCE_FIELDS
3893
3894   def ExpandNames(self, lu):
3895     lu.needed_locks = {}
3896     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3897     lu.share_locks[locking.LEVEL_NODE] = 1
3898
3899     if self.names:
3900       self.wanted = _GetWantedInstances(lu, self.names)
3901     else:
3902       self.wanted = locking.ALL_SET
3903
3904     self.do_locking = (self.use_locking and
3905                        query.IQ_LIVE in self.requested_data)
3906     if self.do_locking:
3907       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3908       lu.needed_locks[locking.LEVEL_NODE] = []
3909       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3910
3911   def DeclareLocks(self, lu, level):
3912     if level == locking.LEVEL_NODE and self.do_locking:
3913       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3914
3915   def _GetQueryData(self, lu):
3916     """Computes the list of instances and their attributes.
3917
3918     """
3919     cluster = lu.cfg.GetClusterInfo()
3920     all_info = lu.cfg.GetAllInstancesInfo()
3921
3922     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3923
3924     instance_list = [all_info[name] for name in instance_names]
3925     nodes = frozenset(itertools.chain(*(inst.all_nodes
3926                                         for inst in instance_list)))
3927     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3928     bad_nodes = []
3929     offline_nodes = []
3930     wrongnode_inst = set()
3931
3932     # Gather data as requested
3933     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3934       live_data = {}
3935       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3936       for name in nodes:
3937         result = node_data[name]
3938         if result.offline:
3939           # offline nodes will be in both lists
3940           assert result.fail_msg
3941           offline_nodes.append(name)
3942         if result.fail_msg:
3943           bad_nodes.append(name)
3944         elif result.payload:
3945           for inst in result.payload:
3946             if all_info[inst].primary_node == name:
3947               live_data.update(result.payload)
3948             else:
3949               wrongnode_inst.add(inst)
3950         # else no instance is alive
3951     else:
3952       live_data = {}
3953
3954     if query.IQ_DISKUSAGE in self.requested_data:
3955       disk_usage = dict((inst.name,
3956                          _ComputeDiskSize(inst.disk_template,
3957                                           [{"size": disk.size}
3958                                            for disk in inst.disks]))
3959                         for inst in instance_list)
3960     else:
3961       disk_usage = None
3962
3963     if query.IQ_CONSOLE in self.requested_data:
3964       consinfo = {}
3965       for inst in instance_list:
3966         if inst.name in live_data:
3967           # Instance is running
3968           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3969         else:
3970           consinfo[inst.name] = None
3971       assert set(consinfo.keys()) == set(instance_names)
3972     else:
3973       consinfo = None
3974
3975     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3976                                    disk_usage, offline_nodes, bad_nodes,
3977                                    live_data, wrongnode_inst, consinfo)
3978
3979
3980 class LUQuery(NoHooksLU):
3981   """Query for resources/items of a certain kind.
3982
3983   """
3984   # pylint: disable-msg=W0142
3985   REQ_BGL = False
3986
3987   def CheckArguments(self):
3988     qcls = _GetQueryImplementation(self.op.what)
3989
3990     self.impl = qcls(self.op.filter, self.op.fields, False)
3991
3992   def ExpandNames(self):
3993     self.impl.ExpandNames(self)
3994
3995   def DeclareLocks(self, level):
3996     self.impl.DeclareLocks(self, level)
3997
3998   def Exec(self, feedback_fn):
3999     return self.impl.NewStyleQuery(self)
4000
4001
4002 class LUQueryFields(NoHooksLU):
4003   """Query for resources/items of a certain kind.
4004
4005   """
4006   # pylint: disable-msg=W0142
4007   REQ_BGL = False
4008
4009   def CheckArguments(self):
4010     self.qcls = _GetQueryImplementation(self.op.what)
4011
4012   def ExpandNames(self):
4013     self.needed_locks = {}
4014
4015   def Exec(self, feedback_fn):
4016     return self.qcls.FieldsQuery(self.op.fields)
4017
4018
4019 class LUNodeModifyStorage(NoHooksLU):
4020   """Logical unit for modifying a storage volume on a node.
4021
4022   """
4023   REQ_BGL = False
4024
4025   def CheckArguments(self):
4026     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4027
4028     storage_type = self.op.storage_type
4029
4030     try:
4031       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4032     except KeyError:
4033       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4034                                  " modified" % storage_type,
4035                                  errors.ECODE_INVAL)
4036
4037     diff = set(self.op.changes.keys()) - modifiable
4038     if diff:
4039       raise errors.OpPrereqError("The following fields can not be modified for"
4040                                  " storage units of type '%s': %r" %
4041                                  (storage_type, list(diff)),
4042                                  errors.ECODE_INVAL)
4043
4044   def ExpandNames(self):
4045     self.needed_locks = {
4046       locking.LEVEL_NODE: self.op.node_name,
4047       }
4048
4049   def Exec(self, feedback_fn):
4050     """Computes the list of nodes and their attributes.
4051
4052     """
4053     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4054     result = self.rpc.call_storage_modify(self.op.node_name,
4055                                           self.op.storage_type, st_args,
4056                                           self.op.name, self.op.changes)
4057     result.Raise("Failed to modify storage unit '%s' on %s" %
4058                  (self.op.name, self.op.node_name))
4059
4060
4061 class LUNodeAdd(LogicalUnit):
4062   """Logical unit for adding node to the cluster.
4063
4064   """
4065   HPATH = "node-add"
4066   HTYPE = constants.HTYPE_NODE
4067   _NFLAGS = ["master_capable", "vm_capable"]
4068
4069   def CheckArguments(self):
4070     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4071     # validate/normalize the node name
4072     self.hostname = netutils.GetHostname(name=self.op.node_name,
4073                                          family=self.primary_ip_family)
4074     self.op.node_name = self.hostname.name
4075     if self.op.readd and self.op.group:
4076       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4077                                  " being readded", errors.ECODE_INVAL)
4078
4079   def BuildHooksEnv(self):
4080     """Build hooks env.
4081
4082     This will run on all nodes before, and on all nodes + the new node after.
4083
4084     """
4085     env = {
4086       "OP_TARGET": self.op.node_name,
4087       "NODE_NAME": self.op.node_name,
4088       "NODE_PIP": self.op.primary_ip,
4089       "NODE_SIP": self.op.secondary_ip,
4090       "MASTER_CAPABLE": str(self.op.master_capable),
4091       "VM_CAPABLE": str(self.op.vm_capable),
4092       }
4093     nodes_0 = self.cfg.GetNodeList()
4094     nodes_1 = nodes_0 + [self.op.node_name, ]
4095     return env, nodes_0, nodes_1
4096
4097   def CheckPrereq(self):
4098     """Check prerequisites.
4099
4100     This checks:
4101      - the new node is not already in the config
4102      - it is resolvable
4103      - its parameters (single/dual homed) matches the cluster
4104
4105     Any errors are signaled by raising errors.OpPrereqError.
4106
4107     """
4108     cfg = self.cfg
4109     hostname = self.hostname
4110     node = hostname.name
4111     primary_ip = self.op.primary_ip = hostname.ip
4112     if self.op.secondary_ip is None:
4113       if self.primary_ip_family == netutils.IP6Address.family:
4114         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4115                                    " IPv4 address must be given as secondary",
4116                                    errors.ECODE_INVAL)
4117       self.op.secondary_ip = primary_ip
4118
4119     secondary_ip = self.op.secondary_ip
4120     if not netutils.IP4Address.IsValid(secondary_ip):
4121       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4122                                  " address" % secondary_ip, errors.ECODE_INVAL)
4123
4124     node_list = cfg.GetNodeList()
4125     if not self.op.readd and node in node_list:
4126       raise errors.OpPrereqError("Node %s is already in the configuration" %
4127                                  node, errors.ECODE_EXISTS)
4128     elif self.op.readd and node not in node_list:
4129       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4130                                  errors.ECODE_NOENT)
4131
4132     self.changed_primary_ip = False
4133
4134     for existing_node_name in node_list:
4135       existing_node = cfg.GetNodeInfo(existing_node_name)
4136
4137       if self.op.readd and node == existing_node_name:
4138         if existing_node.secondary_ip != secondary_ip:
4139           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4140                                      " address configuration as before",
4141                                      errors.ECODE_INVAL)
4142         if existing_node.primary_ip != primary_ip:
4143           self.changed_primary_ip = True
4144
4145         continue
4146
4147       if (existing_node.primary_ip == primary_ip or
4148           existing_node.secondary_ip == primary_ip or
4149           existing_node.primary_ip == secondary_ip or
4150           existing_node.secondary_ip == secondary_ip):
4151         raise errors.OpPrereqError("New node ip address(es) conflict with"
4152                                    " existing node %s" % existing_node.name,
4153                                    errors.ECODE_NOTUNIQUE)
4154
4155     # After this 'if' block, None is no longer a valid value for the
4156     # _capable op attributes
4157     if self.op.readd:
4158       old_node = self.cfg.GetNodeInfo(node)
4159       assert old_node is not None, "Can't retrieve locked node %s" % node
4160       for attr in self._NFLAGS:
4161         if getattr(self.op, attr) is None:
4162           setattr(self.op, attr, getattr(old_node, attr))
4163     else:
4164       for attr in self._NFLAGS:
4165         if getattr(self.op, attr) is None:
4166           setattr(self.op, attr, True)
4167
4168     if self.op.readd and not self.op.vm_capable:
4169       pri, sec = cfg.GetNodeInstances(node)
4170       if pri or sec:
4171         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4172                                    " flag set to false, but it already holds"
4173                                    " instances" % node,
4174                                    errors.ECODE_STATE)
4175
4176     # check that the type of the node (single versus dual homed) is the
4177     # same as for the master
4178     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4179     master_singlehomed = myself.secondary_ip == myself.primary_ip
4180     newbie_singlehomed = secondary_ip == primary_ip
4181     if master_singlehomed != newbie_singlehomed:
4182       if master_singlehomed:
4183         raise errors.OpPrereqError("The master has no secondary ip but the"
4184                                    " new node has one",
4185                                    errors.ECODE_INVAL)
4186       else:
4187         raise errors.OpPrereqError("The master has a secondary ip but the"
4188                                    " new node doesn't have one",
4189                                    errors.ECODE_INVAL)
4190
4191     # checks reachability
4192     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4193       raise errors.OpPrereqError("Node not reachable by ping",
4194                                  errors.ECODE_ENVIRON)
4195
4196     if not newbie_singlehomed:
4197       # check reachability from my secondary ip to newbie's secondary ip
4198       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4199                            source=myself.secondary_ip):
4200         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4201                                    " based ping to node daemon port",
4202                                    errors.ECODE_ENVIRON)
4203
4204     if self.op.readd:
4205       exceptions = [node]
4206     else:
4207       exceptions = []
4208
4209     if self.op.master_capable:
4210       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4211     else:
4212       self.master_candidate = False
4213
4214     if self.op.readd:
4215       self.new_node = old_node
4216     else:
4217       node_group = cfg.LookupNodeGroup(self.op.group)
4218       self.new_node = objects.Node(name=node,
4219                                    primary_ip=primary_ip,
4220                                    secondary_ip=secondary_ip,
4221                                    master_candidate=self.master_candidate,
4222                                    offline=False, drained=False,
4223                                    group=node_group)
4224
4225     if self.op.ndparams:
4226       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4227
4228   def Exec(self, feedback_fn):
4229     """Adds the new node to the cluster.
4230
4231     """
4232     new_node = self.new_node
4233     node = new_node.name
4234
4235     # We adding a new node so we assume it's powered
4236     new_node.powered = True
4237
4238     # for re-adds, reset the offline/drained/master-candidate flags;
4239     # we need to reset here, otherwise offline would prevent RPC calls
4240     # later in the procedure; this also means that if the re-add
4241     # fails, we are left with a non-offlined, broken node
4242     if self.op.readd:
4243       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4244       self.LogInfo("Readding a node, the offline/drained flags were reset")
4245       # if we demote the node, we do cleanup later in the procedure
4246       new_node.master_candidate = self.master_candidate
4247       if self.changed_primary_ip:
4248         new_node.primary_ip = self.op.primary_ip
4249
4250     # copy the master/vm_capable flags
4251     for attr in self._NFLAGS:
4252       setattr(new_node, attr, getattr(self.op, attr))
4253
4254     # notify the user about any possible mc promotion
4255     if new_node.master_candidate:
4256       self.LogInfo("Node will be a master candidate")
4257
4258     if self.op.ndparams:
4259       new_node.ndparams = self.op.ndparams
4260     else:
4261       new_node.ndparams = {}
4262
4263     # check connectivity
4264     result = self.rpc.call_version([node])[node]
4265     result.Raise("Can't get version information from node %s" % node)
4266     if constants.PROTOCOL_VERSION == result.payload:
4267       logging.info("Communication to node %s fine, sw version %s match",
4268                    node, result.payload)
4269     else:
4270       raise errors.OpExecError("Version mismatch master version %s,"
4271                                " node version %s" %
4272                                (constants.PROTOCOL_VERSION, result.payload))
4273
4274     # Add node to our /etc/hosts, and add key to known_hosts
4275     if self.cfg.GetClusterInfo().modify_etc_hosts:
4276       master_node = self.cfg.GetMasterNode()
4277       result = self.rpc.call_etc_hosts_modify(master_node,
4278                                               constants.ETC_HOSTS_ADD,
4279                                               self.hostname.name,
4280                                               self.hostname.ip)
4281       result.Raise("Can't update hosts file with new host data")
4282
4283     if new_node.secondary_ip != new_node.primary_ip:
4284       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4285                                False)
4286
4287     node_verify_list = [self.cfg.GetMasterNode()]
4288     node_verify_param = {
4289       constants.NV_NODELIST: [node],
4290       # TODO: do a node-net-test as well?
4291     }
4292
4293     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4294                                        self.cfg.GetClusterName())
4295     for verifier in node_verify_list:
4296       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4297       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4298       if nl_payload:
4299         for failed in nl_payload:
4300           feedback_fn("ssh/hostname verification failed"
4301                       " (checking from %s): %s" %
4302                       (verifier, nl_payload[failed]))
4303         raise errors.OpExecError("ssh/hostname verification failed.")
4304
4305     if self.op.readd:
4306       _RedistributeAncillaryFiles(self)
4307       self.context.ReaddNode(new_node)
4308       # make sure we redistribute the config
4309       self.cfg.Update(new_node, feedback_fn)
4310       # and make sure the new node will not have old files around
4311       if not new_node.master_candidate:
4312         result = self.rpc.call_node_demote_from_mc(new_node.name)
4313         msg = result.fail_msg
4314         if msg:
4315           self.LogWarning("Node failed to demote itself from master"
4316                           " candidate status: %s" % msg)
4317     else:
4318       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4319                                   additional_vm=self.op.vm_capable)
4320       self.context.AddNode(new_node, self.proc.GetECId())
4321
4322
4323 class LUNodeSetParams(LogicalUnit):
4324   """Modifies the parameters of a node.
4325
4326   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4327       to the node role (as _ROLE_*)
4328   @cvar _R2F: a dictionary from node role to tuples of flags
4329   @cvar _FLAGS: a list of attribute names corresponding to the flags
4330
4331   """
4332   HPATH = "node-modify"
4333   HTYPE = constants.HTYPE_NODE
4334   REQ_BGL = False
4335   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4336   _F2R = {
4337     (True, False, False): _ROLE_CANDIDATE,
4338     (False, True, False): _ROLE_DRAINED,
4339     (False, False, True): _ROLE_OFFLINE,
4340     (False, False, False): _ROLE_REGULAR,
4341     }
4342   _R2F = dict((v, k) for k, v in _F2R.items())
4343   _FLAGS = ["master_candidate", "drained", "offline"]
4344
4345   def CheckArguments(self):
4346     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4347     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4348                 self.op.master_capable, self.op.vm_capable,
4349                 self.op.secondary_ip, self.op.ndparams]
4350     if all_mods.count(None) == len(all_mods):
4351       raise errors.OpPrereqError("Please pass at least one modification",
4352                                  errors.ECODE_INVAL)
4353     if all_mods.count(True) > 1:
4354       raise errors.OpPrereqError("Can't set the node into more than one"
4355                                  " state at the same time",
4356                                  errors.ECODE_INVAL)
4357
4358     # Boolean value that tells us whether we might be demoting from MC
4359     self.might_demote = (self.op.master_candidate == False or
4360                          self.op.offline == True or
4361                          self.op.drained == True or
4362                          self.op.master_capable == False)
4363
4364     if self.op.secondary_ip:
4365       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4366         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4367                                    " address" % self.op.secondary_ip,
4368                                    errors.ECODE_INVAL)
4369
4370     self.lock_all = self.op.auto_promote and self.might_demote
4371     self.lock_instances = self.op.secondary_ip is not None
4372
4373   def ExpandNames(self):
4374     if self.lock_all:
4375       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4376     else:
4377       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4378
4379     if self.lock_instances:
4380       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4381
4382   def DeclareLocks(self, level):
4383     # If we have locked all instances, before waiting to lock nodes, release
4384     # all the ones living on nodes unrelated to the current operation.
4385     if level == locking.LEVEL_NODE and self.lock_instances:
4386       instances_release = []
4387       instances_keep = []
4388       self.affected_instances = []
4389       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4390         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4391           instance = self.context.cfg.GetInstanceInfo(instance_name)
4392           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4393           if i_mirrored and self.op.node_name in instance.all_nodes:
4394             instances_keep.append(instance_name)
4395             self.affected_instances.append(instance)
4396           else:
4397             instances_release.append(instance_name)
4398         if instances_release:
4399           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4400           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4401
4402   def BuildHooksEnv(self):
4403     """Build hooks env.
4404
4405     This runs on the master node.
4406
4407     """
4408     env = {
4409       "OP_TARGET": self.op.node_name,
4410       "MASTER_CANDIDATE": str(self.op.master_candidate),
4411       "OFFLINE": str(self.op.offline),
4412       "DRAINED": str(self.op.drained),
4413       "MASTER_CAPABLE": str(self.op.master_capable),
4414       "VM_CAPABLE": str(self.op.vm_capable),
4415       }
4416     nl = [self.cfg.GetMasterNode(),
4417           self.op.node_name]
4418     return env, nl, nl
4419
4420   def CheckPrereq(self):
4421     """Check prerequisites.
4422
4423     This only checks the instance list against the existing names.
4424
4425     """
4426     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4427
4428     if (self.op.master_candidate is not None or
4429         self.op.drained is not None or
4430         self.op.offline is not None):
4431       # we can't change the master's node flags
4432       if self.op.node_name == self.cfg.GetMasterNode():
4433         raise errors.OpPrereqError("The master role can be changed"
4434                                    " only via master-failover",
4435                                    errors.ECODE_INVAL)
4436
4437     if self.op.master_candidate and not node.master_capable:
4438       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4439                                  " it a master candidate" % node.name,
4440                                  errors.ECODE_STATE)
4441
4442     if self.op.vm_capable == False:
4443       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4444       if ipri or isec:
4445         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4446                                    " the vm_capable flag" % node.name,
4447                                    errors.ECODE_STATE)
4448
4449     if node.master_candidate and self.might_demote and not self.lock_all:
4450       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4451       # check if after removing the current node, we're missing master
4452       # candidates
4453       (mc_remaining, mc_should, _) = \
4454           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4455       if mc_remaining < mc_should:
4456         raise errors.OpPrereqError("Not enough master candidates, please"
4457                                    " pass auto promote option to allow"
4458                                    " promotion", errors.ECODE_STATE)
4459
4460     self.old_flags = old_flags = (node.master_candidate,
4461                                   node.drained, node.offline)
4462     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4463     self.old_role = old_role = self._F2R[old_flags]
4464
4465     # Check for ineffective changes
4466     for attr in self._FLAGS:
4467       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4468         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4469         setattr(self.op, attr, None)
4470
4471     # Past this point, any flag change to False means a transition
4472     # away from the respective state, as only real changes are kept
4473
4474     # TODO: We might query the real power state if it supports OOB
4475     if _SupportsOob(self.cfg, node):
4476       if self.op.offline is False and not (node.powered or
4477                                            self.op.powered == True):
4478         raise errors.OpPrereqError(("Please power on node %s first before you"
4479                                     " can reset offline state") %
4480                                    self.op.node_name)
4481     elif self.op.powered is not None:
4482       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4483                                   " which does not support out-of-band"
4484                                   " handling") % self.op.node_name)
4485
4486     # If we're being deofflined/drained, we'll MC ourself if needed
4487     if (self.op.drained == False or self.op.offline == False or
4488         (self.op.master_capable and not node.master_capable)):
4489       if _DecideSelfPromotion(self):
4490         self.op.master_candidate = True
4491         self.LogInfo("Auto-promoting node to master candidate")
4492
4493     # If we're no longer master capable, we'll demote ourselves from MC
4494     if self.op.master_capable == False and node.master_candidate:
4495       self.LogInfo("Demoting from master candidate")
4496       self.op.master_candidate = False
4497
4498     # Compute new role
4499     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4500     if self.op.master_candidate:
4501       new_role = self._ROLE_CANDIDATE
4502     elif self.op.drained:
4503       new_role = self._ROLE_DRAINED
4504     elif self.op.offline:
4505       new_role = self._ROLE_OFFLINE
4506     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4507       # False is still in new flags, which means we're un-setting (the
4508       # only) True flag
4509       new_role = self._ROLE_REGULAR
4510     else: # no new flags, nothing, keep old role
4511       new_role = old_role
4512
4513     self.new_role = new_role
4514
4515     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4516       # Trying to transition out of offline status
4517       result = self.rpc.call_version([node.name])[node.name]
4518       if result.fail_msg:
4519         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4520                                    " to report its version: %s" %
4521                                    (node.name, result.fail_msg),
4522                                    errors.ECODE_STATE)
4523       else:
4524         self.LogWarning("Transitioning node from offline to online state"
4525                         " without using re-add. Please make sure the node"
4526                         " is healthy!")
4527
4528     if self.op.secondary_ip:
4529       # Ok even without locking, because this can't be changed by any LU
4530       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4531       master_singlehomed = master.secondary_ip == master.primary_ip
4532       if master_singlehomed and self.op.secondary_ip:
4533         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4534                                    " homed cluster", errors.ECODE_INVAL)
4535
4536       if node.offline:
4537         if self.affected_instances:
4538           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4539                                      " node has instances (%s) configured"
4540                                      " to use it" % self.affected_instances)
4541       else:
4542         # On online nodes, check that no instances are running, and that
4543         # the node has the new ip and we can reach it.
4544         for instance in self.affected_instances:
4545           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4546
4547         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4548         if master.name != node.name:
4549           # check reachability from master secondary ip to new secondary ip
4550           if not netutils.TcpPing(self.op.secondary_ip,
4551                                   constants.DEFAULT_NODED_PORT,
4552                                   source=master.secondary_ip):
4553             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4554                                        " based ping to node daemon port",
4555                                        errors.ECODE_ENVIRON)
4556
4557     if self.op.ndparams:
4558       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4559       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4560       self.new_ndparams = new_ndparams
4561
4562   def Exec(self, feedback_fn):
4563     """Modifies a node.
4564
4565     """
4566     node = self.node
4567     old_role = self.old_role
4568     new_role = self.new_role
4569
4570     result = []
4571
4572     if self.op.ndparams:
4573       node.ndparams = self.new_ndparams
4574
4575     if self.op.powered is not None:
4576       node.powered = self.op.powered
4577
4578     for attr in ["master_capable", "vm_capable"]:
4579       val = getattr(self.op, attr)
4580       if val is not None:
4581         setattr(node, attr, val)
4582         result.append((attr, str(val)))
4583
4584     if new_role != old_role:
4585       # Tell the node to demote itself, if no longer MC and not offline
4586       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4587         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4588         if msg:
4589           self.LogWarning("Node failed to demote itself: %s", msg)
4590
4591       new_flags = self._R2F[new_role]
4592       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4593         if of != nf:
4594           result.append((desc, str(nf)))
4595       (node.master_candidate, node.drained, node.offline) = new_flags
4596
4597       # we locked all nodes, we adjust the CP before updating this node
4598       if self.lock_all:
4599         _AdjustCandidatePool(self, [node.name])
4600
4601     if self.op.secondary_ip:
4602       node.secondary_ip = self.op.secondary_ip
4603       result.append(("secondary_ip", self.op.secondary_ip))
4604
4605     # this will trigger configuration file update, if needed
4606     self.cfg.Update(node, feedback_fn)
4607
4608     # this will trigger job queue propagation or cleanup if the mc
4609     # flag changed
4610     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4611       self.context.ReaddNode(node)
4612
4613     return result
4614
4615
4616 class LUNodePowercycle(NoHooksLU):
4617   """Powercycles a node.
4618
4619   """
4620   REQ_BGL = False
4621
4622   def CheckArguments(self):
4623     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4624     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4625       raise errors.OpPrereqError("The node is the master and the force"
4626                                  " parameter was not set",
4627                                  errors.ECODE_INVAL)
4628
4629   def ExpandNames(self):
4630     """Locking for PowercycleNode.
4631
4632     This is a last-resort option and shouldn't block on other
4633     jobs. Therefore, we grab no locks.
4634
4635     """
4636     self.needed_locks = {}
4637
4638   def Exec(self, feedback_fn):
4639     """Reboots a node.
4640
4641     """
4642     result = self.rpc.call_node_powercycle(self.op.node_name,
4643                                            self.cfg.GetHypervisorType())
4644     result.Raise("Failed to schedule the reboot")
4645     return result.payload
4646
4647
4648 class LUClusterQuery(NoHooksLU):
4649   """Query cluster configuration.
4650
4651   """
4652   REQ_BGL = False
4653
4654   def ExpandNames(self):
4655     self.needed_locks = {}
4656
4657   def Exec(self, feedback_fn):
4658     """Return cluster config.
4659
4660     """
4661     cluster = self.cfg.GetClusterInfo()
4662     os_hvp = {}
4663
4664     # Filter just for enabled hypervisors
4665     for os_name, hv_dict in cluster.os_hvp.items():
4666       os_hvp[os_name] = {}
4667       for hv_name, hv_params in hv_dict.items():
4668         if hv_name in cluster.enabled_hypervisors:
4669           os_hvp[os_name][hv_name] = hv_params
4670
4671     # Convert ip_family to ip_version
4672     primary_ip_version = constants.IP4_VERSION
4673     if cluster.primary_ip_family == netutils.IP6Address.family:
4674       primary_ip_version = constants.IP6_VERSION
4675
4676     result = {
4677       "software_version": constants.RELEASE_VERSION,
4678       "protocol_version": constants.PROTOCOL_VERSION,
4679       "config_version": constants.CONFIG_VERSION,
4680       "os_api_version": max(constants.OS_API_VERSIONS),
4681       "export_version": constants.EXPORT_VERSION,
4682       "architecture": (platform.architecture()[0], platform.machine()),
4683       "name": cluster.cluster_name,
4684       "master": cluster.master_node,
4685       "default_hypervisor": cluster.enabled_hypervisors[0],
4686       "enabled_hypervisors": cluster.enabled_hypervisors,
4687       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4688                         for hypervisor_name in cluster.enabled_hypervisors]),
4689       "os_hvp": os_hvp,
4690       "beparams": cluster.beparams,
4691       "osparams": cluster.osparams,
4692       "nicparams": cluster.nicparams,
4693       "ndparams": cluster.ndparams,
4694       "candidate_pool_size": cluster.candidate_pool_size,
4695       "master_netdev": cluster.master_netdev,
4696       "volume_group_name": cluster.volume_group_name,
4697       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4698       "file_storage_dir": cluster.file_storage_dir,
4699       "shared_file_storage_dir": cluster.shared_file_storage_dir,
4700       "maintain_node_health": cluster.maintain_node_health,
4701       "ctime": cluster.ctime,
4702       "mtime": cluster.mtime,
4703       "uuid": cluster.uuid,
4704       "tags": list(cluster.GetTags()),
4705       "uid_pool": cluster.uid_pool,
4706       "default_iallocator": cluster.default_iallocator,
4707       "reserved_lvs": cluster.reserved_lvs,
4708       "primary_ip_version": primary_ip_version,
4709       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4710       "hidden_os": cluster.hidden_os,
4711       "blacklisted_os": cluster.blacklisted_os,
4712       }
4713
4714     return result
4715
4716
4717 class LUClusterConfigQuery(NoHooksLU):
4718   """Return configuration values.
4719
4720   """
4721   REQ_BGL = False
4722   _FIELDS_DYNAMIC = utils.FieldSet()
4723   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4724                                   "watcher_pause", "volume_group_name")
4725
4726   def CheckArguments(self):
4727     _CheckOutputFields(static=self._FIELDS_STATIC,
4728                        dynamic=self._FIELDS_DYNAMIC,
4729                        selected=self.op.output_fields)
4730
4731   def ExpandNames(self):
4732     self.needed_locks = {}
4733
4734   def Exec(self, feedback_fn):
4735     """Dump a representation of the cluster config to the standard output.
4736
4737     """
4738     values = []
4739     for field in self.op.output_fields:
4740       if field == "cluster_name":
4741         entry = self.cfg.GetClusterName()
4742       elif field == "master_node":
4743         entry = self.cfg.GetMasterNode()
4744       elif field == "drain_flag":
4745         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4746       elif field == "watcher_pause":
4747         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4748       elif field == "volume_group_name":
4749         entry = self.cfg.GetVGName()
4750       else:
4751         raise errors.ParameterError(field)
4752       values.append(entry)
4753     return values
4754
4755
4756 class LUInstanceActivateDisks(NoHooksLU):
4757   """Bring up an instance's disks.
4758
4759   """
4760   REQ_BGL = False
4761
4762   def ExpandNames(self):
4763     self._ExpandAndLockInstance()
4764     self.needed_locks[locking.LEVEL_NODE] = []
4765     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4766
4767   def DeclareLocks(self, level):
4768     if level == locking.LEVEL_NODE:
4769       self._LockInstancesNodes()
4770
4771   def CheckPrereq(self):
4772     """Check prerequisites.
4773
4774     This checks that the instance is in the cluster.
4775
4776     """
4777     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4778     assert self.instance is not None, \
4779       "Cannot retrieve locked instance %s" % self.op.instance_name
4780     _CheckNodeOnline(self, self.instance.primary_node)
4781
4782   def Exec(self, feedback_fn):
4783     """Activate the disks.
4784
4785     """
4786     disks_ok, disks_info = \
4787               _AssembleInstanceDisks(self, self.instance,
4788                                      ignore_size=self.op.ignore_size)
4789     if not disks_ok:
4790       raise errors.OpExecError("Cannot activate block devices")
4791
4792     return disks_info
4793
4794
4795 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4796                            ignore_size=False):
4797   """Prepare the block devices for an instance.
4798
4799   This sets up the block devices on all nodes.
4800
4801   @type lu: L{LogicalUnit}
4802   @param lu: the logical unit on whose behalf we execute
4803   @type instance: L{objects.Instance}
4804   @param instance: the instance for whose disks we assemble
4805   @type disks: list of L{objects.Disk} or None
4806   @param disks: which disks to assemble (or all, if None)
4807   @type ignore_secondaries: boolean
4808   @param ignore_secondaries: if true, errors on secondary nodes
4809       won't result in an error return from the function
4810   @type ignore_size: boolean
4811   @param ignore_size: if true, the current known size of the disk
4812       will not be used during the disk activation, useful for cases
4813       when the size is wrong
4814   @return: False if the operation failed, otherwise a list of
4815       (host, instance_visible_name, node_visible_name)
4816       with the mapping from node devices to instance devices
4817
4818   """
4819   device_info = []
4820   disks_ok = True
4821   iname = instance.name
4822   disks = _ExpandCheckDisks(instance, disks)
4823
4824   # With the two passes mechanism we try to reduce the window of
4825   # opportunity for the race condition of switching DRBD to primary
4826   # before handshaking occured, but we do not eliminate it
4827
4828   # The proper fix would be to wait (with some limits) until the
4829   # connection has been made and drbd transitions from WFConnection
4830   # into any other network-connected state (Connected, SyncTarget,
4831   # SyncSource, etc.)
4832
4833   # 1st pass, assemble on all nodes in secondary mode
4834   for idx, inst_disk in enumerate(disks):
4835     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4836       if ignore_size:
4837         node_disk = node_disk.Copy()
4838         node_disk.UnsetSize()
4839       lu.cfg.SetDiskID(node_disk, node)
4840       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4841       msg = result.fail_msg
4842       if msg:
4843         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4844                            " (is_primary=False, pass=1): %s",
4845                            inst_disk.iv_name, node, msg)
4846         if not ignore_secondaries:
4847           disks_ok = False
4848
4849   # FIXME: race condition on drbd migration to primary
4850
4851   # 2nd pass, do only the primary node
4852   for idx, inst_disk in enumerate(disks):
4853     dev_path = None
4854
4855     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4856       if node != instance.primary_node:
4857         continue
4858       if ignore_size:
4859         node_disk = node_disk.Copy()
4860         node_disk.UnsetSize()
4861       lu.cfg.SetDiskID(node_disk, node)
4862       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4863       msg = result.fail_msg
4864       if msg:
4865         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4866                            " (is_primary=True, pass=2): %s",
4867                            inst_disk.iv_name, node, msg)
4868         disks_ok = False
4869       else:
4870         dev_path = result.payload
4871
4872     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4873
4874   # leave the disks configured for the primary node
4875   # this is a workaround that would be fixed better by
4876   # improving the logical/physical id handling
4877   for disk in disks:
4878     lu.cfg.SetDiskID(disk, instance.primary_node)
4879
4880   return disks_ok, device_info
4881
4882
4883 def _StartInstanceDisks(lu, instance, force):
4884   """Start the disks of an instance.
4885
4886   """
4887   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4888                                            ignore_secondaries=force)
4889   if not disks_ok:
4890     _ShutdownInstanceDisks(lu, instance)
4891     if force is not None and not force:
4892       lu.proc.LogWarning("", hint="If the message above refers to a"
4893                          " secondary node,"
4894                          " you can retry the operation using '--force'.")
4895     raise errors.OpExecError("Disk consistency error")
4896
4897
4898 class LUInstanceDeactivateDisks(NoHooksLU):
4899   """Shutdown an instance's disks.
4900
4901   """
4902   REQ_BGL = False
4903
4904   def ExpandNames(self):
4905     self._ExpandAndLockInstance()
4906     self.needed_locks[locking.LEVEL_NODE] = []
4907     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4908
4909   def DeclareLocks(self, level):
4910     if level == locking.LEVEL_NODE:
4911       self._LockInstancesNodes()
4912
4913   def CheckPrereq(self):
4914     """Check prerequisites.
4915
4916     This checks that the instance is in the cluster.
4917
4918     """
4919     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4920     assert self.instance is not None, \
4921       "Cannot retrieve locked instance %s" % self.op.instance_name
4922
4923   def Exec(self, feedback_fn):
4924     """Deactivate the disks
4925
4926     """
4927     instance = self.instance
4928     if self.op.force:
4929       _ShutdownInstanceDisks(self, instance)
4930     else:
4931       _SafeShutdownInstanceDisks(self, instance)
4932
4933
4934 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4935   """Shutdown block devices of an instance.
4936
4937   This function checks if an instance is running, before calling
4938   _ShutdownInstanceDisks.
4939
4940   """
4941   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4942   _ShutdownInstanceDisks(lu, instance, disks=disks)
4943
4944
4945 def _ExpandCheckDisks(instance, disks):
4946   """Return the instance disks selected by the disks list
4947
4948   @type disks: list of L{objects.Disk} or None
4949   @param disks: selected disks
4950   @rtype: list of L{objects.Disk}
4951   @return: selected instance disks to act on
4952
4953   """
4954   if disks is None:
4955     return instance.disks
4956   else:
4957     if not set(disks).issubset(instance.disks):
4958       raise errors.ProgrammerError("Can only act on disks belonging to the"
4959                                    " target instance")
4960     return disks
4961
4962
4963 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4964   """Shutdown block devices of an instance.
4965
4966   This does the shutdown on all nodes of the instance.
4967
4968   If the ignore_primary is false, errors on the primary node are
4969   ignored.
4970
4971   """
4972   all_result = True
4973   disks = _ExpandCheckDisks(instance, disks)
4974
4975   for disk in disks:
4976     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4977       lu.cfg.SetDiskID(top_disk, node)
4978       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4979       msg = result.fail_msg
4980       if msg:
4981         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4982                       disk.iv_name, node, msg)
4983         if ((node == instance.primary_node and not ignore_primary) or
4984             (node != instance.primary_node and not result.offline)):
4985           all_result = False
4986   return all_result
4987
4988
4989 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4990   """Checks if a node has enough free memory.
4991
4992   This function check if a given node has the needed amount of free
4993   memory. In case the node has less memory or we cannot get the
4994   information from the node, this function raise an OpPrereqError
4995   exception.
4996
4997   @type lu: C{LogicalUnit}
4998   @param lu: a logical unit from which we get configuration data
4999   @type node: C{str}
5000   @param node: the node to check
5001   @type reason: C{str}
5002   @param reason: string to use in the error message
5003   @type requested: C{int}
5004   @param requested: the amount of memory in MiB to check for
5005   @type hypervisor_name: C{str}
5006   @param hypervisor_name: the hypervisor to ask for memory stats
5007   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5008       we cannot check the node
5009
5010   """
5011   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5012   nodeinfo[node].Raise("Can't get data from node %s" % node,
5013                        prereq=True, ecode=errors.ECODE_ENVIRON)
5014   free_mem = nodeinfo[node].payload.get('memory_free', None)
5015   if not isinstance(free_mem, int):
5016     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5017                                " was '%s'" % (node, free_mem),
5018                                errors.ECODE_ENVIRON)
5019   if requested > free_mem:
5020     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5021                                " needed %s MiB, available %s MiB" %
5022                                (node, reason, requested, free_mem),
5023                                errors.ECODE_NORES)
5024
5025
5026 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5027   """Checks if nodes have enough free disk space in the all VGs.
5028
5029   This function check if all given nodes have the needed amount of
5030   free disk. In case any node has less disk or we cannot get the
5031   information from the node, this function raise an OpPrereqError
5032   exception.
5033
5034   @type lu: C{LogicalUnit}
5035   @param lu: a logical unit from which we get configuration data
5036   @type nodenames: C{list}
5037   @param nodenames: the list of node names to check
5038   @type req_sizes: C{dict}
5039   @param req_sizes: the hash of vg and corresponding amount of disk in
5040       MiB to check for
5041   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5042       or we cannot check the node
5043
5044   """
5045   for vg, req_size in req_sizes.items():
5046     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5047
5048
5049 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5050   """Checks if nodes have enough free disk space in the specified VG.
5051
5052   This function check if all given nodes have the needed amount of
5053   free disk. In case any node has less disk or we cannot get the
5054   information from the node, this function raise an OpPrereqError
5055   exception.
5056
5057   @type lu: C{LogicalUnit}
5058   @param lu: a logical unit from which we get configuration data
5059   @type nodenames: C{list}
5060   @param nodenames: the list of node names to check
5061   @type vg: C{str}
5062   @param vg: the volume group to check
5063   @type requested: C{int}
5064   @param requested: the amount of disk in MiB to check for
5065   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5066       or we cannot check the node
5067
5068   """
5069   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5070   for node in nodenames:
5071     info = nodeinfo[node]
5072     info.Raise("Cannot get current information from node %s" % node,
5073                prereq=True, ecode=errors.ECODE_ENVIRON)
5074     vg_free = info.payload.get("vg_free", None)
5075     if not isinstance(vg_free, int):
5076       raise errors.OpPrereqError("Can't compute free disk space on node"
5077                                  " %s for vg %s, result was '%s'" %
5078                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5079     if requested > vg_free:
5080       raise errors.OpPrereqError("Not enough disk space on target node %s"
5081                                  " vg %s: required %d MiB, available %d MiB" %
5082                                  (node, vg, requested, vg_free),
5083                                  errors.ECODE_NORES)
5084
5085
5086 class LUInstanceStartup(LogicalUnit):
5087   """Starts an instance.
5088
5089   """
5090   HPATH = "instance-start"
5091   HTYPE = constants.HTYPE_INSTANCE
5092   REQ_BGL = False
5093
5094   def CheckArguments(self):
5095     # extra beparams
5096     if self.op.beparams:
5097       # fill the beparams dict
5098       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5099
5100   def ExpandNames(self):
5101     self._ExpandAndLockInstance()
5102
5103   def BuildHooksEnv(self):
5104     """Build hooks env.
5105
5106     This runs on master, primary and secondary nodes of the instance.
5107
5108     """
5109     env = {
5110       "FORCE": self.op.force,
5111       }
5112     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5113     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5114     return env, nl, nl
5115
5116   def CheckPrereq(self):
5117     """Check prerequisites.
5118
5119     This checks that the instance is in the cluster.
5120
5121     """
5122     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5123     assert self.instance is not None, \
5124       "Cannot retrieve locked instance %s" % self.op.instance_name
5125
5126     # extra hvparams
5127     if self.op.hvparams:
5128       # check hypervisor parameter syntax (locally)
5129       cluster = self.cfg.GetClusterInfo()
5130       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5131       filled_hvp = cluster.FillHV(instance)
5132       filled_hvp.update(self.op.hvparams)
5133       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5134       hv_type.CheckParameterSyntax(filled_hvp)
5135       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5136
5137     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5138
5139     if self.primary_offline and self.op.ignore_offline_nodes:
5140       self.proc.LogWarning("Ignoring offline primary node")
5141
5142       if self.op.hvparams or self.op.beparams:
5143         self.proc.LogWarning("Overridden parameters are ignored")
5144     else:
5145       _CheckNodeOnline(self, instance.primary_node)
5146
5147       bep = self.cfg.GetClusterInfo().FillBE(instance)
5148
5149       # check bridges existence
5150       _CheckInstanceBridgesExist(self, instance)
5151
5152       remote_info = self.rpc.call_instance_info(instance.primary_node,
5153                                                 instance.name,
5154                                                 instance.hypervisor)
5155       remote_info.Raise("Error checking node %s" % instance.primary_node,
5156                         prereq=True, ecode=errors.ECODE_ENVIRON)
5157       if not remote_info.payload: # not running already
5158         _CheckNodeFreeMemory(self, instance.primary_node,
5159                              "starting instance %s" % instance.name,
5160                              bep[constants.BE_MEMORY], instance.hypervisor)
5161
5162   def Exec(self, feedback_fn):
5163     """Start the instance.
5164
5165     """
5166     instance = self.instance
5167     force = self.op.force
5168
5169     self.cfg.MarkInstanceUp(instance.name)
5170
5171     if self.primary_offline:
5172       assert self.op.ignore_offline_nodes
5173       self.proc.LogInfo("Primary node offline, marked instance as started")
5174     else:
5175       node_current = instance.primary_node
5176
5177       _StartInstanceDisks(self, instance, force)
5178
5179       result = self.rpc.call_instance_start(node_current, instance,
5180                                             self.op.hvparams, self.op.beparams)
5181       msg = result.fail_msg
5182       if msg:
5183         _ShutdownInstanceDisks(self, instance)
5184         raise errors.OpExecError("Could not start instance: %s" % msg)
5185
5186
5187 class LUInstanceReboot(LogicalUnit):
5188   """Reboot an instance.
5189
5190   """
5191   HPATH = "instance-reboot"
5192   HTYPE = constants.HTYPE_INSTANCE
5193   REQ_BGL = False
5194
5195   def ExpandNames(self):
5196     self._ExpandAndLockInstance()
5197
5198   def BuildHooksEnv(self):
5199     """Build hooks env.
5200
5201     This runs on master, primary and secondary nodes of the instance.
5202
5203     """
5204     env = {
5205       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5206       "REBOOT_TYPE": self.op.reboot_type,
5207       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5208       }
5209     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5210     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5211     return env, nl, nl
5212
5213   def CheckPrereq(self):
5214     """Check prerequisites.
5215
5216     This checks that the instance is in the cluster.
5217
5218     """
5219     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5220     assert self.instance is not None, \
5221       "Cannot retrieve locked instance %s" % self.op.instance_name
5222
5223     _CheckNodeOnline(self, instance.primary_node)
5224
5225     # check bridges existence
5226     _CheckInstanceBridgesExist(self, instance)
5227
5228   def Exec(self, feedback_fn):
5229     """Reboot the instance.
5230
5231     """
5232     instance = self.instance
5233     ignore_secondaries = self.op.ignore_secondaries
5234     reboot_type = self.op.reboot_type
5235
5236     remote_info = self.rpc.call_instance_info(instance.primary_node,
5237                                               instance.name,
5238                                               instance.hypervisor)
5239     remote_info.Raise("Error checking node %s" % instance.primary_node)
5240     instance_running = bool(remote_info.payload)
5241
5242     node_current = instance.primary_node
5243
5244     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5245                                             constants.INSTANCE_REBOOT_HARD]:
5246       for disk in instance.disks:
5247         self.cfg.SetDiskID(disk, node_current)
5248       result = self.rpc.call_instance_reboot(node_current, instance,
5249                                              reboot_type,
5250                                              self.op.shutdown_timeout)
5251       result.Raise("Could not reboot instance")
5252     else:
5253       if instance_running:
5254         result = self.rpc.call_instance_shutdown(node_current, instance,
5255                                                  self.op.shutdown_timeout)
5256         result.Raise("Could not shutdown instance for full reboot")
5257         _ShutdownInstanceDisks(self, instance)
5258       else:
5259         self.LogInfo("Instance %s was already stopped, starting now",
5260                      instance.name)
5261       _StartInstanceDisks(self, instance, ignore_secondaries)
5262       result = self.rpc.call_instance_start(node_current, instance, None, None)
5263       msg = result.fail_msg
5264       if msg:
5265         _ShutdownInstanceDisks(self, instance)
5266         raise errors.OpExecError("Could not start instance for"
5267                                  " full reboot: %s" % msg)
5268
5269     self.cfg.MarkInstanceUp(instance.name)
5270
5271
5272 class LUInstanceShutdown(LogicalUnit):
5273   """Shutdown an instance.
5274
5275   """
5276   HPATH = "instance-stop"
5277   HTYPE = constants.HTYPE_INSTANCE
5278   REQ_BGL = False
5279
5280   def ExpandNames(self):
5281     self._ExpandAndLockInstance()
5282
5283   def BuildHooksEnv(self):
5284     """Build hooks env.
5285
5286     This runs on master, primary and secondary nodes of the instance.
5287
5288     """
5289     env = _BuildInstanceHookEnvByObject(self, self.instance)
5290     env["TIMEOUT"] = self.op.timeout
5291     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5292     return env, nl, nl
5293
5294   def CheckPrereq(self):
5295     """Check prerequisites.
5296
5297     This checks that the instance is in the cluster.
5298
5299     """
5300     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5301     assert self.instance is not None, \
5302       "Cannot retrieve locked instance %s" % self.op.instance_name
5303
5304     self.primary_offline = \
5305       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5306
5307     if self.primary_offline and self.op.ignore_offline_nodes:
5308       self.proc.LogWarning("Ignoring offline primary node")
5309     else:
5310       _CheckNodeOnline(self, self.instance.primary_node)
5311
5312   def Exec(self, feedback_fn):
5313     """Shutdown the instance.
5314
5315     """
5316     instance = self.instance
5317     node_current = instance.primary_node
5318     timeout = self.op.timeout
5319
5320     self.cfg.MarkInstanceDown(instance.name)
5321
5322     if self.primary_offline:
5323       assert self.op.ignore_offline_nodes
5324       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5325     else:
5326       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5327       msg = result.fail_msg
5328       if msg:
5329         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5330
5331       _ShutdownInstanceDisks(self, instance)
5332
5333
5334 class LUInstanceReinstall(LogicalUnit):
5335   """Reinstall an instance.
5336
5337   """
5338   HPATH = "instance-reinstall"
5339   HTYPE = constants.HTYPE_INSTANCE
5340   REQ_BGL = False
5341
5342   def ExpandNames(self):
5343     self._ExpandAndLockInstance()
5344
5345   def BuildHooksEnv(self):
5346     """Build hooks env.
5347
5348     This runs on master, primary and secondary nodes of the instance.
5349
5350     """
5351     env = _BuildInstanceHookEnvByObject(self, self.instance)
5352     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5353     return env, nl, nl
5354
5355   def CheckPrereq(self):
5356     """Check prerequisites.
5357
5358     This checks that the instance is in the cluster and is not running.
5359
5360     """
5361     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5362     assert instance is not None, \
5363       "Cannot retrieve locked instance %s" % self.op.instance_name
5364     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5365                      " offline, cannot reinstall")
5366     for node in instance.secondary_nodes:
5367       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5368                        " cannot reinstall")
5369
5370     if instance.disk_template == constants.DT_DISKLESS:
5371       raise errors.OpPrereqError("Instance '%s' has no disks" %
5372                                  self.op.instance_name,
5373                                  errors.ECODE_INVAL)
5374     _CheckInstanceDown(self, instance, "cannot reinstall")
5375
5376     if self.op.os_type is not None:
5377       # OS verification
5378       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5379       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5380       instance_os = self.op.os_type
5381     else:
5382       instance_os = instance.os
5383
5384     nodelist = list(instance.all_nodes)
5385
5386     if self.op.osparams:
5387       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5388       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5389       self.os_inst = i_osdict # the new dict (without defaults)
5390     else:
5391       self.os_inst = None
5392
5393     self.instance = instance
5394
5395   def Exec(self, feedback_fn):
5396     """Reinstall the instance.
5397
5398     """
5399     inst = self.instance
5400
5401     if self.op.os_type is not None:
5402       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5403       inst.os = self.op.os_type
5404       # Write to configuration
5405       self.cfg.Update(inst, feedback_fn)
5406
5407     _StartInstanceDisks(self, inst, None)
5408     try:
5409       feedback_fn("Running the instance OS create scripts...")
5410       # FIXME: pass debug option from opcode to backend
5411       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5412                                              self.op.debug_level,
5413                                              osparams=self.os_inst)
5414       result.Raise("Could not install OS for instance %s on node %s" %
5415                    (inst.name, inst.primary_node))
5416     finally:
5417       _ShutdownInstanceDisks(self, inst)
5418
5419
5420 class LUInstanceRecreateDisks(LogicalUnit):
5421   """Recreate an instance's missing disks.
5422
5423   """
5424   HPATH = "instance-recreate-disks"
5425   HTYPE = constants.HTYPE_INSTANCE
5426   REQ_BGL = False
5427
5428   def ExpandNames(self):
5429     self._ExpandAndLockInstance()
5430
5431   def BuildHooksEnv(self):
5432     """Build hooks env.
5433
5434     This runs on master, primary and secondary nodes of the instance.
5435
5436     """
5437     env = _BuildInstanceHookEnvByObject(self, self.instance)
5438     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5439     return env, nl, nl
5440
5441   def CheckPrereq(self):
5442     """Check prerequisites.
5443
5444     This checks that the instance is in the cluster and is not running.
5445
5446     """
5447     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5448     assert instance is not None, \
5449       "Cannot retrieve locked instance %s" % self.op.instance_name
5450     _CheckNodeOnline(self, instance.primary_node)
5451
5452     if instance.disk_template == constants.DT_DISKLESS:
5453       raise errors.OpPrereqError("Instance '%s' has no disks" %
5454                                  self.op.instance_name, errors.ECODE_INVAL)
5455     _CheckInstanceDown(self, instance, "cannot recreate disks")
5456
5457     if not self.op.disks:
5458       self.op.disks = range(len(instance.disks))
5459     else:
5460       for idx in self.op.disks:
5461         if idx >= len(instance.disks):
5462           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5463                                      errors.ECODE_INVAL)
5464
5465     self.instance = instance
5466
5467   def Exec(self, feedback_fn):
5468     """Recreate the disks.
5469
5470     """
5471     to_skip = []
5472     for idx, _ in enumerate(self.instance.disks):
5473       if idx not in self.op.disks: # disk idx has not been passed in
5474         to_skip.append(idx)
5475         continue
5476
5477     _CreateDisks(self, self.instance, to_skip=to_skip)
5478
5479
5480 class LUInstanceRename(LogicalUnit):
5481   """Rename an instance.
5482
5483   """
5484   HPATH = "instance-rename"
5485   HTYPE = constants.HTYPE_INSTANCE
5486
5487   def CheckArguments(self):
5488     """Check arguments.
5489
5490     """
5491     if self.op.ip_check and not self.op.name_check:
5492       # TODO: make the ip check more flexible and not depend on the name check
5493       raise errors.OpPrereqError("Cannot do ip check without a name check",
5494                                  errors.ECODE_INVAL)
5495
5496   def BuildHooksEnv(self):
5497     """Build hooks env.
5498
5499     This runs on master, primary and secondary nodes of the instance.
5500
5501     """
5502     env = _BuildInstanceHookEnvByObject(self, self.instance)
5503     env["INSTANCE_NEW_NAME"] = self.op.new_name
5504     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5505     return env, nl, nl
5506
5507   def CheckPrereq(self):
5508     """Check prerequisites.
5509
5510     This checks that the instance is in the cluster and is not running.
5511
5512     """
5513     self.op.instance_name = _ExpandInstanceName(self.cfg,
5514                                                 self.op.instance_name)
5515     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5516     assert instance is not None
5517     _CheckNodeOnline(self, instance.primary_node)
5518     _CheckInstanceDown(self, instance, "cannot rename")
5519     self.instance = instance
5520
5521     new_name = self.op.new_name
5522     if self.op.name_check:
5523       hostname = netutils.GetHostname(name=new_name)
5524       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5525                    hostname.name)
5526       new_name = self.op.new_name = hostname.name
5527       if (self.op.ip_check and
5528           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5529         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5530                                    (hostname.ip, new_name),
5531                                    errors.ECODE_NOTUNIQUE)
5532
5533     instance_list = self.cfg.GetInstanceList()
5534     if new_name in instance_list and new_name != instance.name:
5535       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5536                                  new_name, errors.ECODE_EXISTS)
5537
5538   def Exec(self, feedback_fn):
5539     """Rename the instance.
5540
5541     """
5542     inst = self.instance
5543     old_name = inst.name
5544
5545     rename_file_storage = False
5546     if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5547         self.op.new_name != inst.name):
5548       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5549       rename_file_storage = True
5550
5551     self.cfg.RenameInstance(inst.name, self.op.new_name)
5552     # Change the instance lock. This is definitely safe while we hold the BGL
5553     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5554     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5555
5556     # re-read the instance from the configuration after rename
5557     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5558
5559     if rename_file_storage:
5560       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5561       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5562                                                      old_file_storage_dir,
5563                                                      new_file_storage_dir)
5564       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5565                    " (but the instance has been renamed in Ganeti)" %
5566                    (inst.primary_node, old_file_storage_dir,
5567                     new_file_storage_dir))
5568
5569     _StartInstanceDisks(self, inst, None)
5570     try:
5571       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5572                                                  old_name, self.op.debug_level)
5573       msg = result.fail_msg
5574       if msg:
5575         msg = ("Could not run OS rename script for instance %s on node %s"
5576                " (but the instance has been renamed in Ganeti): %s" %
5577                (inst.name, inst.primary_node, msg))
5578         self.proc.LogWarning(msg)
5579     finally:
5580       _ShutdownInstanceDisks(self, inst)
5581
5582     return inst.name
5583
5584
5585 class LUInstanceRemove(LogicalUnit):
5586   """Remove an instance.
5587
5588   """
5589   HPATH = "instance-remove"
5590   HTYPE = constants.HTYPE_INSTANCE
5591   REQ_BGL = False
5592
5593   def ExpandNames(self):
5594     self._ExpandAndLockInstance()
5595     self.needed_locks[locking.LEVEL_NODE] = []
5596     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5597
5598   def DeclareLocks(self, level):
5599     if level == locking.LEVEL_NODE:
5600       self._LockInstancesNodes()
5601
5602   def BuildHooksEnv(self):
5603     """Build hooks env.
5604
5605     This runs on master, primary and secondary nodes of the instance.
5606
5607     """
5608     env = _BuildInstanceHookEnvByObject(self, self.instance)
5609     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5610     nl = [self.cfg.GetMasterNode()]
5611     nl_post = list(self.instance.all_nodes) + nl
5612     return env, nl, nl_post
5613
5614   def CheckPrereq(self):
5615     """Check prerequisites.
5616
5617     This checks that the instance is in the cluster.
5618
5619     """
5620     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5621     assert self.instance is not None, \
5622       "Cannot retrieve locked instance %s" % self.op.instance_name
5623
5624   def Exec(self, feedback_fn):
5625     """Remove the instance.
5626
5627     """
5628     instance = self.instance
5629     logging.info("Shutting down instance %s on node %s",
5630                  instance.name, instance.primary_node)
5631
5632     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5633                                              self.op.shutdown_timeout)
5634     msg = result.fail_msg
5635     if msg:
5636       if self.op.ignore_failures:
5637         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5638       else:
5639         raise errors.OpExecError("Could not shutdown instance %s on"
5640                                  " node %s: %s" %
5641                                  (instance.name, instance.primary_node, msg))
5642
5643     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5644
5645
5646 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5647   """Utility function to remove an instance.
5648
5649   """
5650   logging.info("Removing block devices for instance %s", instance.name)
5651
5652   if not _RemoveDisks(lu, instance):
5653     if not ignore_failures:
5654       raise errors.OpExecError("Can't remove instance's disks")
5655     feedback_fn("Warning: can't remove instance's disks")
5656
5657   logging.info("Removing instance %s out of cluster config", instance.name)
5658
5659   lu.cfg.RemoveInstance(instance.name)
5660
5661   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5662     "Instance lock removal conflict"
5663
5664   # Remove lock for the instance
5665   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5666
5667
5668 class LUInstanceQuery(NoHooksLU):
5669   """Logical unit for querying instances.
5670
5671   """
5672   # pylint: disable-msg=W0142
5673   REQ_BGL = False
5674
5675   def CheckArguments(self):
5676     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5677                              self.op.output_fields, self.op.use_locking)
5678
5679   def ExpandNames(self):
5680     self.iq.ExpandNames(self)
5681
5682   def DeclareLocks(self, level):
5683     self.iq.DeclareLocks(self, level)
5684
5685   def Exec(self, feedback_fn):
5686     return self.iq.OldStyleQuery(self)
5687
5688
5689 class LUInstanceFailover(LogicalUnit):
5690   """Failover an instance.
5691
5692   """
5693   HPATH = "instance-failover"
5694   HTYPE = constants.HTYPE_INSTANCE
5695   REQ_BGL = False
5696
5697   def ExpandNames(self):
5698     self._ExpandAndLockInstance()
5699     self.needed_locks[locking.LEVEL_NODE] = []
5700     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5701
5702   def DeclareLocks(self, level):
5703     if level == locking.LEVEL_NODE:
5704       self._LockInstancesNodes()
5705
5706   def BuildHooksEnv(self):
5707     """Build hooks env.
5708
5709     This runs on master, primary and secondary nodes of the instance.
5710
5711     """
5712     instance = self.instance
5713     source_node = instance.primary_node
5714     target_node = instance.secondary_nodes[0]
5715     env = {
5716       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5717       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5718       "OLD_PRIMARY": source_node,
5719       "OLD_SECONDARY": target_node,
5720       "NEW_PRIMARY": target_node,
5721       "NEW_SECONDARY": source_node,
5722       }
5723     env.update(_BuildInstanceHookEnvByObject(self, instance))
5724     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5725     nl_post = list(nl)
5726     nl_post.append(source_node)
5727     return env, nl, nl_post
5728
5729   def CheckPrereq(self):
5730     """Check prerequisites.
5731
5732     This checks that the instance is in the cluster.
5733
5734     """
5735     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5736     assert self.instance is not None, \
5737       "Cannot retrieve locked instance %s" % self.op.instance_name
5738
5739     bep = self.cfg.GetClusterInfo().FillBE(instance)
5740     if instance.disk_template not in constants.DTS_NET_MIRROR:
5741       raise errors.OpPrereqError("Instance's disk layout is not"
5742                                  " network mirrored, cannot failover.",
5743                                  errors.ECODE_STATE)
5744
5745     secondary_nodes = instance.secondary_nodes
5746     if not secondary_nodes:
5747       raise errors.ProgrammerError("no secondary node but using "
5748                                    "a mirrored disk template")
5749
5750     target_node = secondary_nodes[0]
5751     _CheckNodeOnline(self, target_node)
5752     _CheckNodeNotDrained(self, target_node)
5753     if instance.admin_up:
5754       # check memory requirements on the secondary node
5755       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5756                            instance.name, bep[constants.BE_MEMORY],
5757                            instance.hypervisor)
5758     else:
5759       self.LogInfo("Not checking memory on the secondary node as"
5760                    " instance will not be started")
5761
5762     # check bridge existance
5763     _CheckInstanceBridgesExist(self, instance, node=target_node)
5764
5765   def Exec(self, feedback_fn):
5766     """Failover an instance.
5767
5768     The failover is done by shutting it down on its present node and
5769     starting it on the secondary.
5770
5771     """
5772     instance = self.instance
5773     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5774
5775     source_node = instance.primary_node
5776     target_node = instance.secondary_nodes[0]
5777
5778     if instance.admin_up:
5779       feedback_fn("* checking disk consistency between source and target")
5780       for dev in instance.disks:
5781         # for drbd, these are drbd over lvm
5782         if not _CheckDiskConsistency(self, dev, target_node, False):
5783           if not self.op.ignore_consistency:
5784             raise errors.OpExecError("Disk %s is degraded on target node,"
5785                                      " aborting failover." % dev.iv_name)
5786     else:
5787       feedback_fn("* not checking disk consistency as instance is not running")
5788
5789     feedback_fn("* shutting down instance on source node")
5790     logging.info("Shutting down instance %s on node %s",
5791                  instance.name, source_node)
5792
5793     result = self.rpc.call_instance_shutdown(source_node, instance,
5794                                              self.op.shutdown_timeout)
5795     msg = result.fail_msg
5796     if msg:
5797       if self.op.ignore_consistency or primary_node.offline:
5798         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5799                              " Proceeding anyway. Please make sure node"
5800                              " %s is down. Error details: %s",
5801                              instance.name, source_node, source_node, msg)
5802       else:
5803         raise errors.OpExecError("Could not shutdown instance %s on"
5804                                  " node %s: %s" %
5805                                  (instance.name, source_node, msg))
5806
5807     feedback_fn("* deactivating the instance's disks on source node")
5808     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5809       raise errors.OpExecError("Can't shut down the instance's disks.")
5810
5811     instance.primary_node = target_node
5812     # distribute new instance config to the other nodes
5813     self.cfg.Update(instance, feedback_fn)
5814
5815     # Only start the instance if it's marked as up
5816     if instance.admin_up:
5817       feedback_fn("* activating the instance's disks on target node")
5818       logging.info("Starting instance %s on node %s",
5819                    instance.name, target_node)
5820
5821       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5822                                            ignore_secondaries=True)
5823       if not disks_ok:
5824         _ShutdownInstanceDisks(self, instance)
5825         raise errors.OpExecError("Can't activate the instance's disks")
5826
5827       feedback_fn("* starting the instance on the target node")
5828       result = self.rpc.call_instance_start(target_node, instance, None, None)
5829       msg = result.fail_msg
5830       if msg:
5831         _ShutdownInstanceDisks(self, instance)
5832         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5833                                  (instance.name, target_node, msg))
5834
5835
5836 class LUInstanceMigrate(LogicalUnit):
5837   """Migrate an instance.
5838
5839   This is migration without shutting down, compared to the failover,
5840   which is done with shutdown.
5841
5842   """
5843   HPATH = "instance-migrate"
5844   HTYPE = constants.HTYPE_INSTANCE
5845   REQ_BGL = False
5846
5847   def ExpandNames(self):
5848     self._ExpandAndLockInstance()
5849
5850     self.needed_locks[locking.LEVEL_NODE] = []
5851     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5852
5853     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5854                                        self.op.cleanup)
5855     self.tasklets = [self._migrater]
5856
5857   def DeclareLocks(self, level):
5858     if level == locking.LEVEL_NODE:
5859       self._LockInstancesNodes()
5860
5861   def BuildHooksEnv(self):
5862     """Build hooks env.
5863
5864     This runs on master, primary and secondary nodes of the instance.
5865
5866     """
5867     instance = self._migrater.instance
5868     source_node = instance.primary_node
5869     target_node = instance.secondary_nodes[0]
5870     env = _BuildInstanceHookEnvByObject(self, instance)
5871     env["MIGRATE_LIVE"] = self._migrater.live
5872     env["MIGRATE_CLEANUP"] = self.op.cleanup
5873     env.update({
5874         "OLD_PRIMARY": source_node,
5875         "OLD_SECONDARY": target_node,
5876         "NEW_PRIMARY": target_node,
5877         "NEW_SECONDARY": source_node,
5878         })
5879     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5880     nl_post = list(nl)
5881     nl_post.append(source_node)
5882     return env, nl, nl_post
5883
5884
5885 class LUInstanceMove(LogicalUnit):
5886   """Move an instance by data-copying.
5887
5888   """
5889   HPATH = "instance-move"
5890   HTYPE = constants.HTYPE_INSTANCE
5891   REQ_BGL = False
5892
5893   def ExpandNames(self):
5894     self._ExpandAndLockInstance()
5895     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5896     self.op.target_node = target_node
5897     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5898     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5899
5900   def DeclareLocks(self, level):
5901     if level == locking.LEVEL_NODE:
5902       self._LockInstancesNodes(primary_only=True)
5903
5904   def BuildHooksEnv(self):
5905     """Build hooks env.
5906
5907     This runs on master, primary and secondary nodes of the instance.
5908
5909     """
5910     env = {
5911       "TARGET_NODE": self.op.target_node,
5912       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5913       }
5914     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5915     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5916                                        self.op.target_node]
5917     return env, nl, nl
5918
5919   def CheckPrereq(self):
5920     """Check prerequisites.
5921
5922     This checks that the instance is in the cluster.
5923
5924     """
5925     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5926     assert self.instance is not None, \
5927       "Cannot retrieve locked instance %s" % self.op.instance_name
5928
5929     node = self.cfg.GetNodeInfo(self.op.target_node)
5930     assert node is not None, \
5931       "Cannot retrieve locked node %s" % self.op.target_node
5932
5933     self.target_node = target_node = node.name
5934
5935     if target_node == instance.primary_node:
5936       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5937                                  (instance.name, target_node),
5938                                  errors.ECODE_STATE)
5939
5940     bep = self.cfg.GetClusterInfo().FillBE(instance)
5941
5942     for idx, dsk in enumerate(instance.disks):
5943       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5944         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5945                                    " cannot copy" % idx, errors.ECODE_STATE)
5946
5947     _CheckNodeOnline(self, target_node)
5948     _CheckNodeNotDrained(self, target_node)
5949     _CheckNodeVmCapable(self, target_node)
5950
5951     if instance.admin_up:
5952       # check memory requirements on the secondary node
5953       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5954                            instance.name, bep[constants.BE_MEMORY],
5955                            instance.hypervisor)
5956     else:
5957       self.LogInfo("Not checking memory on the secondary node as"
5958                    " instance will not be started")
5959
5960     # check bridge existance
5961     _CheckInstanceBridgesExist(self, instance, node=target_node)
5962
5963   def Exec(self, feedback_fn):
5964     """Move an instance.
5965
5966     The move is done by shutting it down on its present node, copying
5967     the data over (slow) and starting it on the new node.
5968
5969     """
5970     instance = self.instance
5971
5972     source_node = instance.primary_node
5973     target_node = self.target_node
5974
5975     self.LogInfo("Shutting down instance %s on source node %s",
5976                  instance.name, source_node)
5977
5978     result = self.rpc.call_instance_shutdown(source_node, instance,
5979                                              self.op.shutdown_timeout)
5980     msg = result.fail_msg
5981     if msg:
5982       if self.op.ignore_consistency:
5983         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5984                              " Proceeding anyway. Please make sure node"
5985                              " %s is down. Error details: %s",
5986                              instance.name, source_node, source_node, msg)
5987       else:
5988         raise errors.OpExecError("Could not shutdown instance %s on"
5989                                  " node %s: %s" %
5990                                  (instance.name, source_node, msg))
5991
5992     # create the target disks
5993     try:
5994       _CreateDisks(self, instance, target_node=target_node)
5995     except errors.OpExecError:
5996       self.LogWarning("Device creation failed, reverting...")
5997       try:
5998         _RemoveDisks(self, instance, target_node=target_node)
5999       finally:
6000         self.cfg.ReleaseDRBDMinors(instance.name)
6001         raise
6002
6003     cluster_name = self.cfg.GetClusterInfo().cluster_name
6004
6005     errs = []
6006     # activate, get path, copy the data over
6007     for idx, disk in enumerate(instance.disks):
6008       self.LogInfo("Copying data for disk %d", idx)
6009       result = self.rpc.call_blockdev_assemble(target_node, disk,
6010                                                instance.name, True, idx)
6011       if result.fail_msg:
6012         self.LogWarning("Can't assemble newly created disk %d: %s",
6013                         idx, result.fail_msg)
6014         errs.append(result.fail_msg)
6015         break
6016       dev_path = result.payload
6017       result = self.rpc.call_blockdev_export(source_node, disk,
6018                                              target_node, dev_path,
6019                                              cluster_name)
6020       if result.fail_msg:
6021         self.LogWarning("Can't copy data over for disk %d: %s",
6022                         idx, result.fail_msg)
6023         errs.append(result.fail_msg)
6024         break
6025
6026     if errs:
6027       self.LogWarning("Some disks failed to copy, aborting")
6028       try:
6029         _RemoveDisks(self, instance, target_node=target_node)
6030       finally:
6031         self.cfg.ReleaseDRBDMinors(instance.name)
6032         raise errors.OpExecError("Errors during disk copy: %s" %
6033                                  (",".join(errs),))
6034
6035     instance.primary_node = target_node
6036     self.cfg.Update(instance, feedback_fn)
6037
6038     self.LogInfo("Removing the disks on the original node")
6039     _RemoveDisks(self, instance, target_node=source_node)
6040
6041     # Only start the instance if it's marked as up
6042     if instance.admin_up:
6043       self.LogInfo("Starting instance %s on node %s",
6044                    instance.name, target_node)
6045
6046       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6047                                            ignore_secondaries=True)
6048       if not disks_ok:
6049         _ShutdownInstanceDisks(self, instance)
6050         raise errors.OpExecError("Can't activate the instance's disks")
6051
6052       result = self.rpc.call_instance_start(target_node, instance, None, None)
6053       msg = result.fail_msg
6054       if msg:
6055         _ShutdownInstanceDisks(self, instance)
6056         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6057                                  (instance.name, target_node, msg))
6058
6059
6060 class LUNodeMigrate(LogicalUnit):
6061   """Migrate all instances from a node.
6062
6063   """
6064   HPATH = "node-migrate"
6065   HTYPE = constants.HTYPE_NODE
6066   REQ_BGL = False
6067
6068   def ExpandNames(self):
6069     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6070
6071     self.needed_locks = {
6072       locking.LEVEL_NODE: [self.op.node_name],
6073       }
6074
6075     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6076
6077     # Create tasklets for migrating instances for all instances on this node
6078     names = []
6079     tasklets = []
6080
6081     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6082       logging.debug("Migrating instance %s", inst.name)
6083       names.append(inst.name)
6084
6085       tasklets.append(TLMigrateInstance(self, inst.name, False))
6086
6087     self.tasklets = tasklets
6088
6089     # Declare instance locks
6090     self.needed_locks[locking.LEVEL_INSTANCE] = names
6091
6092   def DeclareLocks(self, level):
6093     if level == locking.LEVEL_NODE:
6094       self._LockInstancesNodes()
6095
6096   def BuildHooksEnv(self):
6097     """Build hooks env.
6098
6099     This runs on the master, the primary and all the secondaries.
6100
6101     """
6102     env = {
6103       "NODE_NAME": self.op.node_name,
6104       }
6105
6106     nl = [self.cfg.GetMasterNode()]
6107
6108     return (env, nl, nl)
6109
6110
6111 class TLMigrateInstance(Tasklet):
6112   """Tasklet class for instance migration.
6113
6114   @type live: boolean
6115   @ivar live: whether the migration will be done live or non-live;
6116       this variable is initalized only after CheckPrereq has run
6117
6118   """
6119   def __init__(self, lu, instance_name, cleanup):
6120     """Initializes this class.
6121
6122     """
6123     Tasklet.__init__(self, lu)
6124
6125     # Parameters
6126     self.instance_name = instance_name
6127     self.cleanup = cleanup
6128     self.live = False # will be overridden later
6129
6130   def CheckPrereq(self):
6131     """Check prerequisites.
6132
6133     This checks that the instance is in the cluster.
6134
6135     """
6136     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6137     instance = self.cfg.GetInstanceInfo(instance_name)
6138     assert instance is not None
6139
6140     if instance.disk_template != constants.DT_DRBD8:
6141       raise errors.OpPrereqError("Instance's disk layout is not"
6142                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6143
6144     secondary_nodes = instance.secondary_nodes
6145     if not secondary_nodes:
6146       raise errors.ConfigurationError("No secondary node but using"
6147                                       " drbd8 disk template")
6148
6149     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6150
6151     target_node = secondary_nodes[0]
6152     # check memory requirements on the secondary node
6153     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6154                          instance.name, i_be[constants.BE_MEMORY],
6155                          instance.hypervisor)
6156
6157     # check bridge existance
6158     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6159
6160     if not self.cleanup:
6161       _CheckNodeNotDrained(self.lu, target_node)
6162       result = self.rpc.call_instance_migratable(instance.primary_node,
6163                                                  instance)
6164       result.Raise("Can't migrate, please use failover",
6165                    prereq=True, ecode=errors.ECODE_STATE)
6166
6167     self.instance = instance
6168
6169     if self.lu.op.live is not None and self.lu.op.mode is not None:
6170       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6171                                  " parameters are accepted",
6172                                  errors.ECODE_INVAL)
6173     if self.lu.op.live is not None:
6174       if self.lu.op.live:
6175         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6176       else:
6177         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6178       # reset the 'live' parameter to None so that repeated
6179       # invocations of CheckPrereq do not raise an exception
6180       self.lu.op.live = None
6181     elif self.lu.op.mode is None:
6182       # read the default value from the hypervisor
6183       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6184       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6185
6186     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6187
6188   def _WaitUntilSync(self):
6189     """Poll with custom rpc for disk sync.
6190
6191     This uses our own step-based rpc call.
6192
6193     """
6194     self.feedback_fn("* wait until resync is done")
6195     all_done = False
6196     while not all_done:
6197       all_done = True
6198       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6199                                             self.nodes_ip,
6200                                             self.instance.disks)
6201       min_percent = 100
6202       for node, nres in result.items():
6203         nres.Raise("Cannot resync disks on node %s" % node)
6204         node_done, node_percent = nres.payload
6205         all_done = all_done and node_done
6206         if node_percent is not None:
6207           min_percent = min(min_percent, node_percent)
6208       if not all_done:
6209         if min_percent < 100:
6210           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6211         time.sleep(2)
6212
6213   def _EnsureSecondary(self, node):
6214     """Demote a node to secondary.
6215
6216     """
6217     self.feedback_fn("* switching node %s to secondary mode" % node)
6218
6219     for dev in self.instance.disks:
6220       self.cfg.SetDiskID(dev, node)
6221
6222     result = self.rpc.call_blockdev_close(node, self.instance.name,
6223                                           self.instance.disks)
6224     result.Raise("Cannot change disk to secondary on node %s" % node)
6225
6226   def _GoStandalone(self):
6227     """Disconnect from the network.
6228
6229     """
6230     self.feedback_fn("* changing into standalone mode")
6231     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6232                                                self.instance.disks)
6233     for node, nres in result.items():
6234       nres.Raise("Cannot disconnect disks node %s" % node)
6235
6236   def _GoReconnect(self, multimaster):
6237     """Reconnect to the network.
6238
6239     """
6240     if multimaster:
6241       msg = "dual-master"
6242     else:
6243       msg = "single-master"
6244     self.feedback_fn("* changing disks into %s mode" % msg)
6245     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6246                                            self.instance.disks,
6247                                            self.instance.name, multimaster)
6248     for node, nres in result.items():
6249       nres.Raise("Cannot change disks config on node %s" % node)
6250
6251   def _ExecCleanup(self):
6252     """Try to cleanup after a failed migration.
6253
6254     The cleanup is done by:
6255       - check that the instance is running only on one node
6256         (and update the config if needed)
6257       - change disks on its secondary node to secondary
6258       - wait until disks are fully synchronized
6259       - disconnect from the network
6260       - change disks into single-master mode
6261       - wait again until disks are fully synchronized
6262
6263     """
6264     instance = self.instance
6265     target_node = self.target_node
6266     source_node = self.source_node
6267
6268     # check running on only one node
6269     self.feedback_fn("* checking where the instance actually runs"
6270                      " (if this hangs, the hypervisor might be in"
6271                      " a bad state)")
6272     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6273     for node, result in ins_l.items():
6274       result.Raise("Can't contact node %s" % node)
6275
6276     runningon_source = instance.name in ins_l[source_node].payload
6277     runningon_target = instance.name in ins_l[target_node].payload
6278
6279     if runningon_source and runningon_target:
6280       raise errors.OpExecError("Instance seems to be running on two nodes,"
6281                                " or the hypervisor is confused. You will have"
6282                                " to ensure manually that it runs only on one"
6283                                " and restart this operation.")
6284
6285     if not (runningon_source or runningon_target):
6286       raise errors.OpExecError("Instance does not seem to be running at all."
6287                                " In this case, it's safer to repair by"
6288                                " running 'gnt-instance stop' to ensure disk"
6289                                " shutdown, and then restarting it.")
6290
6291     if runningon_target:
6292       # the migration has actually succeeded, we need to update the config
6293       self.feedback_fn("* instance running on secondary node (%s),"
6294                        " updating config" % target_node)
6295       instance.primary_node = target_node
6296       self.cfg.Update(instance, self.feedback_fn)
6297       demoted_node = source_node
6298     else:
6299       self.feedback_fn("* instance confirmed to be running on its"
6300                        " primary node (%s)" % source_node)
6301       demoted_node = target_node
6302
6303     self._EnsureSecondary(demoted_node)
6304     try:
6305       self._WaitUntilSync()
6306     except errors.OpExecError:
6307       # we ignore here errors, since if the device is standalone, it
6308       # won't be able to sync
6309       pass
6310     self._GoStandalone()
6311     self._GoReconnect(False)
6312     self._WaitUntilSync()
6313
6314     self.feedback_fn("* done")
6315
6316   def _RevertDiskStatus(self):
6317     """Try to revert the disk status after a failed migration.
6318
6319     """
6320     target_node = self.target_node
6321     try:
6322       self._EnsureSecondary(target_node)
6323       self._GoStandalone()
6324       self._GoReconnect(False)
6325       self._WaitUntilSync()
6326     except errors.OpExecError, err:
6327       self.lu.LogWarning("Migration failed and I can't reconnect the"
6328                          " drives: error '%s'\n"
6329                          "Please look and recover the instance status" %
6330                          str(err))
6331
6332   def _AbortMigration(self):
6333     """Call the hypervisor code to abort a started migration.
6334
6335     """
6336     instance = self.instance
6337     target_node = self.target_node
6338     migration_info = self.migration_info
6339
6340     abort_result = self.rpc.call_finalize_migration(target_node,
6341                                                     instance,
6342                                                     migration_info,
6343                                                     False)
6344     abort_msg = abort_result.fail_msg
6345     if abort_msg:
6346       logging.error("Aborting migration failed on target node %s: %s",
6347                     target_node, abort_msg)
6348       # Don't raise an exception here, as we stil have to try to revert the
6349       # disk status, even if this step failed.
6350
6351   def _ExecMigration(self):
6352     """Migrate an instance.
6353
6354     The migrate is done by:
6355       - change the disks into dual-master mode
6356       - wait until disks are fully synchronized again
6357       - migrate the instance
6358       - change disks on the new secondary node (the old primary) to secondary
6359       - wait until disks are fully synchronized
6360       - change disks into single-master mode
6361
6362     """
6363     instance = self.instance
6364     target_node = self.target_node
6365     source_node = self.source_node
6366
6367     self.feedback_fn("* checking disk consistency between source and target")
6368     for dev in instance.disks:
6369       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6370         raise errors.OpExecError("Disk %s is degraded or not fully"
6371                                  " synchronized on target node,"
6372                                  " aborting migrate." % dev.iv_name)
6373
6374     # First get the migration information from the remote node
6375     result = self.rpc.call_migration_info(source_node, instance)
6376     msg = result.fail_msg
6377     if msg:
6378       log_err = ("Failed fetching source migration information from %s: %s" %
6379                  (source_node, msg))
6380       logging.error(log_err)
6381       raise errors.OpExecError(log_err)
6382
6383     self.migration_info = migration_info = result.payload
6384
6385     # Then switch the disks to master/master mode
6386     self._EnsureSecondary(target_node)
6387     self._GoStandalone()
6388     self._GoReconnect(True)
6389     self._WaitUntilSync()
6390
6391     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6392     result = self.rpc.call_accept_instance(target_node,
6393                                            instance,
6394                                            migration_info,
6395                                            self.nodes_ip[target_node])
6396
6397     msg = result.fail_msg
6398     if msg:
6399       logging.error("Instance pre-migration failed, trying to revert"
6400                     " disk status: %s", msg)
6401       self.feedback_fn("Pre-migration failed, aborting")
6402       self._AbortMigration()
6403       self._RevertDiskStatus()
6404       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6405                                (instance.name, msg))
6406
6407     self.feedback_fn("* migrating instance to %s" % target_node)
6408     time.sleep(10)
6409     result = self.rpc.call_instance_migrate(source_node, instance,
6410                                             self.nodes_ip[target_node],
6411                                             self.live)
6412     msg = result.fail_msg
6413     if msg:
6414       logging.error("Instance migration failed, trying to revert"
6415                     " disk status: %s", msg)
6416       self.feedback_fn("Migration failed, aborting")
6417       self._AbortMigration()
6418       self._RevertDiskStatus()
6419       raise errors.OpExecError("Could not migrate instance %s: %s" %
6420                                (instance.name, msg))
6421     time.sleep(10)
6422
6423     instance.primary_node = target_node
6424     # distribute new instance config to the other nodes
6425     self.cfg.Update(instance, self.feedback_fn)
6426
6427     result = self.rpc.call_finalize_migration(target_node,
6428                                               instance,
6429                                               migration_info,
6430                                               True)
6431     msg = result.fail_msg
6432     if msg:
6433       logging.error("Instance migration succeeded, but finalization failed:"
6434                     " %s", msg)
6435       raise errors.OpExecError("Could not finalize instance migration: %s" %
6436                                msg)
6437
6438     self._EnsureSecondary(source_node)
6439     self._WaitUntilSync()
6440     self._GoStandalone()
6441     self._GoReconnect(False)
6442     self._WaitUntilSync()
6443
6444     self.feedback_fn("* done")
6445
6446   def Exec(self, feedback_fn):
6447     """Perform the migration.
6448
6449     """
6450     feedback_fn("Migrating instance %s" % self.instance.name)
6451
6452     self.feedback_fn = feedback_fn
6453
6454     self.source_node = self.instance.primary_node
6455     self.target_node = self.instance.secondary_nodes[0]
6456     self.all_nodes = [self.source_node, self.target_node]
6457     self.nodes_ip = {
6458       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6459       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6460       }
6461
6462     if self.cleanup:
6463       return self._ExecCleanup()
6464     else:
6465       return self._ExecMigration()
6466
6467
6468 def _CreateBlockDev(lu, node, instance, device, force_create,
6469                     info, force_open):
6470   """Create a tree of block devices on a given node.
6471
6472   If this device type has to be created on secondaries, create it and
6473   all its children.
6474
6475   If not, just recurse to children keeping the same 'force' value.
6476
6477   @param lu: the lu on whose behalf we execute
6478   @param node: the node on which to create the device
6479   @type instance: L{objects.Instance}
6480   @param instance: the instance which owns the device
6481   @type device: L{objects.Disk}
6482   @param device: the device to create
6483   @type force_create: boolean
6484   @param force_create: whether to force creation of this device; this
6485       will be change to True whenever we find a device which has
6486       CreateOnSecondary() attribute
6487   @param info: the extra 'metadata' we should attach to the device
6488       (this will be represented as a LVM tag)
6489   @type force_open: boolean
6490   @param force_open: this parameter will be passes to the
6491       L{backend.BlockdevCreate} function where it specifies
6492       whether we run on primary or not, and it affects both
6493       the child assembly and the device own Open() execution
6494
6495   """
6496   if device.CreateOnSecondary():
6497     force_create = True
6498
6499   if device.children:
6500     for child in device.children:
6501       _CreateBlockDev(lu, node, instance, child, force_create,
6502                       info, force_open)
6503
6504   if not force_create:
6505     return
6506
6507   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6508
6509
6510 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6511   """Create a single block device on a given node.
6512
6513   This will not recurse over children of the device, so they must be
6514   created in advance.
6515
6516   @param lu: the lu on whose behalf we execute
6517   @param node: the node on which to create the device
6518   @type instance: L{objects.Instance}
6519   @param instance: the instance which owns the device
6520   @type device: L{objects.Disk}
6521   @param device: the device to create
6522   @param info: the extra 'metadata' we should attach to the device
6523       (this will be represented as a LVM tag)
6524   @type force_open: boolean
6525   @param force_open: this parameter will be passes to the
6526       L{backend.BlockdevCreate} function where it specifies
6527       whether we run on primary or not, and it affects both
6528       the child assembly and the device own Open() execution
6529
6530   """
6531   lu.cfg.SetDiskID(device, node)
6532   result = lu.rpc.call_blockdev_create(node, device, device.size,
6533                                        instance.name, force_open, info)
6534   result.Raise("Can't create block device %s on"
6535                " node %s for instance %s" % (device, node, instance.name))
6536   if device.physical_id is None:
6537     device.physical_id = result.payload
6538
6539
6540 def _GenerateUniqueNames(lu, exts):
6541   """Generate a suitable LV name.
6542
6543   This will generate a logical volume name for the given instance.
6544
6545   """
6546   results = []
6547   for val in exts:
6548     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6549     results.append("%s%s" % (new_id, val))
6550   return results
6551
6552
6553 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6554                          p_minor, s_minor):
6555   """Generate a drbd8 device complete with its children.
6556
6557   """
6558   port = lu.cfg.AllocatePort()
6559   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6560   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6561                           logical_id=(vgname, names[0]))
6562   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6563                           logical_id=(vgname, names[1]))
6564   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6565                           logical_id=(primary, secondary, port,
6566                                       p_minor, s_minor,
6567                                       shared_secret),
6568                           children=[dev_data, dev_meta],
6569                           iv_name=iv_name)
6570   return drbd_dev
6571
6572
6573 def _GenerateDiskTemplate(lu, template_name,
6574                           instance_name, primary_node,
6575                           secondary_nodes, disk_info,
6576                           file_storage_dir, file_driver,
6577                           base_index, feedback_fn):
6578   """Generate the entire disk layout for a given template type.
6579
6580   """
6581   #TODO: compute space requirements
6582
6583   vgname = lu.cfg.GetVGName()
6584   disk_count = len(disk_info)
6585   disks = []
6586   if template_name == constants.DT_DISKLESS:
6587     pass
6588   elif template_name == constants.DT_PLAIN:
6589     if len(secondary_nodes) != 0:
6590       raise errors.ProgrammerError("Wrong template configuration")
6591
6592     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6593                                       for i in range(disk_count)])
6594     for idx, disk in enumerate(disk_info):
6595       disk_index = idx + base_index
6596       vg = disk.get("vg", vgname)
6597       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6598       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6599                               logical_id=(vg, names[idx]),
6600                               iv_name="disk/%d" % disk_index,
6601                               mode=disk["mode"])
6602       disks.append(disk_dev)
6603   elif template_name == constants.DT_DRBD8:
6604     if len(secondary_nodes) != 1:
6605       raise errors.ProgrammerError("Wrong template configuration")
6606     remote_node = secondary_nodes[0]
6607     minors = lu.cfg.AllocateDRBDMinor(
6608       [primary_node, remote_node] * len(disk_info), instance_name)
6609
6610     names = []
6611     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6612                                                for i in range(disk_count)]):
6613       names.append(lv_prefix + "_data")
6614       names.append(lv_prefix + "_meta")
6615     for idx, disk in enumerate(disk_info):
6616       disk_index = idx + base_index
6617       vg = disk.get("vg", vgname)
6618       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6619                                       disk["size"], vg, names[idx*2:idx*2+2],
6620                                       "disk/%d" % disk_index,
6621                                       minors[idx*2], minors[idx*2+1])
6622       disk_dev.mode = disk["mode"]
6623       disks.append(disk_dev)
6624   elif template_name == constants.DT_FILE:
6625     if len(secondary_nodes) != 0:
6626       raise errors.ProgrammerError("Wrong template configuration")
6627
6628     opcodes.RequireFileStorage()
6629
6630     for idx, disk in enumerate(disk_info):
6631       disk_index = idx + base_index
6632       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6633                               iv_name="disk/%d" % disk_index,
6634                               logical_id=(file_driver,
6635                                           "%s/disk%d" % (file_storage_dir,
6636                                                          disk_index)),
6637                               mode=disk["mode"])
6638       disks.append(disk_dev)
6639   elif template_name == constants.DT_SHARED_FILE:
6640     if len(secondary_nodes) != 0:
6641       raise errors.ProgrammerError("Wrong template configuration")
6642
6643     opcodes.RequireSharedFileStorage()
6644
6645     for idx, disk in enumerate(disk_info):
6646       disk_index = idx + base_index
6647       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6648                               iv_name="disk/%d" % disk_index,
6649                               logical_id=(file_driver,
6650                                           "%s/disk%d" % (file_storage_dir,
6651                                                          disk_index)),
6652                               mode=disk["mode"])
6653       disks.append(disk_dev)
6654   else:
6655     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6656   return disks
6657
6658
6659 def _GetInstanceInfoText(instance):
6660   """Compute that text that should be added to the disk's metadata.
6661
6662   """
6663   return "originstname+%s" % instance.name
6664
6665
6666 def _CalcEta(time_taken, written, total_size):
6667   """Calculates the ETA based on size written and total size.
6668
6669   @param time_taken: The time taken so far
6670   @param written: amount written so far
6671   @param total_size: The total size of data to be written
6672   @return: The remaining time in seconds
6673
6674   """
6675   avg_time = time_taken / float(written)
6676   return (total_size - written) * avg_time
6677
6678
6679 def _WipeDisks(lu, instance):
6680   """Wipes instance disks.
6681
6682   @type lu: L{LogicalUnit}
6683   @param lu: the logical unit on whose behalf we execute
6684   @type instance: L{objects.Instance}
6685   @param instance: the instance whose disks we should create
6686   @return: the success of the wipe
6687
6688   """
6689   node = instance.primary_node
6690   logging.info("Pause sync of instance %s disks", instance.name)
6691   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6692
6693   for idx, success in enumerate(result.payload):
6694     if not success:
6695       logging.warn("pause-sync of instance %s for disks %d failed",
6696                    instance.name, idx)
6697
6698   try:
6699     for idx, device in enumerate(instance.disks):
6700       lu.LogInfo("* Wiping disk %d", idx)
6701       logging.info("Wiping disk %d for instance %s", idx, instance.name)
6702
6703       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6704       # MAX_WIPE_CHUNK at max
6705       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6706                             constants.MIN_WIPE_CHUNK_PERCENT)
6707
6708       offset = 0
6709       size = device.size
6710       last_output = 0
6711       start_time = time.time()
6712
6713       while offset < size:
6714         wipe_size = min(wipe_chunk_size, size - offset)
6715         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6716         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6717                      (idx, offset, wipe_size))
6718         now = time.time()
6719         offset += wipe_size
6720         if now - last_output >= 60:
6721           eta = _CalcEta(now - start_time, offset, size)
6722           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6723                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6724           last_output = now
6725   finally:
6726     logging.info("Resume sync of instance %s disks", instance.name)
6727
6728     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6729
6730     for idx, success in enumerate(result.payload):
6731       if not success:
6732         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6733                       " look at the status and troubleshoot the issue.", idx)
6734         logging.warn("resume-sync of instance %s for disks %d failed",
6735                      instance.name, idx)
6736
6737
6738 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6739   """Create all disks for an instance.
6740
6741   This abstracts away some work from AddInstance.
6742
6743   @type lu: L{LogicalUnit}
6744   @param lu: the logical unit on whose behalf we execute
6745   @type instance: L{objects.Instance}
6746   @param instance: the instance whose disks we should create
6747   @type to_skip: list
6748   @param to_skip: list of indices to skip
6749   @type target_node: string
6750   @param target_node: if passed, overrides the target node for creation
6751   @rtype: boolean
6752   @return: the success of the creation
6753
6754   """
6755   info = _GetInstanceInfoText(instance)
6756   if target_node is None:
6757     pnode = instance.primary_node
6758     all_nodes = instance.all_nodes
6759   else:
6760     pnode = target_node
6761     all_nodes = [pnode]
6762
6763   if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
6764     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6765     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6766
6767     result.Raise("Failed to create directory '%s' on"
6768                  " node %s" % (file_storage_dir, pnode))
6769
6770   # Note: this needs to be kept in sync with adding of disks in
6771   # LUInstanceSetParams
6772   for idx, device in enumerate(instance.disks):
6773     if to_skip and idx in to_skip:
6774       continue
6775     logging.info("Creating volume %s for instance %s",
6776                  device.iv_name, instance.name)
6777     #HARDCODE
6778     for node in all_nodes:
6779       f_create = node == pnode
6780       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6781
6782
6783 def _RemoveDisks(lu, instance, target_node=None):
6784   """Remove all disks for an instance.
6785
6786   This abstracts away some work from `AddInstance()` and
6787   `RemoveInstance()`. Note that in case some of the devices couldn't
6788   be removed, the removal will continue with the other ones (compare
6789   with `_CreateDisks()`).
6790
6791   @type lu: L{LogicalUnit}
6792   @param lu: the logical unit on whose behalf we execute
6793   @type instance: L{objects.Instance}
6794   @param instance: the instance whose disks we should remove
6795   @type target_node: string
6796   @param target_node: used to override the node on which to remove the disks
6797   @rtype: boolean
6798   @return: the success of the removal
6799
6800   """
6801   logging.info("Removing block devices for instance %s", instance.name)
6802
6803   all_result = True
6804   for device in instance.disks:
6805     if target_node:
6806       edata = [(target_node, device)]
6807     else:
6808       edata = device.ComputeNodeTree(instance.primary_node)
6809     for node, disk in edata:
6810       lu.cfg.SetDiskID(disk, node)
6811       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6812       if msg:
6813         lu.LogWarning("Could not remove block device %s on node %s,"
6814                       " continuing anyway: %s", device.iv_name, node, msg)
6815         all_result = False
6816
6817   if instance.disk_template == constants.DT_FILE:
6818     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6819     if target_node:
6820       tgt = target_node
6821     else:
6822       tgt = instance.primary_node
6823     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6824     if result.fail_msg:
6825       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6826                     file_storage_dir, instance.primary_node, result.fail_msg)
6827       all_result = False
6828
6829   return all_result
6830
6831
6832 def _ComputeDiskSizePerVG(disk_template, disks):
6833   """Compute disk size requirements in the volume group
6834
6835   """
6836   def _compute(disks, payload):
6837     """Universal algorithm
6838
6839     """
6840     vgs = {}
6841     for disk in disks:
6842       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6843
6844     return vgs
6845
6846   # Required free disk space as a function of disk and swap space
6847   req_size_dict = {
6848     constants.DT_DISKLESS: {},
6849     constants.DT_PLAIN: _compute(disks, 0),
6850     # 128 MB are added for drbd metadata for each disk
6851     constants.DT_DRBD8: _compute(disks, 128),
6852     constants.DT_FILE: {},
6853     constants.DT_SHARED_FILE: {},
6854   }
6855
6856   if disk_template not in req_size_dict:
6857     raise errors.ProgrammerError("Disk template '%s' size requirement"
6858                                  " is unknown" %  disk_template)
6859
6860   return req_size_dict[disk_template]
6861
6862
6863 def _ComputeDiskSize(disk_template, disks):
6864   """Compute disk size requirements in the volume group
6865
6866   """
6867   # Required free disk space as a function of disk and swap space
6868   req_size_dict = {
6869     constants.DT_DISKLESS: None,
6870     constants.DT_PLAIN: sum(d["size"] for d in disks),
6871     # 128 MB are added for drbd metadata for each disk
6872     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6873     constants.DT_FILE: None,
6874     constants.DT_SHARED_FILE: 0,
6875   }
6876
6877   if disk_template not in req_size_dict:
6878     raise errors.ProgrammerError("Disk template '%s' size requirement"
6879                                  " is unknown" %  disk_template)
6880
6881   return req_size_dict[disk_template]
6882
6883
6884 def _FilterVmNodes(lu, nodenames):
6885   """Filters out non-vm_capable nodes from a list.
6886
6887   @type lu: L{LogicalUnit}
6888   @param lu: the logical unit for which we check
6889   @type nodenames: list
6890   @param nodenames: the list of nodes on which we should check
6891   @rtype: list
6892   @return: the list of vm-capable nodes
6893
6894   """
6895   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6896   return [name for name in nodenames if name not in vm_nodes]
6897
6898
6899 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6900   """Hypervisor parameter validation.
6901
6902   This function abstract the hypervisor parameter validation to be
6903   used in both instance create and instance modify.
6904
6905   @type lu: L{LogicalUnit}
6906   @param lu: the logical unit for which we check
6907   @type nodenames: list
6908   @param nodenames: the list of nodes on which we should check
6909   @type hvname: string
6910   @param hvname: the name of the hypervisor we should use
6911   @type hvparams: dict
6912   @param hvparams: the parameters which we need to check
6913   @raise errors.OpPrereqError: if the parameters are not valid
6914
6915   """
6916   nodenames = _FilterVmNodes(lu, nodenames)
6917   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6918                                                   hvname,
6919                                                   hvparams)
6920   for node in nodenames:
6921     info = hvinfo[node]
6922     if info.offline:
6923       continue
6924     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6925
6926
6927 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6928   """OS parameters validation.
6929
6930   @type lu: L{LogicalUnit}
6931   @param lu: the logical unit for which we check
6932   @type required: boolean
6933   @param required: whether the validation should fail if the OS is not
6934       found
6935   @type nodenames: list
6936   @param nodenames: the list of nodes on which we should check
6937   @type osname: string
6938   @param osname: the name of the hypervisor we should use
6939   @type osparams: dict
6940   @param osparams: the parameters which we need to check
6941   @raise errors.OpPrereqError: if the parameters are not valid
6942
6943   """
6944   nodenames = _FilterVmNodes(lu, nodenames)
6945   result = lu.rpc.call_os_validate(required, nodenames, osname,
6946                                    [constants.OS_VALIDATE_PARAMETERS],
6947                                    osparams)
6948   for node, nres in result.items():
6949     # we don't check for offline cases since this should be run only
6950     # against the master node and/or an instance's nodes
6951     nres.Raise("OS Parameters validation failed on node %s" % node)
6952     if not nres.payload:
6953       lu.LogInfo("OS %s not found on node %s, validation skipped",
6954                  osname, node)
6955
6956
6957 class LUInstanceCreate(LogicalUnit):
6958   """Create an instance.
6959
6960   """
6961   HPATH = "instance-add"
6962   HTYPE = constants.HTYPE_INSTANCE
6963   REQ_BGL = False
6964
6965   def CheckArguments(self):
6966     """Check arguments.
6967
6968     """
6969     # do not require name_check to ease forward/backward compatibility
6970     # for tools
6971     if self.op.no_install and self.op.start:
6972       self.LogInfo("No-installation mode selected, disabling startup")
6973       self.op.start = False
6974     # validate/normalize the instance name
6975     self.op.instance_name = \
6976       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6977
6978     if self.op.ip_check and not self.op.name_check:
6979       # TODO: make the ip check more flexible and not depend on the name check
6980       raise errors.OpPrereqError("Cannot do ip check without a name check",
6981                                  errors.ECODE_INVAL)
6982
6983     # check nics' parameter names
6984     for nic in self.op.nics:
6985       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6986
6987     # check disks. parameter names and consistent adopt/no-adopt strategy
6988     has_adopt = has_no_adopt = False
6989     for disk in self.op.disks:
6990       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6991       if "adopt" in disk:
6992         has_adopt = True
6993       else:
6994         has_no_adopt = True
6995     if has_adopt and has_no_adopt:
6996       raise errors.OpPrereqError("Either all disks are adopted or none is",
6997                                  errors.ECODE_INVAL)
6998     if has_adopt:
6999       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7000         raise errors.OpPrereqError("Disk adoption is not supported for the"
7001                                    " '%s' disk template" %
7002                                    self.op.disk_template,
7003                                    errors.ECODE_INVAL)
7004       if self.op.iallocator is not None:
7005         raise errors.OpPrereqError("Disk adoption not allowed with an"
7006                                    " iallocator script", errors.ECODE_INVAL)
7007       if self.op.mode == constants.INSTANCE_IMPORT:
7008         raise errors.OpPrereqError("Disk adoption not allowed for"
7009                                    " instance import", errors.ECODE_INVAL)
7010
7011     self.adopt_disks = has_adopt
7012
7013     # instance name verification
7014     if self.op.name_check:
7015       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7016       self.op.instance_name = self.hostname1.name
7017       # used in CheckPrereq for ip ping check
7018       self.check_ip = self.hostname1.ip
7019     else:
7020       self.check_ip = None
7021
7022     # file storage checks
7023     if (self.op.file_driver and
7024         not self.op.file_driver in constants.FILE_DRIVER):
7025       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7026                                  self.op.file_driver, errors.ECODE_INVAL)
7027
7028     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7029       raise errors.OpPrereqError("File storage directory path not absolute",
7030                                  errors.ECODE_INVAL)
7031
7032     ### Node/iallocator related checks
7033     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7034
7035     if self.op.pnode is not None:
7036       if self.op.disk_template in constants.DTS_NET_MIRROR:
7037         if self.op.snode is None:
7038           raise errors.OpPrereqError("The networked disk templates need"
7039                                      " a mirror node", errors.ECODE_INVAL)
7040       elif self.op.snode:
7041         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7042                         " template")
7043         self.op.snode = None
7044
7045     self._cds = _GetClusterDomainSecret()
7046
7047     if self.op.mode == constants.INSTANCE_IMPORT:
7048       # On import force_variant must be True, because if we forced it at
7049       # initial install, our only chance when importing it back is that it
7050       # works again!
7051       self.op.force_variant = True
7052
7053       if self.op.no_install:
7054         self.LogInfo("No-installation mode has no effect during import")
7055
7056     elif self.op.mode == constants.INSTANCE_CREATE:
7057       if self.op.os_type is None:
7058         raise errors.OpPrereqError("No guest OS specified",
7059                                    errors.ECODE_INVAL)
7060       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7061         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7062                                    " installation" % self.op.os_type,
7063                                    errors.ECODE_STATE)
7064       if self.op.disk_template is None:
7065         raise errors.OpPrereqError("No disk template specified",
7066                                    errors.ECODE_INVAL)
7067
7068     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7069       # Check handshake to ensure both clusters have the same domain secret
7070       src_handshake = self.op.source_handshake
7071       if not src_handshake:
7072         raise errors.OpPrereqError("Missing source handshake",
7073                                    errors.ECODE_INVAL)
7074
7075       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7076                                                            src_handshake)
7077       if errmsg:
7078         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7079                                    errors.ECODE_INVAL)
7080
7081       # Load and check source CA
7082       self.source_x509_ca_pem = self.op.source_x509_ca
7083       if not self.source_x509_ca_pem:
7084         raise errors.OpPrereqError("Missing source X509 CA",
7085                                    errors.ECODE_INVAL)
7086
7087       try:
7088         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7089                                                     self._cds)
7090       except OpenSSL.crypto.Error, err:
7091         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7092                                    (err, ), errors.ECODE_INVAL)
7093
7094       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7095       if errcode is not None:
7096         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7097                                    errors.ECODE_INVAL)
7098
7099       self.source_x509_ca = cert
7100
7101       src_instance_name = self.op.source_instance_name
7102       if not src_instance_name:
7103         raise errors.OpPrereqError("Missing source instance name",
7104                                    errors.ECODE_INVAL)
7105
7106       self.source_instance_name = \
7107           netutils.GetHostname(name=src_instance_name).name
7108
7109     else:
7110       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7111                                  self.op.mode, errors.ECODE_INVAL)
7112
7113   def ExpandNames(self):
7114     """ExpandNames for CreateInstance.
7115
7116     Figure out the right locks for instance creation.
7117
7118     """
7119     self.needed_locks = {}
7120
7121     instance_name = self.op.instance_name
7122     # this is just a preventive check, but someone might still add this
7123     # instance in the meantime, and creation will fail at lock-add time
7124     if instance_name in self.cfg.GetInstanceList():
7125       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7126                                  instance_name, errors.ECODE_EXISTS)
7127
7128     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7129
7130     if self.op.iallocator:
7131       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7132     else:
7133       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7134       nodelist = [self.op.pnode]
7135       if self.op.snode is not None:
7136         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7137         nodelist.append(self.op.snode)
7138       self.needed_locks[locking.LEVEL_NODE] = nodelist
7139
7140     # in case of import lock the source node too
7141     if self.op.mode == constants.INSTANCE_IMPORT:
7142       src_node = self.op.src_node
7143       src_path = self.op.src_path
7144
7145       if src_path is None:
7146         self.op.src_path = src_path = self.op.instance_name
7147
7148       if src_node is None:
7149         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7150         self.op.src_node = None
7151         if os.path.isabs(src_path):
7152           raise errors.OpPrereqError("Importing an instance from an absolute"
7153                                      " path requires a source node option.",
7154                                      errors.ECODE_INVAL)
7155       else:
7156         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7157         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7158           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7159         if not os.path.isabs(src_path):
7160           self.op.src_path = src_path = \
7161             utils.PathJoin(constants.EXPORT_DIR, src_path)
7162
7163   def _RunAllocator(self):
7164     """Run the allocator based on input opcode.
7165
7166     """
7167     nics = [n.ToDict() for n in self.nics]
7168     ial = IAllocator(self.cfg, self.rpc,
7169                      mode=constants.IALLOCATOR_MODE_ALLOC,
7170                      name=self.op.instance_name,
7171                      disk_template=self.op.disk_template,
7172                      tags=[],
7173                      os=self.op.os_type,
7174                      vcpus=self.be_full[constants.BE_VCPUS],
7175                      mem_size=self.be_full[constants.BE_MEMORY],
7176                      disks=self.disks,
7177                      nics=nics,
7178                      hypervisor=self.op.hypervisor,
7179                      )
7180
7181     ial.Run(self.op.iallocator)
7182
7183     if not ial.success:
7184       raise errors.OpPrereqError("Can't compute nodes using"
7185                                  " iallocator '%s': %s" %
7186                                  (self.op.iallocator, ial.info),
7187                                  errors.ECODE_NORES)
7188     if len(ial.result) != ial.required_nodes:
7189       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7190                                  " of nodes (%s), required %s" %
7191                                  (self.op.iallocator, len(ial.result),
7192                                   ial.required_nodes), errors.ECODE_FAULT)
7193     self.op.pnode = ial.result[0]
7194     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7195                  self.op.instance_name, self.op.iallocator,
7196                  utils.CommaJoin(ial.result))
7197     if ial.required_nodes == 2:
7198       self.op.snode = ial.result[1]
7199
7200   def BuildHooksEnv(self):
7201     """Build hooks env.
7202
7203     This runs on master, primary and secondary nodes of the instance.
7204
7205     """
7206     env = {
7207       "ADD_MODE": self.op.mode,
7208       }
7209     if self.op.mode == constants.INSTANCE_IMPORT:
7210       env["SRC_NODE"] = self.op.src_node
7211       env["SRC_PATH"] = self.op.src_path
7212       env["SRC_IMAGES"] = self.src_images
7213
7214     env.update(_BuildInstanceHookEnv(
7215       name=self.op.instance_name,
7216       primary_node=self.op.pnode,
7217       secondary_nodes=self.secondaries,
7218       status=self.op.start,
7219       os_type=self.op.os_type,
7220       memory=self.be_full[constants.BE_MEMORY],
7221       vcpus=self.be_full[constants.BE_VCPUS],
7222       nics=_NICListToTuple(self, self.nics),
7223       disk_template=self.op.disk_template,
7224       disks=[(d["size"], d["mode"]) for d in self.disks],
7225       bep=self.be_full,
7226       hvp=self.hv_full,
7227       hypervisor_name=self.op.hypervisor,
7228     ))
7229
7230     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7231           self.secondaries)
7232     return env, nl, nl
7233
7234   def _ReadExportInfo(self):
7235     """Reads the export information from disk.
7236
7237     It will override the opcode source node and path with the actual
7238     information, if these two were not specified before.
7239
7240     @return: the export information
7241
7242     """
7243     assert self.op.mode == constants.INSTANCE_IMPORT
7244
7245     src_node = self.op.src_node
7246     src_path = self.op.src_path
7247
7248     if src_node is None:
7249       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7250       exp_list = self.rpc.call_export_list(locked_nodes)
7251       found = False
7252       for node in exp_list:
7253         if exp_list[node].fail_msg:
7254           continue
7255         if src_path in exp_list[node].payload:
7256           found = True
7257           self.op.src_node = src_node = node
7258           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7259                                                        src_path)
7260           break
7261       if not found:
7262         raise errors.OpPrereqError("No export found for relative path %s" %
7263                                     src_path, errors.ECODE_INVAL)
7264
7265     _CheckNodeOnline(self, src_node)
7266     result = self.rpc.call_export_info(src_node, src_path)
7267     result.Raise("No export or invalid export found in dir %s" % src_path)
7268
7269     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7270     if not export_info.has_section(constants.INISECT_EXP):
7271       raise errors.ProgrammerError("Corrupted export config",
7272                                    errors.ECODE_ENVIRON)
7273
7274     ei_version = export_info.get(constants.INISECT_EXP, "version")
7275     if (int(ei_version) != constants.EXPORT_VERSION):
7276       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7277                                  (ei_version, constants.EXPORT_VERSION),
7278                                  errors.ECODE_ENVIRON)
7279     return export_info
7280
7281   def _ReadExportParams(self, einfo):
7282     """Use export parameters as defaults.
7283
7284     In case the opcode doesn't specify (as in override) some instance
7285     parameters, then try to use them from the export information, if
7286     that declares them.
7287
7288     """
7289     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7290
7291     if self.op.disk_template is None:
7292       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7293         self.op.disk_template = einfo.get(constants.INISECT_INS,
7294                                           "disk_template")
7295       else:
7296         raise errors.OpPrereqError("No disk template specified and the export"
7297                                    " is missing the disk_template information",
7298                                    errors.ECODE_INVAL)
7299
7300     if not self.op.disks:
7301       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7302         disks = []
7303         # TODO: import the disk iv_name too
7304         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7305           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7306           disks.append({"size": disk_sz})
7307         self.op.disks = disks
7308       else:
7309         raise errors.OpPrereqError("No disk info specified and the export"
7310                                    " is missing the disk information",
7311                                    errors.ECODE_INVAL)
7312
7313     if (not self.op.nics and
7314         einfo.has_option(constants.INISECT_INS, "nic_count")):
7315       nics = []
7316       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7317         ndict = {}
7318         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7319           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7320           ndict[name] = v
7321         nics.append(ndict)
7322       self.op.nics = nics
7323
7324     if (self.op.hypervisor is None and
7325         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7326       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7327     if einfo.has_section(constants.INISECT_HYP):
7328       # use the export parameters but do not override the ones
7329       # specified by the user
7330       for name, value in einfo.items(constants.INISECT_HYP):
7331         if name not in self.op.hvparams:
7332           self.op.hvparams[name] = value
7333
7334     if einfo.has_section(constants.INISECT_BEP):
7335       # use the parameters, without overriding
7336       for name, value in einfo.items(constants.INISECT_BEP):
7337         if name not in self.op.beparams:
7338           self.op.beparams[name] = value
7339     else:
7340       # try to read the parameters old style, from the main section
7341       for name in constants.BES_PARAMETERS:
7342         if (name not in self.op.beparams and
7343             einfo.has_option(constants.INISECT_INS, name)):
7344           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7345
7346     if einfo.has_section(constants.INISECT_OSP):
7347       # use the parameters, without overriding
7348       for name, value in einfo.items(constants.INISECT_OSP):
7349         if name not in self.op.osparams:
7350           self.op.osparams[name] = value
7351
7352   def _RevertToDefaults(self, cluster):
7353     """Revert the instance parameters to the default values.
7354
7355     """
7356     # hvparams
7357     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7358     for name in self.op.hvparams.keys():
7359       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7360         del self.op.hvparams[name]
7361     # beparams
7362     be_defs = cluster.SimpleFillBE({})
7363     for name in self.op.beparams.keys():
7364       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7365         del self.op.beparams[name]
7366     # nic params
7367     nic_defs = cluster.SimpleFillNIC({})
7368     for nic in self.op.nics:
7369       for name in constants.NICS_PARAMETERS:
7370         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7371           del nic[name]
7372     # osparams
7373     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7374     for name in self.op.osparams.keys():
7375       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7376         del self.op.osparams[name]
7377
7378   def CheckPrereq(self):
7379     """Check prerequisites.
7380
7381     """
7382     if self.op.mode == constants.INSTANCE_IMPORT:
7383       export_info = self._ReadExportInfo()
7384       self._ReadExportParams(export_info)
7385
7386     if (not self.cfg.GetVGName() and
7387         self.op.disk_template not in constants.DTS_NOT_LVM):
7388       raise errors.OpPrereqError("Cluster does not support lvm-based"
7389                                  " instances", errors.ECODE_STATE)
7390
7391     if self.op.hypervisor is None:
7392       self.op.hypervisor = self.cfg.GetHypervisorType()
7393
7394     cluster = self.cfg.GetClusterInfo()
7395     enabled_hvs = cluster.enabled_hypervisors
7396     if self.op.hypervisor not in enabled_hvs:
7397       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7398                                  " cluster (%s)" % (self.op.hypervisor,
7399                                   ",".join(enabled_hvs)),
7400                                  errors.ECODE_STATE)
7401
7402     # check hypervisor parameter syntax (locally)
7403     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7404     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7405                                       self.op.hvparams)
7406     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7407     hv_type.CheckParameterSyntax(filled_hvp)
7408     self.hv_full = filled_hvp
7409     # check that we don't specify global parameters on an instance
7410     _CheckGlobalHvParams(self.op.hvparams)
7411
7412     # fill and remember the beparams dict
7413     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7414     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7415
7416     # build os parameters
7417     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7418
7419     # now that hvp/bep are in final format, let's reset to defaults,
7420     # if told to do so
7421     if self.op.identify_defaults:
7422       self._RevertToDefaults(cluster)
7423
7424     # NIC buildup
7425     self.nics = []
7426     for idx, nic in enumerate(self.op.nics):
7427       nic_mode_req = nic.get("mode", None)
7428       nic_mode = nic_mode_req
7429       if nic_mode is None:
7430         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7431
7432       # in routed mode, for the first nic, the default ip is 'auto'
7433       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7434         default_ip_mode = constants.VALUE_AUTO
7435       else:
7436         default_ip_mode = constants.VALUE_NONE
7437
7438       # ip validity checks
7439       ip = nic.get("ip", default_ip_mode)
7440       if ip is None or ip.lower() == constants.VALUE_NONE:
7441         nic_ip = None
7442       elif ip.lower() == constants.VALUE_AUTO:
7443         if not self.op.name_check:
7444           raise errors.OpPrereqError("IP address set to auto but name checks"
7445                                      " have been skipped",
7446                                      errors.ECODE_INVAL)
7447         nic_ip = self.hostname1.ip
7448       else:
7449         if not netutils.IPAddress.IsValid(ip):
7450           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7451                                      errors.ECODE_INVAL)
7452         nic_ip = ip
7453
7454       # TODO: check the ip address for uniqueness
7455       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7456         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7457                                    errors.ECODE_INVAL)
7458
7459       # MAC address verification
7460       mac = nic.get("mac", constants.VALUE_AUTO)
7461       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7462         mac = utils.NormalizeAndValidateMac(mac)
7463
7464         try:
7465           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7466         except errors.ReservationError:
7467           raise errors.OpPrereqError("MAC address %s already in use"
7468                                      " in cluster" % mac,
7469                                      errors.ECODE_NOTUNIQUE)
7470
7471       # bridge verification
7472       bridge = nic.get("bridge", None)
7473       link = nic.get("link", None)
7474       if bridge and link:
7475         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7476                                    " at the same time", errors.ECODE_INVAL)
7477       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7478         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7479                                    errors.ECODE_INVAL)
7480       elif bridge:
7481         link = bridge
7482
7483       nicparams = {}
7484       if nic_mode_req:
7485         nicparams[constants.NIC_MODE] = nic_mode_req
7486       if link:
7487         nicparams[constants.NIC_LINK] = link
7488
7489       check_params = cluster.SimpleFillNIC(nicparams)
7490       objects.NIC.CheckParameterSyntax(check_params)
7491       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7492
7493     # disk checks/pre-build
7494     self.disks = []
7495     for disk in self.op.disks:
7496       mode = disk.get("mode", constants.DISK_RDWR)
7497       if mode not in constants.DISK_ACCESS_SET:
7498         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7499                                    mode, errors.ECODE_INVAL)
7500       size = disk.get("size", None)
7501       if size is None:
7502         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7503       try:
7504         size = int(size)
7505       except (TypeError, ValueError):
7506         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7507                                    errors.ECODE_INVAL)
7508       vg = disk.get("vg", self.cfg.GetVGName())
7509       new_disk = {"size": size, "mode": mode, "vg": vg}
7510       if "adopt" in disk:
7511         new_disk["adopt"] = disk["adopt"]
7512       self.disks.append(new_disk)
7513
7514     if self.op.mode == constants.INSTANCE_IMPORT:
7515
7516       # Check that the new instance doesn't have less disks than the export
7517       instance_disks = len(self.disks)
7518       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7519       if instance_disks < export_disks:
7520         raise errors.OpPrereqError("Not enough disks to import."
7521                                    " (instance: %d, export: %d)" %
7522                                    (instance_disks, export_disks),
7523                                    errors.ECODE_INVAL)
7524
7525       disk_images = []
7526       for idx in range(export_disks):
7527         option = 'disk%d_dump' % idx
7528         if export_info.has_option(constants.INISECT_INS, option):
7529           # FIXME: are the old os-es, disk sizes, etc. useful?
7530           export_name = export_info.get(constants.INISECT_INS, option)
7531           image = utils.PathJoin(self.op.src_path, export_name)
7532           disk_images.append(image)
7533         else:
7534           disk_images.append(False)
7535
7536       self.src_images = disk_images
7537
7538       old_name = export_info.get(constants.INISECT_INS, 'name')
7539       try:
7540         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7541       except (TypeError, ValueError), err:
7542         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7543                                    " an integer: %s" % str(err),
7544                                    errors.ECODE_STATE)
7545       if self.op.instance_name == old_name:
7546         for idx, nic in enumerate(self.nics):
7547           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7548             nic_mac_ini = 'nic%d_mac' % idx
7549             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7550
7551     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7552
7553     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7554     if self.op.ip_check:
7555       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7556         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7557                                    (self.check_ip, self.op.instance_name),
7558                                    errors.ECODE_NOTUNIQUE)
7559
7560     #### mac address generation
7561     # By generating here the mac address both the allocator and the hooks get
7562     # the real final mac address rather than the 'auto' or 'generate' value.
7563     # There is a race condition between the generation and the instance object
7564     # creation, which means that we know the mac is valid now, but we're not
7565     # sure it will be when we actually add the instance. If things go bad
7566     # adding the instance will abort because of a duplicate mac, and the
7567     # creation job will fail.
7568     for nic in self.nics:
7569       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7570         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7571
7572     #### allocator run
7573
7574     if self.op.iallocator is not None:
7575       self._RunAllocator()
7576
7577     #### node related checks
7578
7579     # check primary node
7580     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7581     assert self.pnode is not None, \
7582       "Cannot retrieve locked node %s" % self.op.pnode
7583     if pnode.offline:
7584       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7585                                  pnode.name, errors.ECODE_STATE)
7586     if pnode.drained:
7587       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7588                                  pnode.name, errors.ECODE_STATE)
7589     if not pnode.vm_capable:
7590       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7591                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7592
7593     self.secondaries = []
7594
7595     # mirror node verification
7596     if self.op.disk_template in constants.DTS_NET_MIRROR:
7597       if self.op.snode == pnode.name:
7598         raise errors.OpPrereqError("The secondary node cannot be the"
7599                                    " primary node.", errors.ECODE_INVAL)
7600       _CheckNodeOnline(self, self.op.snode)
7601       _CheckNodeNotDrained(self, self.op.snode)
7602       _CheckNodeVmCapable(self, self.op.snode)
7603       self.secondaries.append(self.op.snode)
7604
7605     nodenames = [pnode.name] + self.secondaries
7606
7607     if not self.adopt_disks:
7608       # Check lv size requirements, if not adopting
7609       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7610       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7611
7612     else: # instead, we must check the adoption data
7613       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7614       if len(all_lvs) != len(self.disks):
7615         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7616                                    errors.ECODE_INVAL)
7617       for lv_name in all_lvs:
7618         try:
7619           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7620           # to ReserveLV uses the same syntax
7621           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7622         except errors.ReservationError:
7623           raise errors.OpPrereqError("LV named %s used by another instance" %
7624                                      lv_name, errors.ECODE_NOTUNIQUE)
7625
7626       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7627       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7628
7629       node_lvs = self.rpc.call_lv_list([pnode.name],
7630                                        vg_names.payload.keys())[pnode.name]
7631       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7632       node_lvs = node_lvs.payload
7633
7634       delta = all_lvs.difference(node_lvs.keys())
7635       if delta:
7636         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7637                                    utils.CommaJoin(delta),
7638                                    errors.ECODE_INVAL)
7639       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7640       if online_lvs:
7641         raise errors.OpPrereqError("Online logical volumes found, cannot"
7642                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7643                                    errors.ECODE_STATE)
7644       # update the size of disk based on what is found
7645       for dsk in self.disks:
7646         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7647
7648     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7649
7650     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7651     # check OS parameters (remotely)
7652     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7653
7654     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7655
7656     # memory check on primary node
7657     if self.op.start:
7658       _CheckNodeFreeMemory(self, self.pnode.name,
7659                            "creating instance %s" % self.op.instance_name,
7660                            self.be_full[constants.BE_MEMORY],
7661                            self.op.hypervisor)
7662
7663     self.dry_run_result = list(nodenames)
7664
7665   def Exec(self, feedback_fn):
7666     """Create and add the instance to the cluster.
7667
7668     """
7669     instance = self.op.instance_name
7670     pnode_name = self.pnode.name
7671
7672     ht_kind = self.op.hypervisor
7673     if ht_kind in constants.HTS_REQ_PORT:
7674       network_port = self.cfg.AllocatePort()
7675     else:
7676       network_port = None
7677
7678     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
7679       # this is needed because os.path.join does not accept None arguments
7680       if self.op.file_storage_dir is None:
7681         string_file_storage_dir = ""
7682       else:
7683         string_file_storage_dir = self.op.file_storage_dir
7684
7685       # build the full file storage dir path
7686       if self.op.disk_template == constants.DT_SHARED_FILE:
7687         get_fsd_fn = self.cfg.GetSharedFileStorageDir
7688       else:
7689         get_fsd_fn = self.cfg.GetFileStorageDir
7690
7691       file_storage_dir = utils.PathJoin(get_fsd_fn(),
7692                                         string_file_storage_dir, instance)
7693     else:
7694       file_storage_dir = ""
7695
7696     disks = _GenerateDiskTemplate(self,
7697                                   self.op.disk_template,
7698                                   instance, pnode_name,
7699                                   self.secondaries,
7700                                   self.disks,
7701                                   file_storage_dir,
7702                                   self.op.file_driver,
7703                                   0,
7704                                   feedback_fn)
7705
7706     iobj = objects.Instance(name=instance, os=self.op.os_type,
7707                             primary_node=pnode_name,
7708                             nics=self.nics, disks=disks,
7709                             disk_template=self.op.disk_template,
7710                             admin_up=False,
7711                             network_port=network_port,
7712                             beparams=self.op.beparams,
7713                             hvparams=self.op.hvparams,
7714                             hypervisor=self.op.hypervisor,
7715                             osparams=self.op.osparams,
7716                             )
7717
7718     if self.adopt_disks:
7719       # rename LVs to the newly-generated names; we need to construct
7720       # 'fake' LV disks with the old data, plus the new unique_id
7721       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7722       rename_to = []
7723       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7724         rename_to.append(t_dsk.logical_id)
7725         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7726         self.cfg.SetDiskID(t_dsk, pnode_name)
7727       result = self.rpc.call_blockdev_rename(pnode_name,
7728                                              zip(tmp_disks, rename_to))
7729       result.Raise("Failed to rename adoped LVs")
7730     else:
7731       feedback_fn("* creating instance disks...")
7732       try:
7733         _CreateDisks(self, iobj)
7734       except errors.OpExecError:
7735         self.LogWarning("Device creation failed, reverting...")
7736         try:
7737           _RemoveDisks(self, iobj)
7738         finally:
7739           self.cfg.ReleaseDRBDMinors(instance)
7740           raise
7741
7742       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7743         feedback_fn("* wiping instance disks...")
7744         try:
7745           _WipeDisks(self, iobj)
7746         except errors.OpExecError:
7747           self.LogWarning("Device wiping failed, reverting...")
7748           try:
7749             _RemoveDisks(self, iobj)
7750           finally:
7751             self.cfg.ReleaseDRBDMinors(instance)
7752             raise
7753
7754     feedback_fn("adding instance %s to cluster config" % instance)
7755
7756     self.cfg.AddInstance(iobj, self.proc.GetECId())
7757
7758     # Declare that we don't want to remove the instance lock anymore, as we've
7759     # added the instance to the config
7760     del self.remove_locks[locking.LEVEL_INSTANCE]
7761     # Unlock all the nodes
7762     if self.op.mode == constants.INSTANCE_IMPORT:
7763       nodes_keep = [self.op.src_node]
7764       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7765                        if node != self.op.src_node]
7766       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7767       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7768     else:
7769       self.context.glm.release(locking.LEVEL_NODE)
7770       del self.acquired_locks[locking.LEVEL_NODE]
7771
7772     if self.op.wait_for_sync:
7773       disk_abort = not _WaitForSync(self, iobj)
7774     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7775       # make sure the disks are not degraded (still sync-ing is ok)
7776       time.sleep(15)
7777       feedback_fn("* checking mirrors status")
7778       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7779     else:
7780       disk_abort = False
7781
7782     if disk_abort:
7783       _RemoveDisks(self, iobj)
7784       self.cfg.RemoveInstance(iobj.name)
7785       # Make sure the instance lock gets removed
7786       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7787       raise errors.OpExecError("There are some degraded disks for"
7788                                " this instance")
7789
7790     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7791       if self.op.mode == constants.INSTANCE_CREATE:
7792         if not self.op.no_install:
7793           feedback_fn("* running the instance OS create scripts...")
7794           # FIXME: pass debug option from opcode to backend
7795           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7796                                                  self.op.debug_level)
7797           result.Raise("Could not add os for instance %s"
7798                        " on node %s" % (instance, pnode_name))
7799
7800       elif self.op.mode == constants.INSTANCE_IMPORT:
7801         feedback_fn("* running the instance OS import scripts...")
7802
7803         transfers = []
7804
7805         for idx, image in enumerate(self.src_images):
7806           if not image:
7807             continue
7808
7809           # FIXME: pass debug option from opcode to backend
7810           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7811                                              constants.IEIO_FILE, (image, ),
7812                                              constants.IEIO_SCRIPT,
7813                                              (iobj.disks[idx], idx),
7814                                              None)
7815           transfers.append(dt)
7816
7817         import_result = \
7818           masterd.instance.TransferInstanceData(self, feedback_fn,
7819                                                 self.op.src_node, pnode_name,
7820                                                 self.pnode.secondary_ip,
7821                                                 iobj, transfers)
7822         if not compat.all(import_result):
7823           self.LogWarning("Some disks for instance %s on node %s were not"
7824                           " imported successfully" % (instance, pnode_name))
7825
7826       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7827         feedback_fn("* preparing remote import...")
7828         # The source cluster will stop the instance before attempting to make a
7829         # connection. In some cases stopping an instance can take a long time,
7830         # hence the shutdown timeout is added to the connection timeout.
7831         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7832                            self.op.source_shutdown_timeout)
7833         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7834
7835         assert iobj.primary_node == self.pnode.name
7836         disk_results = \
7837           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7838                                         self.source_x509_ca,
7839                                         self._cds, timeouts)
7840         if not compat.all(disk_results):
7841           # TODO: Should the instance still be started, even if some disks
7842           # failed to import (valid for local imports, too)?
7843           self.LogWarning("Some disks for instance %s on node %s were not"
7844                           " imported successfully" % (instance, pnode_name))
7845
7846         # Run rename script on newly imported instance
7847         assert iobj.name == instance
7848         feedback_fn("Running rename script for %s" % instance)
7849         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7850                                                    self.source_instance_name,
7851                                                    self.op.debug_level)
7852         if result.fail_msg:
7853           self.LogWarning("Failed to run rename script for %s on node"
7854                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7855
7856       else:
7857         # also checked in the prereq part
7858         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7859                                      % self.op.mode)
7860
7861     if self.op.start:
7862       iobj.admin_up = True
7863       self.cfg.Update(iobj, feedback_fn)
7864       logging.info("Starting instance %s on node %s", instance, pnode_name)
7865       feedback_fn("* starting instance...")
7866       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7867       result.Raise("Could not start instance")
7868
7869     return list(iobj.all_nodes)
7870
7871
7872 class LUInstanceConsole(NoHooksLU):
7873   """Connect to an instance's console.
7874
7875   This is somewhat special in that it returns the command line that
7876   you need to run on the master node in order to connect to the
7877   console.
7878
7879   """
7880   REQ_BGL = False
7881
7882   def ExpandNames(self):
7883     self._ExpandAndLockInstance()
7884
7885   def CheckPrereq(self):
7886     """Check prerequisites.
7887
7888     This checks that the instance is in the cluster.
7889
7890     """
7891     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7892     assert self.instance is not None, \
7893       "Cannot retrieve locked instance %s" % self.op.instance_name
7894     _CheckNodeOnline(self, self.instance.primary_node)
7895
7896   def Exec(self, feedback_fn):
7897     """Connect to the console of an instance
7898
7899     """
7900     instance = self.instance
7901     node = instance.primary_node
7902
7903     node_insts = self.rpc.call_instance_list([node],
7904                                              [instance.hypervisor])[node]
7905     node_insts.Raise("Can't get node information from %s" % node)
7906
7907     if instance.name not in node_insts.payload:
7908       if instance.admin_up:
7909         state = constants.INSTST_ERRORDOWN
7910       else:
7911         state = constants.INSTST_ADMINDOWN
7912       raise errors.OpExecError("Instance %s is not running (state %s)" %
7913                                (instance.name, state))
7914
7915     logging.debug("Connecting to console of %s on %s", instance.name, node)
7916
7917     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7918
7919
7920 def _GetInstanceConsole(cluster, instance):
7921   """Returns console information for an instance.
7922
7923   @type cluster: L{objects.Cluster}
7924   @type instance: L{objects.Instance}
7925   @rtype: dict
7926
7927   """
7928   hyper = hypervisor.GetHypervisor(instance.hypervisor)
7929   # beparams and hvparams are passed separately, to avoid editing the
7930   # instance and then saving the defaults in the instance itself.
7931   hvparams = cluster.FillHV(instance)
7932   beparams = cluster.FillBE(instance)
7933   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7934
7935   assert console.instance == instance.name
7936   assert console.Validate()
7937
7938   return console.ToDict()
7939
7940
7941 class LUInstanceReplaceDisks(LogicalUnit):
7942   """Replace the disks of an instance.
7943
7944   """
7945   HPATH = "mirrors-replace"
7946   HTYPE = constants.HTYPE_INSTANCE
7947   REQ_BGL = False
7948
7949   def CheckArguments(self):
7950     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7951                                   self.op.iallocator)
7952
7953   def ExpandNames(self):
7954     self._ExpandAndLockInstance()
7955
7956     if self.op.iallocator is not None:
7957       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7958
7959     elif self.op.remote_node is not None:
7960       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7961       self.op.remote_node = remote_node
7962
7963       # Warning: do not remove the locking of the new secondary here
7964       # unless DRBD8.AddChildren is changed to work in parallel;
7965       # currently it doesn't since parallel invocations of
7966       # FindUnusedMinor will conflict
7967       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7968       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7969
7970     else:
7971       self.needed_locks[locking.LEVEL_NODE] = []
7972       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7973
7974     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7975                                    self.op.iallocator, self.op.remote_node,
7976                                    self.op.disks, False, self.op.early_release)
7977
7978     self.tasklets = [self.replacer]
7979
7980   def DeclareLocks(self, level):
7981     # If we're not already locking all nodes in the set we have to declare the
7982     # instance's primary/secondary nodes.
7983     if (level == locking.LEVEL_NODE and
7984         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7985       self._LockInstancesNodes()
7986
7987   def BuildHooksEnv(self):
7988     """Build hooks env.
7989
7990     This runs on the master, the primary and all the secondaries.
7991
7992     """
7993     instance = self.replacer.instance
7994     env = {
7995       "MODE": self.op.mode,
7996       "NEW_SECONDARY": self.op.remote_node,
7997       "OLD_SECONDARY": instance.secondary_nodes[0],
7998       }
7999     env.update(_BuildInstanceHookEnvByObject(self, instance))
8000     nl = [
8001       self.cfg.GetMasterNode(),
8002       instance.primary_node,
8003       ]
8004     if self.op.remote_node is not None:
8005       nl.append(self.op.remote_node)
8006     return env, nl, nl
8007
8008
8009 class TLReplaceDisks(Tasklet):
8010   """Replaces disks for an instance.
8011
8012   Note: Locking is not within the scope of this class.
8013
8014   """
8015   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8016                disks, delay_iallocator, early_release):
8017     """Initializes this class.
8018
8019     """
8020     Tasklet.__init__(self, lu)
8021
8022     # Parameters
8023     self.instance_name = instance_name
8024     self.mode = mode
8025     self.iallocator_name = iallocator_name
8026     self.remote_node = remote_node
8027     self.disks = disks
8028     self.delay_iallocator = delay_iallocator
8029     self.early_release = early_release
8030
8031     # Runtime data
8032     self.instance = None
8033     self.new_node = None
8034     self.target_node = None
8035     self.other_node = None
8036     self.remote_node_info = None
8037     self.node_secondary_ip = None
8038
8039   @staticmethod
8040   def CheckArguments(mode, remote_node, iallocator):
8041     """Helper function for users of this class.
8042
8043     """
8044     # check for valid parameter combination
8045     if mode == constants.REPLACE_DISK_CHG:
8046       if remote_node is None and iallocator is None:
8047         raise errors.OpPrereqError("When changing the secondary either an"
8048                                    " iallocator script must be used or the"
8049                                    " new node given", errors.ECODE_INVAL)
8050
8051       if remote_node is not None and iallocator is not None:
8052         raise errors.OpPrereqError("Give either the iallocator or the new"
8053                                    " secondary, not both", errors.ECODE_INVAL)
8054
8055     elif remote_node is not None or iallocator is not None:
8056       # Not replacing the secondary
8057       raise errors.OpPrereqError("The iallocator and new node options can"
8058                                  " only be used when changing the"
8059                                  " secondary node", errors.ECODE_INVAL)
8060
8061   @staticmethod
8062   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8063     """Compute a new secondary node using an IAllocator.
8064
8065     """
8066     ial = IAllocator(lu.cfg, lu.rpc,
8067                      mode=constants.IALLOCATOR_MODE_RELOC,
8068                      name=instance_name,
8069                      relocate_from=relocate_from)
8070
8071     ial.Run(iallocator_name)
8072
8073     if not ial.success:
8074       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8075                                  " %s" % (iallocator_name, ial.info),
8076                                  errors.ECODE_NORES)
8077
8078     if len(ial.result) != ial.required_nodes:
8079       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8080                                  " of nodes (%s), required %s" %
8081                                  (iallocator_name,
8082                                   len(ial.result), ial.required_nodes),
8083                                  errors.ECODE_FAULT)
8084
8085     remote_node_name = ial.result[0]
8086
8087     lu.LogInfo("Selected new secondary for instance '%s': %s",
8088                instance_name, remote_node_name)
8089
8090     return remote_node_name
8091
8092   def _FindFaultyDisks(self, node_name):
8093     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8094                                     node_name, True)
8095
8096   def CheckPrereq(self):
8097     """Check prerequisites.
8098
8099     This checks that the instance is in the cluster.
8100
8101     """
8102     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8103     assert instance is not None, \
8104       "Cannot retrieve locked instance %s" % self.instance_name
8105
8106     if instance.disk_template != constants.DT_DRBD8:
8107       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8108                                  " instances", errors.ECODE_INVAL)
8109
8110     if len(instance.secondary_nodes) != 1:
8111       raise errors.OpPrereqError("The instance has a strange layout,"
8112                                  " expected one secondary but found %d" %
8113                                  len(instance.secondary_nodes),
8114                                  errors.ECODE_FAULT)
8115
8116     if not self.delay_iallocator:
8117       self._CheckPrereq2()
8118
8119   def _CheckPrereq2(self):
8120     """Check prerequisites, second part.
8121
8122     This function should always be part of CheckPrereq. It was separated and is
8123     now called from Exec because during node evacuation iallocator was only
8124     called with an unmodified cluster model, not taking planned changes into
8125     account.
8126
8127     """
8128     instance = self.instance
8129     secondary_node = instance.secondary_nodes[0]
8130
8131     if self.iallocator_name is None:
8132       remote_node = self.remote_node
8133     else:
8134       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8135                                        instance.name, instance.secondary_nodes)
8136
8137     if remote_node is not None:
8138       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8139       assert self.remote_node_info is not None, \
8140         "Cannot retrieve locked node %s" % remote_node
8141     else:
8142       self.remote_node_info = None
8143
8144     if remote_node == self.instance.primary_node:
8145       raise errors.OpPrereqError("The specified node is the primary node of"
8146                                  " the instance.", errors.ECODE_INVAL)
8147
8148     if remote_node == secondary_node:
8149       raise errors.OpPrereqError("The specified node is already the"
8150                                  " secondary node of the instance.",
8151                                  errors.ECODE_INVAL)
8152
8153     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8154                                     constants.REPLACE_DISK_CHG):
8155       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8156                                  errors.ECODE_INVAL)
8157
8158     if self.mode == constants.REPLACE_DISK_AUTO:
8159       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8160       faulty_secondary = self._FindFaultyDisks(secondary_node)
8161
8162       if faulty_primary and faulty_secondary:
8163         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8164                                    " one node and can not be repaired"
8165                                    " automatically" % self.instance_name,
8166                                    errors.ECODE_STATE)
8167
8168       if faulty_primary:
8169         self.disks = faulty_primary
8170         self.target_node = instance.primary_node
8171         self.other_node = secondary_node
8172         check_nodes = [self.target_node, self.other_node]
8173       elif faulty_secondary:
8174         self.disks = faulty_secondary
8175         self.target_node = secondary_node
8176         self.other_node = instance.primary_node
8177         check_nodes = [self.target_node, self.other_node]
8178       else:
8179         self.disks = []
8180         check_nodes = []
8181
8182     else:
8183       # Non-automatic modes
8184       if self.mode == constants.REPLACE_DISK_PRI:
8185         self.target_node = instance.primary_node
8186         self.other_node = secondary_node
8187         check_nodes = [self.target_node, self.other_node]
8188
8189       elif self.mode == constants.REPLACE_DISK_SEC:
8190         self.target_node = secondary_node
8191         self.other_node = instance.primary_node
8192         check_nodes = [self.target_node, self.other_node]
8193
8194       elif self.mode == constants.REPLACE_DISK_CHG:
8195         self.new_node = remote_node
8196         self.other_node = instance.primary_node
8197         self.target_node = secondary_node
8198         check_nodes = [self.new_node, self.other_node]
8199
8200         _CheckNodeNotDrained(self.lu, remote_node)
8201         _CheckNodeVmCapable(self.lu, remote_node)
8202
8203         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8204         assert old_node_info is not None
8205         if old_node_info.offline and not self.early_release:
8206           # doesn't make sense to delay the release
8207           self.early_release = True
8208           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8209                           " early-release mode", secondary_node)
8210
8211       else:
8212         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8213                                      self.mode)
8214
8215       # If not specified all disks should be replaced
8216       if not self.disks:
8217         self.disks = range(len(self.instance.disks))
8218
8219     for node in check_nodes:
8220       _CheckNodeOnline(self.lu, node)
8221
8222     # Check whether disks are valid
8223     for disk_idx in self.disks:
8224       instance.FindDisk(disk_idx)
8225
8226     # Get secondary node IP addresses
8227     node_2nd_ip = {}
8228
8229     for node_name in [self.target_node, self.other_node, self.new_node]:
8230       if node_name is not None:
8231         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8232
8233     self.node_secondary_ip = node_2nd_ip
8234
8235   def Exec(self, feedback_fn):
8236     """Execute disk replacement.
8237
8238     This dispatches the disk replacement to the appropriate handler.
8239
8240     """
8241     if self.delay_iallocator:
8242       self._CheckPrereq2()
8243
8244     if not self.disks:
8245       feedback_fn("No disks need replacement")
8246       return
8247
8248     feedback_fn("Replacing disk(s) %s for %s" %
8249                 (utils.CommaJoin(self.disks), self.instance.name))
8250
8251     activate_disks = (not self.instance.admin_up)
8252
8253     # Activate the instance disks if we're replacing them on a down instance
8254     if activate_disks:
8255       _StartInstanceDisks(self.lu, self.instance, True)
8256
8257     try:
8258       # Should we replace the secondary node?
8259       if self.new_node is not None:
8260         fn = self._ExecDrbd8Secondary
8261       else:
8262         fn = self._ExecDrbd8DiskOnly
8263
8264       return fn(feedback_fn)
8265
8266     finally:
8267       # Deactivate the instance disks if we're replacing them on a
8268       # down instance
8269       if activate_disks:
8270         _SafeShutdownInstanceDisks(self.lu, self.instance)
8271
8272   def _CheckVolumeGroup(self, nodes):
8273     self.lu.LogInfo("Checking volume groups")
8274
8275     vgname = self.cfg.GetVGName()
8276
8277     # Make sure volume group exists on all involved nodes
8278     results = self.rpc.call_vg_list(nodes)
8279     if not results:
8280       raise errors.OpExecError("Can't list volume groups on the nodes")
8281
8282     for node in nodes:
8283       res = results[node]
8284       res.Raise("Error checking node %s" % node)
8285       if vgname not in res.payload:
8286         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8287                                  (vgname, node))
8288
8289   def _CheckDisksExistence(self, nodes):
8290     # Check disk existence
8291     for idx, dev in enumerate(self.instance.disks):
8292       if idx not in self.disks:
8293         continue
8294
8295       for node in nodes:
8296         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8297         self.cfg.SetDiskID(dev, node)
8298
8299         result = self.rpc.call_blockdev_find(node, dev)
8300
8301         msg = result.fail_msg
8302         if msg or not result.payload:
8303           if not msg:
8304             msg = "disk not found"
8305           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8306                                    (idx, node, msg))
8307
8308   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8309     for idx, dev in enumerate(self.instance.disks):
8310       if idx not in self.disks:
8311         continue
8312
8313       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8314                       (idx, node_name))
8315
8316       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8317                                    ldisk=ldisk):
8318         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8319                                  " replace disks for instance %s" %
8320                                  (node_name, self.instance.name))
8321
8322   def _CreateNewStorage(self, node_name):
8323     vgname = self.cfg.GetVGName()
8324     iv_names = {}
8325
8326     for idx, dev in enumerate(self.instance.disks):
8327       if idx not in self.disks:
8328         continue
8329
8330       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8331
8332       self.cfg.SetDiskID(dev, node_name)
8333
8334       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8335       names = _GenerateUniqueNames(self.lu, lv_names)
8336
8337       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8338                              logical_id=(vgname, names[0]))
8339       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8340                              logical_id=(vgname, names[1]))
8341
8342       new_lvs = [lv_data, lv_meta]
8343       old_lvs = dev.children
8344       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8345
8346       # we pass force_create=True to force the LVM creation
8347       for new_lv in new_lvs:
8348         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8349                         _GetInstanceInfoText(self.instance), False)
8350
8351     return iv_names
8352
8353   def _CheckDevices(self, node_name, iv_names):
8354     for name, (dev, _, _) in iv_names.iteritems():
8355       self.cfg.SetDiskID(dev, node_name)
8356
8357       result = self.rpc.call_blockdev_find(node_name, dev)
8358
8359       msg = result.fail_msg
8360       if msg or not result.payload:
8361         if not msg:
8362           msg = "disk not found"
8363         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8364                                  (name, msg))
8365
8366       if result.payload.is_degraded:
8367         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8368
8369   def _RemoveOldStorage(self, node_name, iv_names):
8370     for name, (_, old_lvs, _) in iv_names.iteritems():
8371       self.lu.LogInfo("Remove logical volumes for %s" % name)
8372
8373       for lv in old_lvs:
8374         self.cfg.SetDiskID(lv, node_name)
8375
8376         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8377         if msg:
8378           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8379                              hint="remove unused LVs manually")
8380
8381   def _ReleaseNodeLock(self, node_name):
8382     """Releases the lock for a given node."""
8383     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8384
8385   def _ExecDrbd8DiskOnly(self, feedback_fn):
8386     """Replace a disk on the primary or secondary for DRBD 8.
8387
8388     The algorithm for replace is quite complicated:
8389
8390       1. for each disk to be replaced:
8391
8392         1. create new LVs on the target node with unique names
8393         1. detach old LVs from the drbd device
8394         1. rename old LVs to name_replaced.<time_t>
8395         1. rename new LVs to old LVs
8396         1. attach the new LVs (with the old names now) to the drbd device
8397
8398       1. wait for sync across all devices
8399
8400       1. for each modified disk:
8401
8402         1. remove old LVs (which have the name name_replaces.<time_t>)
8403
8404     Failures are not very well handled.
8405
8406     """
8407     steps_total = 6
8408
8409     # Step: check device activation
8410     self.lu.LogStep(1, steps_total, "Check device existence")
8411     self._CheckDisksExistence([self.other_node, self.target_node])
8412     self._CheckVolumeGroup([self.target_node, self.other_node])
8413
8414     # Step: check other node consistency
8415     self.lu.LogStep(2, steps_total, "Check peer consistency")
8416     self._CheckDisksConsistency(self.other_node,
8417                                 self.other_node == self.instance.primary_node,
8418                                 False)
8419
8420     # Step: create new storage
8421     self.lu.LogStep(3, steps_total, "Allocate new storage")
8422     iv_names = self._CreateNewStorage(self.target_node)
8423
8424     # Step: for each lv, detach+rename*2+attach
8425     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8426     for dev, old_lvs, new_lvs in iv_names.itervalues():
8427       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8428
8429       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8430                                                      old_lvs)
8431       result.Raise("Can't detach drbd from local storage on node"
8432                    " %s for device %s" % (self.target_node, dev.iv_name))
8433       #dev.children = []
8434       #cfg.Update(instance)
8435
8436       # ok, we created the new LVs, so now we know we have the needed
8437       # storage; as such, we proceed on the target node to rename
8438       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8439       # using the assumption that logical_id == physical_id (which in
8440       # turn is the unique_id on that node)
8441
8442       # FIXME(iustin): use a better name for the replaced LVs
8443       temp_suffix = int(time.time())
8444       ren_fn = lambda d, suff: (d.physical_id[0],
8445                                 d.physical_id[1] + "_replaced-%s" % suff)
8446
8447       # Build the rename list based on what LVs exist on the node
8448       rename_old_to_new = []
8449       for to_ren in old_lvs:
8450         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8451         if not result.fail_msg and result.payload:
8452           # device exists
8453           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8454
8455       self.lu.LogInfo("Renaming the old LVs on the target node")
8456       result = self.rpc.call_blockdev_rename(self.target_node,
8457                                              rename_old_to_new)
8458       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8459
8460       # Now we rename the new LVs to the old LVs
8461       self.lu.LogInfo("Renaming the new LVs on the target node")
8462       rename_new_to_old = [(new, old.physical_id)
8463                            for old, new in zip(old_lvs, new_lvs)]
8464       result = self.rpc.call_blockdev_rename(self.target_node,
8465                                              rename_new_to_old)
8466       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8467
8468       for old, new in zip(old_lvs, new_lvs):
8469         new.logical_id = old.logical_id
8470         self.cfg.SetDiskID(new, self.target_node)
8471
8472       for disk in old_lvs:
8473         disk.logical_id = ren_fn(disk, temp_suffix)
8474         self.cfg.SetDiskID(disk, self.target_node)
8475
8476       # Now that the new lvs have the old name, we can add them to the device
8477       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8478       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8479                                                   new_lvs)
8480       msg = result.fail_msg
8481       if msg:
8482         for new_lv in new_lvs:
8483           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8484                                                new_lv).fail_msg
8485           if msg2:
8486             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8487                                hint=("cleanup manually the unused logical"
8488                                      "volumes"))
8489         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8490
8491       dev.children = new_lvs
8492
8493       self.cfg.Update(self.instance, feedback_fn)
8494
8495     cstep = 5
8496     if self.early_release:
8497       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8498       cstep += 1
8499       self._RemoveOldStorage(self.target_node, iv_names)
8500       # WARNING: we release both node locks here, do not do other RPCs
8501       # than WaitForSync to the primary node
8502       self._ReleaseNodeLock([self.target_node, self.other_node])
8503
8504     # Wait for sync
8505     # This can fail as the old devices are degraded and _WaitForSync
8506     # does a combined result over all disks, so we don't check its return value
8507     self.lu.LogStep(cstep, steps_total, "Sync devices")
8508     cstep += 1
8509     _WaitForSync(self.lu, self.instance)
8510
8511     # Check all devices manually
8512     self._CheckDevices(self.instance.primary_node, iv_names)
8513
8514     # Step: remove old storage
8515     if not self.early_release:
8516       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8517       cstep += 1
8518       self._RemoveOldStorage(self.target_node, iv_names)
8519
8520   def _ExecDrbd8Secondary(self, feedback_fn):
8521     """Replace the secondary node for DRBD 8.
8522
8523     The algorithm for replace is quite complicated:
8524       - for all disks of the instance:
8525         - create new LVs on the new node with same names
8526         - shutdown the drbd device on the old secondary
8527         - disconnect the drbd network on the primary
8528         - create the drbd device on the new secondary
8529         - network attach the drbd on the primary, using an artifice:
8530           the drbd code for Attach() will connect to the network if it
8531           finds a device which is connected to the good local disks but
8532           not network enabled
8533       - wait for sync across all devices
8534       - remove all disks from the old secondary
8535
8536     Failures are not very well handled.
8537
8538     """
8539     steps_total = 6
8540
8541     # Step: check device activation
8542     self.lu.LogStep(1, steps_total, "Check device existence")
8543     self._CheckDisksExistence([self.instance.primary_node])
8544     self._CheckVolumeGroup([self.instance.primary_node])
8545
8546     # Step: check other node consistency
8547     self.lu.LogStep(2, steps_total, "Check peer consistency")
8548     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8549
8550     # Step: create new storage
8551     self.lu.LogStep(3, steps_total, "Allocate new storage")
8552     for idx, dev in enumerate(self.instance.disks):
8553       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8554                       (self.new_node, idx))
8555       # we pass force_create=True to force LVM creation
8556       for new_lv in dev.children:
8557         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8558                         _GetInstanceInfoText(self.instance), False)
8559
8560     # Step 4: dbrd minors and drbd setups changes
8561     # after this, we must manually remove the drbd minors on both the
8562     # error and the success paths
8563     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8564     minors = self.cfg.AllocateDRBDMinor([self.new_node
8565                                          for dev in self.instance.disks],
8566                                         self.instance.name)
8567     logging.debug("Allocated minors %r", minors)
8568
8569     iv_names = {}
8570     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8571       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8572                       (self.new_node, idx))
8573       # create new devices on new_node; note that we create two IDs:
8574       # one without port, so the drbd will be activated without
8575       # networking information on the new node at this stage, and one
8576       # with network, for the latter activation in step 4
8577       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8578       if self.instance.primary_node == o_node1:
8579         p_minor = o_minor1
8580       else:
8581         assert self.instance.primary_node == o_node2, "Three-node instance?"
8582         p_minor = o_minor2
8583
8584       new_alone_id = (self.instance.primary_node, self.new_node, None,
8585                       p_minor, new_minor, o_secret)
8586       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8587                     p_minor, new_minor, o_secret)
8588
8589       iv_names[idx] = (dev, dev.children, new_net_id)
8590       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8591                     new_net_id)
8592       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8593                               logical_id=new_alone_id,
8594                               children=dev.children,
8595                               size=dev.size)
8596       try:
8597         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8598                               _GetInstanceInfoText(self.instance), False)
8599       except errors.GenericError:
8600         self.cfg.ReleaseDRBDMinors(self.instance.name)
8601         raise
8602
8603     # We have new devices, shutdown the drbd on the old secondary
8604     for idx, dev in enumerate(self.instance.disks):
8605       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8606       self.cfg.SetDiskID(dev, self.target_node)
8607       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8608       if msg:
8609         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8610                            "node: %s" % (idx, msg),
8611                            hint=("Please cleanup this device manually as"
8612                                  " soon as possible"))
8613
8614     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8615     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8616                                                self.node_secondary_ip,
8617                                                self.instance.disks)\
8618                                               [self.instance.primary_node]
8619
8620     msg = result.fail_msg
8621     if msg:
8622       # detaches didn't succeed (unlikely)
8623       self.cfg.ReleaseDRBDMinors(self.instance.name)
8624       raise errors.OpExecError("Can't detach the disks from the network on"
8625                                " old node: %s" % (msg,))
8626
8627     # if we managed to detach at least one, we update all the disks of
8628     # the instance to point to the new secondary
8629     self.lu.LogInfo("Updating instance configuration")
8630     for dev, _, new_logical_id in iv_names.itervalues():
8631       dev.logical_id = new_logical_id
8632       self.cfg.SetDiskID(dev, self.instance.primary_node)
8633
8634     self.cfg.Update(self.instance, feedback_fn)
8635
8636     # and now perform the drbd attach
8637     self.lu.LogInfo("Attaching primary drbds to new secondary"
8638                     " (standalone => connected)")
8639     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8640                                             self.new_node],
8641                                            self.node_secondary_ip,
8642                                            self.instance.disks,
8643                                            self.instance.name,
8644                                            False)
8645     for to_node, to_result in result.items():
8646       msg = to_result.fail_msg
8647       if msg:
8648         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8649                            to_node, msg,
8650                            hint=("please do a gnt-instance info to see the"
8651                                  " status of disks"))
8652     cstep = 5
8653     if self.early_release:
8654       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8655       cstep += 1
8656       self._RemoveOldStorage(self.target_node, iv_names)
8657       # WARNING: we release all node locks here, do not do other RPCs
8658       # than WaitForSync to the primary node
8659       self._ReleaseNodeLock([self.instance.primary_node,
8660                              self.target_node,
8661                              self.new_node])
8662
8663     # Wait for sync
8664     # This can fail as the old devices are degraded and _WaitForSync
8665     # does a combined result over all disks, so we don't check its return value
8666     self.lu.LogStep(cstep, steps_total, "Sync devices")
8667     cstep += 1
8668     _WaitForSync(self.lu, self.instance)
8669
8670     # Check all devices manually
8671     self._CheckDevices(self.instance.primary_node, iv_names)
8672
8673     # Step: remove old storage
8674     if not self.early_release:
8675       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8676       self._RemoveOldStorage(self.target_node, iv_names)
8677
8678
8679 class LURepairNodeStorage(NoHooksLU):
8680   """Repairs the volume group on a node.
8681
8682   """
8683   REQ_BGL = False
8684
8685   def CheckArguments(self):
8686     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8687
8688     storage_type = self.op.storage_type
8689
8690     if (constants.SO_FIX_CONSISTENCY not in
8691         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8692       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8693                                  " repaired" % storage_type,
8694                                  errors.ECODE_INVAL)
8695
8696   def ExpandNames(self):
8697     self.needed_locks = {
8698       locking.LEVEL_NODE: [self.op.node_name],
8699       }
8700
8701   def _CheckFaultyDisks(self, instance, node_name):
8702     """Ensure faulty disks abort the opcode or at least warn."""
8703     try:
8704       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8705                                   node_name, True):
8706         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8707                                    " node '%s'" % (instance.name, node_name),
8708                                    errors.ECODE_STATE)
8709     except errors.OpPrereqError, err:
8710       if self.op.ignore_consistency:
8711         self.proc.LogWarning(str(err.args[0]))
8712       else:
8713         raise
8714
8715   def CheckPrereq(self):
8716     """Check prerequisites.
8717
8718     """
8719     # Check whether any instance on this node has faulty disks
8720     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8721       if not inst.admin_up:
8722         continue
8723       check_nodes = set(inst.all_nodes)
8724       check_nodes.discard(self.op.node_name)
8725       for inst_node_name in check_nodes:
8726         self._CheckFaultyDisks(inst, inst_node_name)
8727
8728   def Exec(self, feedback_fn):
8729     feedback_fn("Repairing storage unit '%s' on %s ..." %
8730                 (self.op.name, self.op.node_name))
8731
8732     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8733     result = self.rpc.call_storage_execute(self.op.node_name,
8734                                            self.op.storage_type, st_args,
8735                                            self.op.name,
8736                                            constants.SO_FIX_CONSISTENCY)
8737     result.Raise("Failed to repair storage unit '%s' on %s" %
8738                  (self.op.name, self.op.node_name))
8739
8740
8741 class LUNodeEvacStrategy(NoHooksLU):
8742   """Computes the node evacuation strategy.
8743
8744   """
8745   REQ_BGL = False
8746
8747   def CheckArguments(self):
8748     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8749
8750   def ExpandNames(self):
8751     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8752     self.needed_locks = locks = {}
8753     if self.op.remote_node is None:
8754       locks[locking.LEVEL_NODE] = locking.ALL_SET
8755     else:
8756       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8757       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8758
8759   def Exec(self, feedback_fn):
8760     if self.op.remote_node is not None:
8761       instances = []
8762       for node in self.op.nodes:
8763         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8764       result = []
8765       for i in instances:
8766         if i.primary_node == self.op.remote_node:
8767           raise errors.OpPrereqError("Node %s is the primary node of"
8768                                      " instance %s, cannot use it as"
8769                                      " secondary" %
8770                                      (self.op.remote_node, i.name),
8771                                      errors.ECODE_INVAL)
8772         result.append([i.name, self.op.remote_node])
8773     else:
8774       ial = IAllocator(self.cfg, self.rpc,
8775                        mode=constants.IALLOCATOR_MODE_MEVAC,
8776                        evac_nodes=self.op.nodes)
8777       ial.Run(self.op.iallocator, validate=True)
8778       if not ial.success:
8779         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8780                                  errors.ECODE_NORES)
8781       result = ial.result
8782     return result
8783
8784
8785 class LUInstanceGrowDisk(LogicalUnit):
8786   """Grow a disk of an instance.
8787
8788   """
8789   HPATH = "disk-grow"
8790   HTYPE = constants.HTYPE_INSTANCE
8791   REQ_BGL = False
8792
8793   def ExpandNames(self):
8794     self._ExpandAndLockInstance()
8795     self.needed_locks[locking.LEVEL_NODE] = []
8796     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8797
8798   def DeclareLocks(self, level):
8799     if level == locking.LEVEL_NODE:
8800       self._LockInstancesNodes()
8801
8802   def BuildHooksEnv(self):
8803     """Build hooks env.
8804
8805     This runs on the master, the primary and all the secondaries.
8806
8807     """
8808     env = {
8809       "DISK": self.op.disk,
8810       "AMOUNT": self.op.amount,
8811       }
8812     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8813     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8814     return env, nl, nl
8815
8816   def CheckPrereq(self):
8817     """Check prerequisites.
8818
8819     This checks that the instance is in the cluster.
8820
8821     """
8822     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8823     assert instance is not None, \
8824       "Cannot retrieve locked instance %s" % self.op.instance_name
8825     nodenames = list(instance.all_nodes)
8826     for node in nodenames:
8827       _CheckNodeOnline(self, node)
8828
8829     self.instance = instance
8830
8831     if instance.disk_template not in constants.DTS_GROWABLE:
8832       raise errors.OpPrereqError("Instance's disk layout does not support"
8833                                  " growing.", errors.ECODE_INVAL)
8834
8835     self.disk = instance.FindDisk(self.op.disk)
8836
8837     if instance.disk_template not in (constants.DT_FILE,
8838                                       constants.DT_SHARED_FILE):
8839       # TODO: check the free disk space for file, when that feature will be
8840       # supported
8841       _CheckNodesFreeDiskPerVG(self, nodenames,
8842                                self.disk.ComputeGrowth(self.op.amount))
8843
8844   def Exec(self, feedback_fn):
8845     """Execute disk grow.
8846
8847     """
8848     instance = self.instance
8849     disk = self.disk
8850
8851     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8852     if not disks_ok:
8853       raise errors.OpExecError("Cannot activate block device to grow")
8854
8855     for node in instance.all_nodes:
8856       self.cfg.SetDiskID(disk, node)
8857       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8858       result.Raise("Grow request failed to node %s" % node)
8859
8860       # TODO: Rewrite code to work properly
8861       # DRBD goes into sync mode for a short amount of time after executing the
8862       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8863       # calling "resize" in sync mode fails. Sleeping for a short amount of
8864       # time is a work-around.
8865       time.sleep(5)
8866
8867     disk.RecordGrow(self.op.amount)
8868     self.cfg.Update(instance, feedback_fn)
8869     if self.op.wait_for_sync:
8870       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8871       if disk_abort:
8872         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8873                              " status.\nPlease check the instance.")
8874       if not instance.admin_up:
8875         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8876     elif not instance.admin_up:
8877       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8878                            " not supposed to be running because no wait for"
8879                            " sync mode was requested.")
8880
8881
8882 class LUInstanceQueryData(NoHooksLU):
8883   """Query runtime instance data.
8884
8885   """
8886   REQ_BGL = False
8887
8888   def ExpandNames(self):
8889     self.needed_locks = {}
8890     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8891
8892     if self.op.instances:
8893       self.wanted_names = []
8894       for name in self.op.instances:
8895         full_name = _ExpandInstanceName(self.cfg, name)
8896         self.wanted_names.append(full_name)
8897       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8898     else:
8899       self.wanted_names = None
8900       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8901
8902     self.needed_locks[locking.LEVEL_NODE] = []
8903     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8904
8905   def DeclareLocks(self, level):
8906     if level == locking.LEVEL_NODE:
8907       self._LockInstancesNodes()
8908
8909   def CheckPrereq(self):
8910     """Check prerequisites.
8911
8912     This only checks the optional instance list against the existing names.
8913
8914     """
8915     if self.wanted_names is None:
8916       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8917
8918     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8919                              in self.wanted_names]
8920
8921   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8922     """Returns the status of a block device
8923
8924     """
8925     if self.op.static or not node:
8926       return None
8927
8928     self.cfg.SetDiskID(dev, node)
8929
8930     result = self.rpc.call_blockdev_find(node, dev)
8931     if result.offline:
8932       return None
8933
8934     result.Raise("Can't compute disk status for %s" % instance_name)
8935
8936     status = result.payload
8937     if status is None:
8938       return None
8939
8940     return (status.dev_path, status.major, status.minor,
8941             status.sync_percent, status.estimated_time,
8942             status.is_degraded, status.ldisk_status)
8943
8944   def _ComputeDiskStatus(self, instance, snode, dev):
8945     """Compute block device status.
8946
8947     """
8948     if dev.dev_type in constants.LDS_DRBD:
8949       # we change the snode then (otherwise we use the one passed in)
8950       if dev.logical_id[0] == instance.primary_node:
8951         snode = dev.logical_id[1]
8952       else:
8953         snode = dev.logical_id[0]
8954
8955     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8956                                               instance.name, dev)
8957     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8958
8959     if dev.children:
8960       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8961                       for child in dev.children]
8962     else:
8963       dev_children = []
8964
8965     data = {
8966       "iv_name": dev.iv_name,
8967       "dev_type": dev.dev_type,
8968       "logical_id": dev.logical_id,
8969       "physical_id": dev.physical_id,
8970       "pstatus": dev_pstatus,
8971       "sstatus": dev_sstatus,
8972       "children": dev_children,
8973       "mode": dev.mode,
8974       "size": dev.size,
8975       }
8976
8977     return data
8978
8979   def Exec(self, feedback_fn):
8980     """Gather and return data"""
8981     result = {}
8982
8983     cluster = self.cfg.GetClusterInfo()
8984
8985     for instance in self.wanted_instances:
8986       if not self.op.static:
8987         remote_info = self.rpc.call_instance_info(instance.primary_node,
8988                                                   instance.name,
8989                                                   instance.hypervisor)
8990         remote_info.Raise("Error checking node %s" % instance.primary_node)
8991         remote_info = remote_info.payload
8992         if remote_info and "state" in remote_info:
8993           remote_state = "up"
8994         else:
8995           remote_state = "down"
8996       else:
8997         remote_state = None
8998       if instance.admin_up:
8999         config_state = "up"
9000       else:
9001         config_state = "down"
9002
9003       disks = [self._ComputeDiskStatus(instance, None, device)
9004                for device in instance.disks]
9005
9006       idict = {
9007         "name": instance.name,
9008         "config_state": config_state,
9009         "run_state": remote_state,
9010         "pnode": instance.primary_node,
9011         "snodes": instance.secondary_nodes,
9012         "os": instance.os,
9013         # this happens to be the same format used for hooks
9014         "nics": _NICListToTuple(self, instance.nics),
9015         "disk_template": instance.disk_template,
9016         "disks": disks,
9017         "hypervisor": instance.hypervisor,
9018         "network_port": instance.network_port,
9019         "hv_instance": instance.hvparams,
9020         "hv_actual": cluster.FillHV(instance, skip_globals=True),
9021         "be_instance": instance.beparams,
9022         "be_actual": cluster.FillBE(instance),
9023         "os_instance": instance.osparams,
9024         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9025         "serial_no": instance.serial_no,
9026         "mtime": instance.mtime,
9027         "ctime": instance.ctime,
9028         "uuid": instance.uuid,
9029         }
9030
9031       result[instance.name] = idict
9032
9033     return result
9034
9035
9036 class LUInstanceSetParams(LogicalUnit):
9037   """Modifies an instances's parameters.
9038
9039   """
9040   HPATH = "instance-modify"
9041   HTYPE = constants.HTYPE_INSTANCE
9042   REQ_BGL = False
9043
9044   def CheckArguments(self):
9045     if not (self.op.nics or self.op.disks or self.op.disk_template or
9046             self.op.hvparams or self.op.beparams or self.op.os_name):
9047       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9048
9049     if self.op.hvparams:
9050       _CheckGlobalHvParams(self.op.hvparams)
9051
9052     # Disk validation
9053     disk_addremove = 0
9054     for disk_op, disk_dict in self.op.disks:
9055       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9056       if disk_op == constants.DDM_REMOVE:
9057         disk_addremove += 1
9058         continue
9059       elif disk_op == constants.DDM_ADD:
9060         disk_addremove += 1
9061       else:
9062         if not isinstance(disk_op, int):
9063           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9064         if not isinstance(disk_dict, dict):
9065           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9066           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9067
9068       if disk_op == constants.DDM_ADD:
9069         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9070         if mode not in constants.DISK_ACCESS_SET:
9071           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9072                                      errors.ECODE_INVAL)
9073         size = disk_dict.get('size', None)
9074         if size is None:
9075           raise errors.OpPrereqError("Required disk parameter size missing",
9076                                      errors.ECODE_INVAL)
9077         try:
9078           size = int(size)
9079         except (TypeError, ValueError), err:
9080           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9081                                      str(err), errors.ECODE_INVAL)
9082         disk_dict['size'] = size
9083       else:
9084         # modification of disk
9085         if 'size' in disk_dict:
9086           raise errors.OpPrereqError("Disk size change not possible, use"
9087                                      " grow-disk", errors.ECODE_INVAL)
9088
9089     if disk_addremove > 1:
9090       raise errors.OpPrereqError("Only one disk add or remove operation"
9091                                  " supported at a time", errors.ECODE_INVAL)
9092
9093     if self.op.disks and self.op.disk_template is not None:
9094       raise errors.OpPrereqError("Disk template conversion and other disk"
9095                                  " changes not supported at the same time",
9096                                  errors.ECODE_INVAL)
9097
9098     if (self.op.disk_template and
9099         self.op.disk_template in constants.DTS_NET_MIRROR and
9100         self.op.remote_node is None):
9101       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9102                                  " one requires specifying a secondary node",
9103                                  errors.ECODE_INVAL)
9104
9105     # NIC validation
9106     nic_addremove = 0
9107     for nic_op, nic_dict in self.op.nics:
9108       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9109       if nic_op == constants.DDM_REMOVE:
9110         nic_addremove += 1
9111         continue
9112       elif nic_op == constants.DDM_ADD:
9113         nic_addremove += 1
9114       else:
9115         if not isinstance(nic_op, int):
9116           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9117         if not isinstance(nic_dict, dict):
9118           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9119           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9120
9121       # nic_dict should be a dict
9122       nic_ip = nic_dict.get('ip', None)
9123       if nic_ip is not None:
9124         if nic_ip.lower() == constants.VALUE_NONE:
9125           nic_dict['ip'] = None
9126         else:
9127           if not netutils.IPAddress.IsValid(nic_ip):
9128             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9129                                        errors.ECODE_INVAL)
9130
9131       nic_bridge = nic_dict.get('bridge', None)
9132       nic_link = nic_dict.get('link', None)
9133       if nic_bridge and nic_link:
9134         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9135                                    " at the same time", errors.ECODE_INVAL)
9136       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9137         nic_dict['bridge'] = None
9138       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9139         nic_dict['link'] = None
9140
9141       if nic_op == constants.DDM_ADD:
9142         nic_mac = nic_dict.get('mac', None)
9143         if nic_mac is None:
9144           nic_dict['mac'] = constants.VALUE_AUTO
9145
9146       if 'mac' in nic_dict:
9147         nic_mac = nic_dict['mac']
9148         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9149           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9150
9151         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9152           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9153                                      " modifying an existing nic",
9154                                      errors.ECODE_INVAL)
9155
9156     if nic_addremove > 1:
9157       raise errors.OpPrereqError("Only one NIC add or remove operation"
9158                                  " supported at a time", errors.ECODE_INVAL)
9159
9160   def ExpandNames(self):
9161     self._ExpandAndLockInstance()
9162     self.needed_locks[locking.LEVEL_NODE] = []
9163     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9164
9165   def DeclareLocks(self, level):
9166     if level == locking.LEVEL_NODE:
9167       self._LockInstancesNodes()
9168       if self.op.disk_template and self.op.remote_node:
9169         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9170         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9171
9172   def BuildHooksEnv(self):
9173     """Build hooks env.
9174
9175     This runs on the master, primary and secondaries.
9176
9177     """
9178     args = dict()
9179     if constants.BE_MEMORY in self.be_new:
9180       args['memory'] = self.be_new[constants.BE_MEMORY]
9181     if constants.BE_VCPUS in self.be_new:
9182       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9183     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9184     # information at all.
9185     if self.op.nics:
9186       args['nics'] = []
9187       nic_override = dict(self.op.nics)
9188       for idx, nic in enumerate(self.instance.nics):
9189         if idx in nic_override:
9190           this_nic_override = nic_override[idx]
9191         else:
9192           this_nic_override = {}
9193         if 'ip' in this_nic_override:
9194           ip = this_nic_override['ip']
9195         else:
9196           ip = nic.ip
9197         if 'mac' in this_nic_override:
9198           mac = this_nic_override['mac']
9199         else:
9200           mac = nic.mac
9201         if idx in self.nic_pnew:
9202           nicparams = self.nic_pnew[idx]
9203         else:
9204           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9205         mode = nicparams[constants.NIC_MODE]
9206         link = nicparams[constants.NIC_LINK]
9207         args['nics'].append((ip, mac, mode, link))
9208       if constants.DDM_ADD in nic_override:
9209         ip = nic_override[constants.DDM_ADD].get('ip', None)
9210         mac = nic_override[constants.DDM_ADD]['mac']
9211         nicparams = self.nic_pnew[constants.DDM_ADD]
9212         mode = nicparams[constants.NIC_MODE]
9213         link = nicparams[constants.NIC_LINK]
9214         args['nics'].append((ip, mac, mode, link))
9215       elif constants.DDM_REMOVE in nic_override:
9216         del args['nics'][-1]
9217
9218     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9219     if self.op.disk_template:
9220       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9221     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9222     return env, nl, nl
9223
9224   def CheckPrereq(self):
9225     """Check prerequisites.
9226
9227     This only checks the instance list against the existing names.
9228
9229     """
9230     # checking the new params on the primary/secondary nodes
9231
9232     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9233     cluster = self.cluster = self.cfg.GetClusterInfo()
9234     assert self.instance is not None, \
9235       "Cannot retrieve locked instance %s" % self.op.instance_name
9236     pnode = instance.primary_node
9237     nodelist = list(instance.all_nodes)
9238
9239     # OS change
9240     if self.op.os_name and not self.op.force:
9241       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9242                       self.op.force_variant)
9243       instance_os = self.op.os_name
9244     else:
9245       instance_os = instance.os
9246
9247     if self.op.disk_template:
9248       if instance.disk_template == self.op.disk_template:
9249         raise errors.OpPrereqError("Instance already has disk template %s" %
9250                                    instance.disk_template, errors.ECODE_INVAL)
9251
9252       if (instance.disk_template,
9253           self.op.disk_template) not in self._DISK_CONVERSIONS:
9254         raise errors.OpPrereqError("Unsupported disk template conversion from"
9255                                    " %s to %s" % (instance.disk_template,
9256                                                   self.op.disk_template),
9257                                    errors.ECODE_INVAL)
9258       _CheckInstanceDown(self, instance, "cannot change disk template")
9259       if self.op.disk_template in constants.DTS_NET_MIRROR:
9260         if self.op.remote_node == pnode:
9261           raise errors.OpPrereqError("Given new secondary node %s is the same"
9262                                      " as the primary node of the instance" %
9263                                      self.op.remote_node, errors.ECODE_STATE)
9264         _CheckNodeOnline(self, self.op.remote_node)
9265         _CheckNodeNotDrained(self, self.op.remote_node)
9266         # FIXME: here we assume that the old instance type is DT_PLAIN
9267         assert instance.disk_template == constants.DT_PLAIN
9268         disks = [{"size": d.size, "vg": d.logical_id[0]}
9269                  for d in instance.disks]
9270         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9271         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9272
9273     # hvparams processing
9274     if self.op.hvparams:
9275       hv_type = instance.hypervisor
9276       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9277       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9278       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9279
9280       # local check
9281       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9282       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9283       self.hv_new = hv_new # the new actual values
9284       self.hv_inst = i_hvdict # the new dict (without defaults)
9285     else:
9286       self.hv_new = self.hv_inst = {}
9287
9288     # beparams processing
9289     if self.op.beparams:
9290       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9291                                    use_none=True)
9292       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9293       be_new = cluster.SimpleFillBE(i_bedict)
9294       self.be_new = be_new # the new actual values
9295       self.be_inst = i_bedict # the new dict (without defaults)
9296     else:
9297       self.be_new = self.be_inst = {}
9298
9299     # osparams processing
9300     if self.op.osparams:
9301       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9302       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9303       self.os_inst = i_osdict # the new dict (without defaults)
9304     else:
9305       self.os_inst = {}
9306
9307     self.warn = []
9308
9309     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9310       mem_check_list = [pnode]
9311       if be_new[constants.BE_AUTO_BALANCE]:
9312         # either we changed auto_balance to yes or it was from before
9313         mem_check_list.extend(instance.secondary_nodes)
9314       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9315                                                   instance.hypervisor)
9316       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9317                                          instance.hypervisor)
9318       pninfo = nodeinfo[pnode]
9319       msg = pninfo.fail_msg
9320       if msg:
9321         # Assume the primary node is unreachable and go ahead
9322         self.warn.append("Can't get info from primary node %s: %s" %
9323                          (pnode,  msg))
9324       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9325         self.warn.append("Node data from primary node %s doesn't contain"
9326                          " free memory information" % pnode)
9327       elif instance_info.fail_msg:
9328         self.warn.append("Can't get instance runtime information: %s" %
9329                         instance_info.fail_msg)
9330       else:
9331         if instance_info.payload:
9332           current_mem = int(instance_info.payload['memory'])
9333         else:
9334           # Assume instance not running
9335           # (there is a slight race condition here, but it's not very probable,
9336           # and we have no other way to check)
9337           current_mem = 0
9338         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9339                     pninfo.payload['memory_free'])
9340         if miss_mem > 0:
9341           raise errors.OpPrereqError("This change will prevent the instance"
9342                                      " from starting, due to %d MB of memory"
9343                                      " missing on its primary node" % miss_mem,
9344                                      errors.ECODE_NORES)
9345
9346       if be_new[constants.BE_AUTO_BALANCE]:
9347         for node, nres in nodeinfo.items():
9348           if node not in instance.secondary_nodes:
9349             continue
9350           msg = nres.fail_msg
9351           if msg:
9352             self.warn.append("Can't get info from secondary node %s: %s" %
9353                              (node, msg))
9354           elif not isinstance(nres.payload.get('memory_free', None), int):
9355             self.warn.append("Secondary node %s didn't return free"
9356                              " memory information" % node)
9357           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9358             self.warn.append("Not enough memory to failover instance to"
9359                              " secondary node %s" % node)
9360
9361     # NIC processing
9362     self.nic_pnew = {}
9363     self.nic_pinst = {}
9364     for nic_op, nic_dict in self.op.nics:
9365       if nic_op == constants.DDM_REMOVE:
9366         if not instance.nics:
9367           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9368                                      errors.ECODE_INVAL)
9369         continue
9370       if nic_op != constants.DDM_ADD:
9371         # an existing nic
9372         if not instance.nics:
9373           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9374                                      " no NICs" % nic_op,
9375                                      errors.ECODE_INVAL)
9376         if nic_op < 0 or nic_op >= len(instance.nics):
9377           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9378                                      " are 0 to %d" %
9379                                      (nic_op, len(instance.nics) - 1),
9380                                      errors.ECODE_INVAL)
9381         old_nic_params = instance.nics[nic_op].nicparams
9382         old_nic_ip = instance.nics[nic_op].ip
9383       else:
9384         old_nic_params = {}
9385         old_nic_ip = None
9386
9387       update_params_dict = dict([(key, nic_dict[key])
9388                                  for key in constants.NICS_PARAMETERS
9389                                  if key in nic_dict])
9390
9391       if 'bridge' in nic_dict:
9392         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9393
9394       new_nic_params = _GetUpdatedParams(old_nic_params,
9395                                          update_params_dict)
9396       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9397       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9398       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9399       self.nic_pinst[nic_op] = new_nic_params
9400       self.nic_pnew[nic_op] = new_filled_nic_params
9401       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9402
9403       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9404         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9405         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9406         if msg:
9407           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9408           if self.op.force:
9409             self.warn.append(msg)
9410           else:
9411             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9412       if new_nic_mode == constants.NIC_MODE_ROUTED:
9413         if 'ip' in nic_dict:
9414           nic_ip = nic_dict['ip']
9415         else:
9416           nic_ip = old_nic_ip
9417         if nic_ip is None:
9418           raise errors.OpPrereqError('Cannot set the nic ip to None'
9419                                      ' on a routed nic', errors.ECODE_INVAL)
9420       if 'mac' in nic_dict:
9421         nic_mac = nic_dict['mac']
9422         if nic_mac is None:
9423           raise errors.OpPrereqError('Cannot set the nic mac to None',
9424                                      errors.ECODE_INVAL)
9425         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9426           # otherwise generate the mac
9427           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9428         else:
9429           # or validate/reserve the current one
9430           try:
9431             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9432           except errors.ReservationError:
9433             raise errors.OpPrereqError("MAC address %s already in use"
9434                                        " in cluster" % nic_mac,
9435                                        errors.ECODE_NOTUNIQUE)
9436
9437     # DISK processing
9438     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9439       raise errors.OpPrereqError("Disk operations not supported for"
9440                                  " diskless instances",
9441                                  errors.ECODE_INVAL)
9442     for disk_op, _ in self.op.disks:
9443       if disk_op == constants.DDM_REMOVE:
9444         if len(instance.disks) == 1:
9445           raise errors.OpPrereqError("Cannot remove the last disk of"
9446                                      " an instance", errors.ECODE_INVAL)
9447         _CheckInstanceDown(self, instance, "cannot remove disks")
9448
9449       if (disk_op == constants.DDM_ADD and
9450           len(instance.disks) >= constants.MAX_DISKS):
9451         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9452                                    " add more" % constants.MAX_DISKS,
9453                                    errors.ECODE_STATE)
9454       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9455         # an existing disk
9456         if disk_op < 0 or disk_op >= len(instance.disks):
9457           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9458                                      " are 0 to %d" %
9459                                      (disk_op, len(instance.disks)),
9460                                      errors.ECODE_INVAL)
9461
9462     return
9463
9464   def _ConvertPlainToDrbd(self, feedback_fn):
9465     """Converts an instance from plain to drbd.
9466
9467     """
9468     feedback_fn("Converting template to drbd")
9469     instance = self.instance
9470     pnode = instance.primary_node
9471     snode = self.op.remote_node
9472
9473     # create a fake disk info for _GenerateDiskTemplate
9474     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9475     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9476                                       instance.name, pnode, [snode],
9477                                       disk_info, None, None, 0, feedback_fn)
9478     info = _GetInstanceInfoText(instance)
9479     feedback_fn("Creating aditional volumes...")
9480     # first, create the missing data and meta devices
9481     for disk in new_disks:
9482       # unfortunately this is... not too nice
9483       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9484                             info, True)
9485       for child in disk.children:
9486         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9487     # at this stage, all new LVs have been created, we can rename the
9488     # old ones
9489     feedback_fn("Renaming original volumes...")
9490     rename_list = [(o, n.children[0].logical_id)
9491                    for (o, n) in zip(instance.disks, new_disks)]
9492     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9493     result.Raise("Failed to rename original LVs")
9494
9495     feedback_fn("Initializing DRBD devices...")
9496     # all child devices are in place, we can now create the DRBD devices
9497     for disk in new_disks:
9498       for node in [pnode, snode]:
9499         f_create = node == pnode
9500         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9501
9502     # at this point, the instance has been modified
9503     instance.disk_template = constants.DT_DRBD8
9504     instance.disks = new_disks
9505     self.cfg.Update(instance, feedback_fn)
9506
9507     # disks are created, waiting for sync
9508     disk_abort = not _WaitForSync(self, instance)
9509     if disk_abort:
9510       raise errors.OpExecError("There are some degraded disks for"
9511                                " this instance, please cleanup manually")
9512
9513   def _ConvertDrbdToPlain(self, feedback_fn):
9514     """Converts an instance from drbd to plain.
9515
9516     """
9517     instance = self.instance
9518     assert len(instance.secondary_nodes) == 1
9519     pnode = instance.primary_node
9520     snode = instance.secondary_nodes[0]
9521     feedback_fn("Converting template to plain")
9522
9523     old_disks = instance.disks
9524     new_disks = [d.children[0] for d in old_disks]
9525
9526     # copy over size and mode
9527     for parent, child in zip(old_disks, new_disks):
9528       child.size = parent.size
9529       child.mode = parent.mode
9530
9531     # update instance structure
9532     instance.disks = new_disks
9533     instance.disk_template = constants.DT_PLAIN
9534     self.cfg.Update(instance, feedback_fn)
9535
9536     feedback_fn("Removing volumes on the secondary node...")
9537     for disk in old_disks:
9538       self.cfg.SetDiskID(disk, snode)
9539       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9540       if msg:
9541         self.LogWarning("Could not remove block device %s on node %s,"
9542                         " continuing anyway: %s", disk.iv_name, snode, msg)
9543
9544     feedback_fn("Removing unneeded volumes on the primary node...")
9545     for idx, disk in enumerate(old_disks):
9546       meta = disk.children[1]
9547       self.cfg.SetDiskID(meta, pnode)
9548       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9549       if msg:
9550         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9551                         " continuing anyway: %s", idx, pnode, msg)
9552
9553   def Exec(self, feedback_fn):
9554     """Modifies an instance.
9555
9556     All parameters take effect only at the next restart of the instance.
9557
9558     """
9559     # Process here the warnings from CheckPrereq, as we don't have a
9560     # feedback_fn there.
9561     for warn in self.warn:
9562       feedback_fn("WARNING: %s" % warn)
9563
9564     result = []
9565     instance = self.instance
9566     # disk changes
9567     for disk_op, disk_dict in self.op.disks:
9568       if disk_op == constants.DDM_REMOVE:
9569         # remove the last disk
9570         device = instance.disks.pop()
9571         device_idx = len(instance.disks)
9572         for node, disk in device.ComputeNodeTree(instance.primary_node):
9573           self.cfg.SetDiskID(disk, node)
9574           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9575           if msg:
9576             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9577                             " continuing anyway", device_idx, node, msg)
9578         result.append(("disk/%d" % device_idx, "remove"))
9579       elif disk_op == constants.DDM_ADD:
9580         # add a new disk
9581         if instance.disk_template in (constants.DT_FILE,
9582                                         constants.DT_SHARED_FILE):
9583           file_driver, file_path = instance.disks[0].logical_id
9584           file_path = os.path.dirname(file_path)
9585         else:
9586           file_driver = file_path = None
9587         disk_idx_base = len(instance.disks)
9588         new_disk = _GenerateDiskTemplate(self,
9589                                          instance.disk_template,
9590                                          instance.name, instance.primary_node,
9591                                          instance.secondary_nodes,
9592                                          [disk_dict],
9593                                          file_path,
9594                                          file_driver,
9595                                          disk_idx_base, feedback_fn)[0]
9596         instance.disks.append(new_disk)
9597         info = _GetInstanceInfoText(instance)
9598
9599         logging.info("Creating volume %s for instance %s",
9600                      new_disk.iv_name, instance.name)
9601         # Note: this needs to be kept in sync with _CreateDisks
9602         #HARDCODE
9603         for node in instance.all_nodes:
9604           f_create = node == instance.primary_node
9605           try:
9606             _CreateBlockDev(self, node, instance, new_disk,
9607                             f_create, info, f_create)
9608           except errors.OpExecError, err:
9609             self.LogWarning("Failed to create volume %s (%s) on"
9610                             " node %s: %s",
9611                             new_disk.iv_name, new_disk, node, err)
9612         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9613                        (new_disk.size, new_disk.mode)))
9614       else:
9615         # change a given disk
9616         instance.disks[disk_op].mode = disk_dict['mode']
9617         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9618
9619     if self.op.disk_template:
9620       r_shut = _ShutdownInstanceDisks(self, instance)
9621       if not r_shut:
9622         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9623                                  " proceed with disk template conversion")
9624       mode = (instance.disk_template, self.op.disk_template)
9625       try:
9626         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9627       except:
9628         self.cfg.ReleaseDRBDMinors(instance.name)
9629         raise
9630       result.append(("disk_template", self.op.disk_template))
9631
9632     # NIC changes
9633     for nic_op, nic_dict in self.op.nics:
9634       if nic_op == constants.DDM_REMOVE:
9635         # remove the last nic
9636         del instance.nics[-1]
9637         result.append(("nic.%d" % len(instance.nics), "remove"))
9638       elif nic_op == constants.DDM_ADD:
9639         # mac and bridge should be set, by now
9640         mac = nic_dict['mac']
9641         ip = nic_dict.get('ip', None)
9642         nicparams = self.nic_pinst[constants.DDM_ADD]
9643         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9644         instance.nics.append(new_nic)
9645         result.append(("nic.%d" % (len(instance.nics) - 1),
9646                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9647                        (new_nic.mac, new_nic.ip,
9648                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9649                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9650                        )))
9651       else:
9652         for key in 'mac', 'ip':
9653           if key in nic_dict:
9654             setattr(instance.nics[nic_op], key, nic_dict[key])
9655         if nic_op in self.nic_pinst:
9656           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9657         for key, val in nic_dict.iteritems():
9658           result.append(("nic.%s/%d" % (key, nic_op), val))
9659
9660     # hvparams changes
9661     if self.op.hvparams:
9662       instance.hvparams = self.hv_inst
9663       for key, val in self.op.hvparams.iteritems():
9664         result.append(("hv/%s" % key, val))
9665
9666     # beparams changes
9667     if self.op.beparams:
9668       instance.beparams = self.be_inst
9669       for key, val in self.op.beparams.iteritems():
9670         result.append(("be/%s" % key, val))
9671
9672     # OS change
9673     if self.op.os_name:
9674       instance.os = self.op.os_name
9675
9676     # osparams changes
9677     if self.op.osparams:
9678       instance.osparams = self.os_inst
9679       for key, val in self.op.osparams.iteritems():
9680         result.append(("os/%s" % key, val))
9681
9682     self.cfg.Update(instance, feedback_fn)
9683
9684     return result
9685
9686   _DISK_CONVERSIONS = {
9687     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9688     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9689     }
9690
9691
9692 class LUBackupQuery(NoHooksLU):
9693   """Query the exports list
9694
9695   """
9696   REQ_BGL = False
9697
9698   def ExpandNames(self):
9699     self.needed_locks = {}
9700     self.share_locks[locking.LEVEL_NODE] = 1
9701     if not self.op.nodes:
9702       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9703     else:
9704       self.needed_locks[locking.LEVEL_NODE] = \
9705         _GetWantedNodes(self, self.op.nodes)
9706
9707   def Exec(self, feedback_fn):
9708     """Compute the list of all the exported system images.
9709
9710     @rtype: dict
9711     @return: a dictionary with the structure node->(export-list)
9712         where export-list is a list of the instances exported on
9713         that node.
9714
9715     """
9716     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9717     rpcresult = self.rpc.call_export_list(self.nodes)
9718     result = {}
9719     for node in rpcresult:
9720       if rpcresult[node].fail_msg:
9721         result[node] = False
9722       else:
9723         result[node] = rpcresult[node].payload
9724
9725     return result
9726
9727
9728 class LUBackupPrepare(NoHooksLU):
9729   """Prepares an instance for an export and returns useful information.
9730
9731   """
9732   REQ_BGL = False
9733
9734   def ExpandNames(self):
9735     self._ExpandAndLockInstance()
9736
9737   def CheckPrereq(self):
9738     """Check prerequisites.
9739
9740     """
9741     instance_name = self.op.instance_name
9742
9743     self.instance = self.cfg.GetInstanceInfo(instance_name)
9744     assert self.instance is not None, \
9745           "Cannot retrieve locked instance %s" % self.op.instance_name
9746     _CheckNodeOnline(self, self.instance.primary_node)
9747
9748     self._cds = _GetClusterDomainSecret()
9749
9750   def Exec(self, feedback_fn):
9751     """Prepares an instance for an export.
9752
9753     """
9754     instance = self.instance
9755
9756     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9757       salt = utils.GenerateSecret(8)
9758
9759       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9760       result = self.rpc.call_x509_cert_create(instance.primary_node,
9761                                               constants.RIE_CERT_VALIDITY)
9762       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9763
9764       (name, cert_pem) = result.payload
9765
9766       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9767                                              cert_pem)
9768
9769       return {
9770         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9771         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9772                           salt),
9773         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9774         }
9775
9776     return None
9777
9778
9779 class LUBackupExport(LogicalUnit):
9780   """Export an instance to an image in the cluster.
9781
9782   """
9783   HPATH = "instance-export"
9784   HTYPE = constants.HTYPE_INSTANCE
9785   REQ_BGL = False
9786
9787   def CheckArguments(self):
9788     """Check the arguments.
9789
9790     """
9791     self.x509_key_name = self.op.x509_key_name
9792     self.dest_x509_ca_pem = self.op.destination_x509_ca
9793
9794     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9795       if not self.x509_key_name:
9796         raise errors.OpPrereqError("Missing X509 key name for encryption",
9797                                    errors.ECODE_INVAL)
9798
9799       if not self.dest_x509_ca_pem:
9800         raise errors.OpPrereqError("Missing destination X509 CA",
9801                                    errors.ECODE_INVAL)
9802
9803   def ExpandNames(self):
9804     self._ExpandAndLockInstance()
9805
9806     # Lock all nodes for local exports
9807     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9808       # FIXME: lock only instance primary and destination node
9809       #
9810       # Sad but true, for now we have do lock all nodes, as we don't know where
9811       # the previous export might be, and in this LU we search for it and
9812       # remove it from its current node. In the future we could fix this by:
9813       #  - making a tasklet to search (share-lock all), then create the
9814       #    new one, then one to remove, after
9815       #  - removing the removal operation altogether
9816       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9817
9818   def DeclareLocks(self, level):
9819     """Last minute lock declaration."""
9820     # All nodes are locked anyway, so nothing to do here.
9821
9822   def BuildHooksEnv(self):
9823     """Build hooks env.
9824
9825     This will run on the master, primary node and target node.
9826
9827     """
9828     env = {
9829       "EXPORT_MODE": self.op.mode,
9830       "EXPORT_NODE": self.op.target_node,
9831       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9832       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9833       # TODO: Generic function for boolean env variables
9834       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9835       }
9836
9837     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9838
9839     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9840
9841     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9842       nl.append(self.op.target_node)
9843
9844     return env, nl, nl
9845
9846   def CheckPrereq(self):
9847     """Check prerequisites.
9848
9849     This checks that the instance and node names are valid.
9850
9851     """
9852     instance_name = self.op.instance_name
9853
9854     self.instance = self.cfg.GetInstanceInfo(instance_name)
9855     assert self.instance is not None, \
9856           "Cannot retrieve locked instance %s" % self.op.instance_name
9857     _CheckNodeOnline(self, self.instance.primary_node)
9858
9859     if (self.op.remove_instance and self.instance.admin_up and
9860         not self.op.shutdown):
9861       raise errors.OpPrereqError("Can not remove instance without shutting it"
9862                                  " down before")
9863
9864     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9865       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9866       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9867       assert self.dst_node is not None
9868
9869       _CheckNodeOnline(self, self.dst_node.name)
9870       _CheckNodeNotDrained(self, self.dst_node.name)
9871
9872       self._cds = None
9873       self.dest_disk_info = None
9874       self.dest_x509_ca = None
9875
9876     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9877       self.dst_node = None
9878
9879       if len(self.op.target_node) != len(self.instance.disks):
9880         raise errors.OpPrereqError(("Received destination information for %s"
9881                                     " disks, but instance %s has %s disks") %
9882                                    (len(self.op.target_node), instance_name,
9883                                     len(self.instance.disks)),
9884                                    errors.ECODE_INVAL)
9885
9886       cds = _GetClusterDomainSecret()
9887
9888       # Check X509 key name
9889       try:
9890         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9891       except (TypeError, ValueError), err:
9892         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9893
9894       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9895         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9896                                    errors.ECODE_INVAL)
9897
9898       # Load and verify CA
9899       try:
9900         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9901       except OpenSSL.crypto.Error, err:
9902         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9903                                    (err, ), errors.ECODE_INVAL)
9904
9905       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9906       if errcode is not None:
9907         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9908                                    (msg, ), errors.ECODE_INVAL)
9909
9910       self.dest_x509_ca = cert
9911
9912       # Verify target information
9913       disk_info = []
9914       for idx, disk_data in enumerate(self.op.target_node):
9915         try:
9916           (host, port, magic) = \
9917             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9918         except errors.GenericError, err:
9919           raise errors.OpPrereqError("Target info for disk %s: %s" %
9920                                      (idx, err), errors.ECODE_INVAL)
9921
9922         disk_info.append((host, port, magic))
9923
9924       assert len(disk_info) == len(self.op.target_node)
9925       self.dest_disk_info = disk_info
9926
9927     else:
9928       raise errors.ProgrammerError("Unhandled export mode %r" %
9929                                    self.op.mode)
9930
9931     # instance disk type verification
9932     # TODO: Implement export support for file-based disks
9933     for disk in self.instance.disks:
9934       if disk.dev_type == constants.LD_FILE:
9935         raise errors.OpPrereqError("Export not supported for instances with"
9936                                    " file-based disks", errors.ECODE_INVAL)
9937
9938   def _CleanupExports(self, feedback_fn):
9939     """Removes exports of current instance from all other nodes.
9940
9941     If an instance in a cluster with nodes A..D was exported to node C, its
9942     exports will be removed from the nodes A, B and D.
9943
9944     """
9945     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9946
9947     nodelist = self.cfg.GetNodeList()
9948     nodelist.remove(self.dst_node.name)
9949
9950     # on one-node clusters nodelist will be empty after the removal
9951     # if we proceed the backup would be removed because OpBackupQuery
9952     # substitutes an empty list with the full cluster node list.
9953     iname = self.instance.name
9954     if nodelist:
9955       feedback_fn("Removing old exports for instance %s" % iname)
9956       exportlist = self.rpc.call_export_list(nodelist)
9957       for node in exportlist:
9958         if exportlist[node].fail_msg:
9959           continue
9960         if iname in exportlist[node].payload:
9961           msg = self.rpc.call_export_remove(node, iname).fail_msg
9962           if msg:
9963             self.LogWarning("Could not remove older export for instance %s"
9964                             " on node %s: %s", iname, node, msg)
9965
9966   def Exec(self, feedback_fn):
9967     """Export an instance to an image in the cluster.
9968
9969     """
9970     assert self.op.mode in constants.EXPORT_MODES
9971
9972     instance = self.instance
9973     src_node = instance.primary_node
9974
9975     if self.op.shutdown:
9976       # shutdown the instance, but not the disks
9977       feedback_fn("Shutting down instance %s" % instance.name)
9978       result = self.rpc.call_instance_shutdown(src_node, instance,
9979                                                self.op.shutdown_timeout)
9980       # TODO: Maybe ignore failures if ignore_remove_failures is set
9981       result.Raise("Could not shutdown instance %s on"
9982                    " node %s" % (instance.name, src_node))
9983
9984     # set the disks ID correctly since call_instance_start needs the
9985     # correct drbd minor to create the symlinks
9986     for disk in instance.disks:
9987       self.cfg.SetDiskID(disk, src_node)
9988
9989     activate_disks = (not instance.admin_up)
9990
9991     if activate_disks:
9992       # Activate the instance disks if we'exporting a stopped instance
9993       feedback_fn("Activating disks for %s" % instance.name)
9994       _StartInstanceDisks(self, instance, None)
9995
9996     try:
9997       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9998                                                      instance)
9999
10000       helper.CreateSnapshots()
10001       try:
10002         if (self.op.shutdown and instance.admin_up and
10003             not self.op.remove_instance):
10004           assert not activate_disks
10005           feedback_fn("Starting instance %s" % instance.name)
10006           result = self.rpc.call_instance_start(src_node, instance, None, None)
10007           msg = result.fail_msg
10008           if msg:
10009             feedback_fn("Failed to start instance: %s" % msg)
10010             _ShutdownInstanceDisks(self, instance)
10011             raise errors.OpExecError("Could not start instance: %s" % msg)
10012
10013         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10014           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10015         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10016           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10017           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10018
10019           (key_name, _, _) = self.x509_key_name
10020
10021           dest_ca_pem = \
10022             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10023                                             self.dest_x509_ca)
10024
10025           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10026                                                      key_name, dest_ca_pem,
10027                                                      timeouts)
10028       finally:
10029         helper.Cleanup()
10030
10031       # Check for backwards compatibility
10032       assert len(dresults) == len(instance.disks)
10033       assert compat.all(isinstance(i, bool) for i in dresults), \
10034              "Not all results are boolean: %r" % dresults
10035
10036     finally:
10037       if activate_disks:
10038         feedback_fn("Deactivating disks for %s" % instance.name)
10039         _ShutdownInstanceDisks(self, instance)
10040
10041     if not (compat.all(dresults) and fin_resu):
10042       failures = []
10043       if not fin_resu:
10044         failures.append("export finalization")
10045       if not compat.all(dresults):
10046         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10047                                if not dsk)
10048         failures.append("disk export: disk(s) %s" % fdsk)
10049
10050       raise errors.OpExecError("Export failed, errors in %s" %
10051                                utils.CommaJoin(failures))
10052
10053     # At this point, the export was successful, we can cleanup/finish
10054
10055     # Remove instance if requested
10056     if self.op.remove_instance:
10057       feedback_fn("Removing instance %s" % instance.name)
10058       _RemoveInstance(self, feedback_fn, instance,
10059                       self.op.ignore_remove_failures)
10060
10061     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10062       self._CleanupExports(feedback_fn)
10063
10064     return fin_resu, dresults
10065
10066
10067 class LUBackupRemove(NoHooksLU):
10068   """Remove exports related to the named instance.
10069
10070   """
10071   REQ_BGL = False
10072
10073   def ExpandNames(self):
10074     self.needed_locks = {}
10075     # We need all nodes to be locked in order for RemoveExport to work, but we
10076     # don't need to lock the instance itself, as nothing will happen to it (and
10077     # we can remove exports also for a removed instance)
10078     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10079
10080   def Exec(self, feedback_fn):
10081     """Remove any export.
10082
10083     """
10084     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10085     # If the instance was not found we'll try with the name that was passed in.
10086     # This will only work if it was an FQDN, though.
10087     fqdn_warn = False
10088     if not instance_name:
10089       fqdn_warn = True
10090       instance_name = self.op.instance_name
10091
10092     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10093     exportlist = self.rpc.call_export_list(locked_nodes)
10094     found = False
10095     for node in exportlist:
10096       msg = exportlist[node].fail_msg
10097       if msg:
10098         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10099         continue
10100       if instance_name in exportlist[node].payload:
10101         found = True
10102         result = self.rpc.call_export_remove(node, instance_name)
10103         msg = result.fail_msg
10104         if msg:
10105           logging.error("Could not remove export for instance %s"
10106                         " on node %s: %s", instance_name, node, msg)
10107
10108     if fqdn_warn and not found:
10109       feedback_fn("Export not found. If trying to remove an export belonging"
10110                   " to a deleted instance please use its Fully Qualified"
10111                   " Domain Name.")
10112
10113
10114 class LUGroupAdd(LogicalUnit):
10115   """Logical unit for creating node groups.
10116
10117   """
10118   HPATH = "group-add"
10119   HTYPE = constants.HTYPE_GROUP
10120   REQ_BGL = False
10121
10122   def ExpandNames(self):
10123     # We need the new group's UUID here so that we can create and acquire the
10124     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10125     # that it should not check whether the UUID exists in the configuration.
10126     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10127     self.needed_locks = {}
10128     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10129
10130   def CheckPrereq(self):
10131     """Check prerequisites.
10132
10133     This checks that the given group name is not an existing node group
10134     already.
10135
10136     """
10137     try:
10138       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10139     except errors.OpPrereqError:
10140       pass
10141     else:
10142       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10143                                  " node group (UUID: %s)" %
10144                                  (self.op.group_name, existing_uuid),
10145                                  errors.ECODE_EXISTS)
10146
10147     if self.op.ndparams:
10148       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10149
10150   def BuildHooksEnv(self):
10151     """Build hooks env.
10152
10153     """
10154     env = {
10155       "GROUP_NAME": self.op.group_name,
10156       }
10157     mn = self.cfg.GetMasterNode()
10158     return env, [mn], [mn]
10159
10160   def Exec(self, feedback_fn):
10161     """Add the node group to the cluster.
10162
10163     """
10164     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10165                                   uuid=self.group_uuid,
10166                                   alloc_policy=self.op.alloc_policy,
10167                                   ndparams=self.op.ndparams)
10168
10169     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10170     del self.remove_locks[locking.LEVEL_NODEGROUP]
10171
10172
10173 class LUGroupAssignNodes(NoHooksLU):
10174   """Logical unit for assigning nodes to groups.
10175
10176   """
10177   REQ_BGL = False
10178
10179   def ExpandNames(self):
10180     # These raise errors.OpPrereqError on their own:
10181     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10182     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10183
10184     # We want to lock all the affected nodes and groups. We have readily
10185     # available the list of nodes, and the *destination* group. To gather the
10186     # list of "source" groups, we need to fetch node information.
10187     self.node_data = self.cfg.GetAllNodesInfo()
10188     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10189     affected_groups.add(self.group_uuid)
10190
10191     self.needed_locks = {
10192       locking.LEVEL_NODEGROUP: list(affected_groups),
10193       locking.LEVEL_NODE: self.op.nodes,
10194       }
10195
10196   def CheckPrereq(self):
10197     """Check prerequisites.
10198
10199     """
10200     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10201     instance_data = self.cfg.GetAllInstancesInfo()
10202
10203     if self.group is None:
10204       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10205                                (self.op.group_name, self.group_uuid))
10206
10207     (new_splits, previous_splits) = \
10208       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10209                                              for node in self.op.nodes],
10210                                             self.node_data, instance_data)
10211
10212     if new_splits:
10213       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10214
10215       if not self.op.force:
10216         raise errors.OpExecError("The following instances get split by this"
10217                                  " change and --force was not given: %s" %
10218                                  fmt_new_splits)
10219       else:
10220         self.LogWarning("This operation will split the following instances: %s",
10221                         fmt_new_splits)
10222
10223         if previous_splits:
10224           self.LogWarning("In addition, these already-split instances continue"
10225                           " to be spit across groups: %s",
10226                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10227
10228   def Exec(self, feedback_fn):
10229     """Assign nodes to a new group.
10230
10231     """
10232     for node in self.op.nodes:
10233       self.node_data[node].group = self.group_uuid
10234
10235     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10236
10237   @staticmethod
10238   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10239     """Check for split instances after a node assignment.
10240
10241     This method considers a series of node assignments as an atomic operation,
10242     and returns information about split instances after applying the set of
10243     changes.
10244
10245     In particular, it returns information about newly split instances, and
10246     instances that were already split, and remain so after the change.
10247
10248     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10249     considered.
10250
10251     @type changes: list of (node_name, new_group_uuid) pairs.
10252     @param changes: list of node assignments to consider.
10253     @param node_data: a dict with data for all nodes
10254     @param instance_data: a dict with all instances to consider
10255     @rtype: a two-tuple
10256     @return: a list of instances that were previously okay and result split as a
10257       consequence of this change, and a list of instances that were previously
10258       split and this change does not fix.
10259
10260     """
10261     changed_nodes = dict((node, group) for node, group in changes
10262                          if node_data[node].group != group)
10263
10264     all_split_instances = set()
10265     previously_split_instances = set()
10266
10267     def InstanceNodes(instance):
10268       return [instance.primary_node] + list(instance.secondary_nodes)
10269
10270     for inst in instance_data.values():
10271       if inst.disk_template not in constants.DTS_NET_MIRROR:
10272         continue
10273
10274       instance_nodes = InstanceNodes(inst)
10275
10276       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10277         previously_split_instances.add(inst.name)
10278
10279       if len(set(changed_nodes.get(node, node_data[node].group)
10280                  for node in instance_nodes)) > 1:
10281         all_split_instances.add(inst.name)
10282
10283     return (list(all_split_instances - previously_split_instances),
10284             list(previously_split_instances & all_split_instances))
10285
10286
10287 class _GroupQuery(_QueryBase):
10288   FIELDS = query.GROUP_FIELDS
10289
10290   def ExpandNames(self, lu):
10291     lu.needed_locks = {}
10292
10293     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10294     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10295
10296     if not self.names:
10297       self.wanted = [name_to_uuid[name]
10298                      for name in utils.NiceSort(name_to_uuid.keys())]
10299     else:
10300       # Accept names to be either names or UUIDs.
10301       missing = []
10302       self.wanted = []
10303       all_uuid = frozenset(self._all_groups.keys())
10304
10305       for name in self.names:
10306         if name in all_uuid:
10307           self.wanted.append(name)
10308         elif name in name_to_uuid:
10309           self.wanted.append(name_to_uuid[name])
10310         else:
10311           missing.append(name)
10312
10313       if missing:
10314         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10315                                    errors.ECODE_NOENT)
10316
10317   def DeclareLocks(self, lu, level):
10318     pass
10319
10320   def _GetQueryData(self, lu):
10321     """Computes the list of node groups and their attributes.
10322
10323     """
10324     do_nodes = query.GQ_NODE in self.requested_data
10325     do_instances = query.GQ_INST in self.requested_data
10326
10327     group_to_nodes = None
10328     group_to_instances = None
10329
10330     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10331     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10332     # latter GetAllInstancesInfo() is not enough, for we have to go through
10333     # instance->node. Hence, we will need to process nodes even if we only need
10334     # instance information.
10335     if do_nodes or do_instances:
10336       all_nodes = lu.cfg.GetAllNodesInfo()
10337       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10338       node_to_group = {}
10339
10340       for node in all_nodes.values():
10341         if node.group in group_to_nodes:
10342           group_to_nodes[node.group].append(node.name)
10343           node_to_group[node.name] = node.group
10344
10345       if do_instances:
10346         all_instances = lu.cfg.GetAllInstancesInfo()
10347         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10348
10349         for instance in all_instances.values():
10350           node = instance.primary_node
10351           if node in node_to_group:
10352             group_to_instances[node_to_group[node]].append(instance.name)
10353
10354         if not do_nodes:
10355           # Do not pass on node information if it was not requested.
10356           group_to_nodes = None
10357
10358     return query.GroupQueryData([self._all_groups[uuid]
10359                                  for uuid in self.wanted],
10360                                 group_to_nodes, group_to_instances)
10361
10362
10363 class LUGroupQuery(NoHooksLU):
10364   """Logical unit for querying node groups.
10365
10366   """
10367   REQ_BGL = False
10368
10369   def CheckArguments(self):
10370     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10371                           self.op.output_fields, False)
10372
10373   def ExpandNames(self):
10374     self.gq.ExpandNames(self)
10375
10376   def Exec(self, feedback_fn):
10377     return self.gq.OldStyleQuery(self)
10378
10379
10380 class LUGroupSetParams(LogicalUnit):
10381   """Modifies the parameters of a node group.
10382
10383   """
10384   HPATH = "group-modify"
10385   HTYPE = constants.HTYPE_GROUP
10386   REQ_BGL = False
10387
10388   def CheckArguments(self):
10389     all_changes = [
10390       self.op.ndparams,
10391       self.op.alloc_policy,
10392       ]
10393
10394     if all_changes.count(None) == len(all_changes):
10395       raise errors.OpPrereqError("Please pass at least one modification",
10396                                  errors.ECODE_INVAL)
10397
10398   def ExpandNames(self):
10399     # This raises errors.OpPrereqError on its own:
10400     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10401
10402     self.needed_locks = {
10403       locking.LEVEL_NODEGROUP: [self.group_uuid],
10404       }
10405
10406   def CheckPrereq(self):
10407     """Check prerequisites.
10408
10409     """
10410     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10411
10412     if self.group is None:
10413       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10414                                (self.op.group_name, self.group_uuid))
10415
10416     if self.op.ndparams:
10417       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10418       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10419       self.new_ndparams = new_ndparams
10420
10421   def BuildHooksEnv(self):
10422     """Build hooks env.
10423
10424     """
10425     env = {
10426       "GROUP_NAME": self.op.group_name,
10427       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10428       }
10429     mn = self.cfg.GetMasterNode()
10430     return env, [mn], [mn]
10431
10432   def Exec(self, feedback_fn):
10433     """Modifies the node group.
10434
10435     """
10436     result = []
10437
10438     if self.op.ndparams:
10439       self.group.ndparams = self.new_ndparams
10440       result.append(("ndparams", str(self.group.ndparams)))
10441
10442     if self.op.alloc_policy:
10443       self.group.alloc_policy = self.op.alloc_policy
10444
10445     self.cfg.Update(self.group, feedback_fn)
10446     return result
10447
10448
10449
10450 class LUGroupRemove(LogicalUnit):
10451   HPATH = "group-remove"
10452   HTYPE = constants.HTYPE_GROUP
10453   REQ_BGL = False
10454
10455   def ExpandNames(self):
10456     # This will raises errors.OpPrereqError on its own:
10457     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10458     self.needed_locks = {
10459       locking.LEVEL_NODEGROUP: [self.group_uuid],
10460       }
10461
10462   def CheckPrereq(self):
10463     """Check prerequisites.
10464
10465     This checks that the given group name exists as a node group, that is
10466     empty (i.e., contains no nodes), and that is not the last group of the
10467     cluster.
10468
10469     """
10470     # Verify that the group is empty.
10471     group_nodes = [node.name
10472                    for node in self.cfg.GetAllNodesInfo().values()
10473                    if node.group == self.group_uuid]
10474
10475     if group_nodes:
10476       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10477                                  " nodes: %s" %
10478                                  (self.op.group_name,
10479                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10480                                  errors.ECODE_STATE)
10481
10482     # Verify the cluster would not be left group-less.
10483     if len(self.cfg.GetNodeGroupList()) == 1:
10484       raise errors.OpPrereqError("Group '%s' is the only group,"
10485                                  " cannot be removed" %
10486                                  self.op.group_name,
10487                                  errors.ECODE_STATE)
10488
10489   def BuildHooksEnv(self):
10490     """Build hooks env.
10491
10492     """
10493     env = {
10494       "GROUP_NAME": self.op.group_name,
10495       }
10496     mn = self.cfg.GetMasterNode()
10497     return env, [mn], [mn]
10498
10499   def Exec(self, feedback_fn):
10500     """Remove the node group.
10501
10502     """
10503     try:
10504       self.cfg.RemoveNodeGroup(self.group_uuid)
10505     except errors.ConfigurationError:
10506       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10507                                (self.op.group_name, self.group_uuid))
10508
10509     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10510
10511
10512 class LUGroupRename(LogicalUnit):
10513   HPATH = "group-rename"
10514   HTYPE = constants.HTYPE_GROUP
10515   REQ_BGL = False
10516
10517   def ExpandNames(self):
10518     # This raises errors.OpPrereqError on its own:
10519     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10520
10521     self.needed_locks = {
10522       locking.LEVEL_NODEGROUP: [self.group_uuid],
10523       }
10524
10525   def CheckPrereq(self):
10526     """Check prerequisites.
10527
10528     Ensures requested new name is not yet used.
10529
10530     """
10531     try:
10532       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10533     except errors.OpPrereqError:
10534       pass
10535     else:
10536       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10537                                  " node group (UUID: %s)" %
10538                                  (self.op.new_name, new_name_uuid),
10539                                  errors.ECODE_EXISTS)
10540
10541   def BuildHooksEnv(self):
10542     """Build hooks env.
10543
10544     """
10545     env = {
10546       "OLD_NAME": self.op.group_name,
10547       "NEW_NAME": self.op.new_name,
10548       }
10549
10550     mn = self.cfg.GetMasterNode()
10551     all_nodes = self.cfg.GetAllNodesInfo()
10552     run_nodes = [mn]
10553     all_nodes.pop(mn, None)
10554
10555     for node in all_nodes.values():
10556       if node.group == self.group_uuid:
10557         run_nodes.append(node.name)
10558
10559     return env, run_nodes, run_nodes
10560
10561   def Exec(self, feedback_fn):
10562     """Rename the node group.
10563
10564     """
10565     group = self.cfg.GetNodeGroup(self.group_uuid)
10566
10567     if group is None:
10568       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10569                                (self.op.group_name, self.group_uuid))
10570
10571     group.name = self.op.new_name
10572     self.cfg.Update(group, feedback_fn)
10573
10574     return self.op.new_name
10575
10576
10577 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10578   """Generic tags LU.
10579
10580   This is an abstract class which is the parent of all the other tags LUs.
10581
10582   """
10583
10584   def ExpandNames(self):
10585     self.needed_locks = {}
10586     if self.op.kind == constants.TAG_NODE:
10587       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10588       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10589     elif self.op.kind == constants.TAG_INSTANCE:
10590       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10591       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10592
10593     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10594     # not possible to acquire the BGL based on opcode parameters)
10595
10596   def CheckPrereq(self):
10597     """Check prerequisites.
10598
10599     """
10600     if self.op.kind == constants.TAG_CLUSTER:
10601       self.target = self.cfg.GetClusterInfo()
10602     elif self.op.kind == constants.TAG_NODE:
10603       self.target = self.cfg.GetNodeInfo(self.op.name)
10604     elif self.op.kind == constants.TAG_INSTANCE:
10605       self.target = self.cfg.GetInstanceInfo(self.op.name)
10606     else:
10607       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10608                                  str(self.op.kind), errors.ECODE_INVAL)
10609
10610
10611 class LUTagsGet(TagsLU):
10612   """Returns the tags of a given object.
10613
10614   """
10615   REQ_BGL = False
10616
10617   def ExpandNames(self):
10618     TagsLU.ExpandNames(self)
10619
10620     # Share locks as this is only a read operation
10621     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10622
10623   def Exec(self, feedback_fn):
10624     """Returns the tag list.
10625
10626     """
10627     return list(self.target.GetTags())
10628
10629
10630 class LUTagsSearch(NoHooksLU):
10631   """Searches the tags for a given pattern.
10632
10633   """
10634   REQ_BGL = False
10635
10636   def ExpandNames(self):
10637     self.needed_locks = {}
10638
10639   def CheckPrereq(self):
10640     """Check prerequisites.
10641
10642     This checks the pattern passed for validity by compiling it.
10643
10644     """
10645     try:
10646       self.re = re.compile(self.op.pattern)
10647     except re.error, err:
10648       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10649                                  (self.op.pattern, err), errors.ECODE_INVAL)
10650
10651   def Exec(self, feedback_fn):
10652     """Returns the tag list.
10653
10654     """
10655     cfg = self.cfg
10656     tgts = [("/cluster", cfg.GetClusterInfo())]
10657     ilist = cfg.GetAllInstancesInfo().values()
10658     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10659     nlist = cfg.GetAllNodesInfo().values()
10660     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10661     results = []
10662     for path, target in tgts:
10663       for tag in target.GetTags():
10664         if self.re.search(tag):
10665           results.append((path, tag))
10666     return results
10667
10668
10669 class LUTagsSet(TagsLU):
10670   """Sets a tag on a given object.
10671
10672   """
10673   REQ_BGL = False
10674
10675   def CheckPrereq(self):
10676     """Check prerequisites.
10677
10678     This checks the type and length of the tag name and value.
10679
10680     """
10681     TagsLU.CheckPrereq(self)
10682     for tag in self.op.tags:
10683       objects.TaggableObject.ValidateTag(tag)
10684
10685   def Exec(self, feedback_fn):
10686     """Sets the tag.
10687
10688     """
10689     try:
10690       for tag in self.op.tags:
10691         self.target.AddTag(tag)
10692     except errors.TagError, err:
10693       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10694     self.cfg.Update(self.target, feedback_fn)
10695
10696
10697 class LUTagsDel(TagsLU):
10698   """Delete a list of tags from a given object.
10699
10700   """
10701   REQ_BGL = False
10702
10703   def CheckPrereq(self):
10704     """Check prerequisites.
10705
10706     This checks that we have the given tag.
10707
10708     """
10709     TagsLU.CheckPrereq(self)
10710     for tag in self.op.tags:
10711       objects.TaggableObject.ValidateTag(tag)
10712     del_tags = frozenset(self.op.tags)
10713     cur_tags = self.target.GetTags()
10714
10715     diff_tags = del_tags - cur_tags
10716     if diff_tags:
10717       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10718       raise errors.OpPrereqError("Tag(s) %s not found" %
10719                                  (utils.CommaJoin(diff_names), ),
10720                                  errors.ECODE_NOENT)
10721
10722   def Exec(self, feedback_fn):
10723     """Remove the tag from the object.
10724
10725     """
10726     for tag in self.op.tags:
10727       self.target.RemoveTag(tag)
10728     self.cfg.Update(self.target, feedback_fn)
10729
10730
10731 class LUTestDelay(NoHooksLU):
10732   """Sleep for a specified amount of time.
10733
10734   This LU sleeps on the master and/or nodes for a specified amount of
10735   time.
10736
10737   """
10738   REQ_BGL = False
10739
10740   def ExpandNames(self):
10741     """Expand names and set required locks.
10742
10743     This expands the node list, if any.
10744
10745     """
10746     self.needed_locks = {}
10747     if self.op.on_nodes:
10748       # _GetWantedNodes can be used here, but is not always appropriate to use
10749       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10750       # more information.
10751       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10752       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10753
10754   def _TestDelay(self):
10755     """Do the actual sleep.
10756
10757     """
10758     if self.op.on_master:
10759       if not utils.TestDelay(self.op.duration):
10760         raise errors.OpExecError("Error during master delay test")
10761     if self.op.on_nodes:
10762       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10763       for node, node_result in result.items():
10764         node_result.Raise("Failure during rpc call to node %s" % node)
10765
10766   def Exec(self, feedback_fn):
10767     """Execute the test delay opcode, with the wanted repetitions.
10768
10769     """
10770     if self.op.repeat == 0:
10771       self._TestDelay()
10772     else:
10773       top_value = self.op.repeat - 1
10774       for i in range(self.op.repeat):
10775         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10776         self._TestDelay()
10777
10778
10779 class LUTestJqueue(NoHooksLU):
10780   """Utility LU to test some aspects of the job queue.
10781
10782   """
10783   REQ_BGL = False
10784
10785   # Must be lower than default timeout for WaitForJobChange to see whether it
10786   # notices changed jobs
10787   _CLIENT_CONNECT_TIMEOUT = 20.0
10788   _CLIENT_CONFIRM_TIMEOUT = 60.0
10789
10790   @classmethod
10791   def _NotifyUsingSocket(cls, cb, errcls):
10792     """Opens a Unix socket and waits for another program to connect.
10793
10794     @type cb: callable
10795     @param cb: Callback to send socket name to client
10796     @type errcls: class
10797     @param errcls: Exception class to use for errors
10798
10799     """
10800     # Using a temporary directory as there's no easy way to create temporary
10801     # sockets without writing a custom loop around tempfile.mktemp and
10802     # socket.bind
10803     tmpdir = tempfile.mkdtemp()
10804     try:
10805       tmpsock = utils.PathJoin(tmpdir, "sock")
10806
10807       logging.debug("Creating temporary socket at %s", tmpsock)
10808       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10809       try:
10810         sock.bind(tmpsock)
10811         sock.listen(1)
10812
10813         # Send details to client
10814         cb(tmpsock)
10815
10816         # Wait for client to connect before continuing
10817         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10818         try:
10819           (conn, _) = sock.accept()
10820         except socket.error, err:
10821           raise errcls("Client didn't connect in time (%s)" % err)
10822       finally:
10823         sock.close()
10824     finally:
10825       # Remove as soon as client is connected
10826       shutil.rmtree(tmpdir)
10827
10828     # Wait for client to close
10829     try:
10830       try:
10831         # pylint: disable-msg=E1101
10832         # Instance of '_socketobject' has no ... member
10833         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10834         conn.recv(1)
10835       except socket.error, err:
10836         raise errcls("Client failed to confirm notification (%s)" % err)
10837     finally:
10838       conn.close()
10839
10840   def _SendNotification(self, test, arg, sockname):
10841     """Sends a notification to the client.
10842
10843     @type test: string
10844     @param test: Test name
10845     @param arg: Test argument (depends on test)
10846     @type sockname: string
10847     @param sockname: Socket path
10848
10849     """
10850     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10851
10852   def _Notify(self, prereq, test, arg):
10853     """Notifies the client of a test.
10854
10855     @type prereq: bool
10856     @param prereq: Whether this is a prereq-phase test
10857     @type test: string
10858     @param test: Test name
10859     @param arg: Test argument (depends on test)
10860
10861     """
10862     if prereq:
10863       errcls = errors.OpPrereqError
10864     else:
10865       errcls = errors.OpExecError
10866
10867     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10868                                                   test, arg),
10869                                    errcls)
10870
10871   def CheckArguments(self):
10872     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10873     self.expandnames_calls = 0
10874
10875   def ExpandNames(self):
10876     checkargs_calls = getattr(self, "checkargs_calls", 0)
10877     if checkargs_calls < 1:
10878       raise errors.ProgrammerError("CheckArguments was not called")
10879
10880     self.expandnames_calls += 1
10881
10882     if self.op.notify_waitlock:
10883       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10884
10885     self.LogInfo("Expanding names")
10886
10887     # Get lock on master node (just to get a lock, not for a particular reason)
10888     self.needed_locks = {
10889       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10890       }
10891
10892   def Exec(self, feedback_fn):
10893     if self.expandnames_calls < 1:
10894       raise errors.ProgrammerError("ExpandNames was not called")
10895
10896     if self.op.notify_exec:
10897       self._Notify(False, constants.JQT_EXEC, None)
10898
10899     self.LogInfo("Executing")
10900
10901     if self.op.log_messages:
10902       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10903       for idx, msg in enumerate(self.op.log_messages):
10904         self.LogInfo("Sending log message %s", idx + 1)
10905         feedback_fn(constants.JQT_MSGPREFIX + msg)
10906         # Report how many test messages have been sent
10907         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10908
10909     if self.op.fail:
10910       raise errors.OpExecError("Opcode failure was requested")
10911
10912     return True
10913
10914
10915 class IAllocator(object):
10916   """IAllocator framework.
10917
10918   An IAllocator instance has three sets of attributes:
10919     - cfg that is needed to query the cluster
10920     - input data (all members of the _KEYS class attribute are required)
10921     - four buffer attributes (in|out_data|text), that represent the
10922       input (to the external script) in text and data structure format,
10923       and the output from it, again in two formats
10924     - the result variables from the script (success, info, nodes) for
10925       easy usage
10926
10927   """
10928   # pylint: disable-msg=R0902
10929   # lots of instance attributes
10930   _ALLO_KEYS = [
10931     "name", "mem_size", "disks", "disk_template",
10932     "os", "tags", "nics", "vcpus", "hypervisor",
10933     ]
10934   _RELO_KEYS = [
10935     "name", "relocate_from",
10936     ]
10937   _EVAC_KEYS = [
10938     "evac_nodes",
10939     ]
10940
10941   def __init__(self, cfg, rpc, mode, **kwargs):
10942     self.cfg = cfg
10943     self.rpc = rpc
10944     # init buffer variables
10945     self.in_text = self.out_text = self.in_data = self.out_data = None
10946     # init all input fields so that pylint is happy
10947     self.mode = mode
10948     self.mem_size = self.disks = self.disk_template = None
10949     self.os = self.tags = self.nics = self.vcpus = None
10950     self.hypervisor = None
10951     self.relocate_from = None
10952     self.name = None
10953     self.evac_nodes = None
10954     # computed fields
10955     self.required_nodes = None
10956     # init result fields
10957     self.success = self.info = self.result = None
10958     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10959       keyset = self._ALLO_KEYS
10960       fn = self._AddNewInstance
10961     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10962       keyset = self._RELO_KEYS
10963       fn = self._AddRelocateInstance
10964     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10965       keyset = self._EVAC_KEYS
10966       fn = self._AddEvacuateNodes
10967     else:
10968       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10969                                    " IAllocator" % self.mode)
10970     for key in kwargs:
10971       if key not in keyset:
10972         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10973                                      " IAllocator" % key)
10974       setattr(self, key, kwargs[key])
10975
10976     for key in keyset:
10977       if key not in kwargs:
10978         raise errors.ProgrammerError("Missing input parameter '%s' to"
10979                                      " IAllocator" % key)
10980     self._BuildInputData(fn)
10981
10982   def _ComputeClusterData(self):
10983     """Compute the generic allocator input data.
10984
10985     This is the data that is independent of the actual operation.
10986
10987     """
10988     cfg = self.cfg
10989     cluster_info = cfg.GetClusterInfo()
10990     # cluster data
10991     data = {
10992       "version": constants.IALLOCATOR_VERSION,
10993       "cluster_name": cfg.GetClusterName(),
10994       "cluster_tags": list(cluster_info.GetTags()),
10995       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10996       # we don't have job IDs
10997       }
10998     ninfo = cfg.GetAllNodesInfo()
10999     iinfo = cfg.GetAllInstancesInfo().values()
11000     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11001
11002     # node data
11003     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11004
11005     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11006       hypervisor_name = self.hypervisor
11007     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11008       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11009     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11010       hypervisor_name = cluster_info.enabled_hypervisors[0]
11011
11012     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11013                                         hypervisor_name)
11014     node_iinfo = \
11015       self.rpc.call_all_instances_info(node_list,
11016                                        cluster_info.enabled_hypervisors)
11017
11018     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11019
11020     config_ndata = self._ComputeBasicNodeData(ninfo)
11021     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11022                                                  i_list, config_ndata)
11023     assert len(data["nodes"]) == len(ninfo), \
11024         "Incomplete node data computed"
11025
11026     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11027
11028     self.in_data = data
11029
11030   @staticmethod
11031   def _ComputeNodeGroupData(cfg):
11032     """Compute node groups data.
11033
11034     """
11035     ng = {}
11036     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11037       ng[guuid] = {
11038         "name": gdata.name,
11039         "alloc_policy": gdata.alloc_policy,
11040         }
11041     return ng
11042
11043   @staticmethod
11044   def _ComputeBasicNodeData(node_cfg):
11045     """Compute global node data.
11046
11047     @rtype: dict
11048     @returns: a dict of name: (node dict, node config)
11049
11050     """
11051     node_results = {}
11052     for ninfo in node_cfg.values():
11053       # fill in static (config-based) values
11054       pnr = {
11055         "tags": list(ninfo.GetTags()),
11056         "primary_ip": ninfo.primary_ip,
11057         "secondary_ip": ninfo.secondary_ip,
11058         "offline": ninfo.offline,
11059         "drained": ninfo.drained,
11060         "master_candidate": ninfo.master_candidate,
11061         "group": ninfo.group,
11062         "master_capable": ninfo.master_capable,
11063         "vm_capable": ninfo.vm_capable,
11064         }
11065
11066       node_results[ninfo.name] = pnr
11067
11068     return node_results
11069
11070   @staticmethod
11071   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11072                               node_results):
11073     """Compute global node data.
11074
11075     @param node_results: the basic node structures as filled from the config
11076
11077     """
11078     # make a copy of the current dict
11079     node_results = dict(node_results)
11080     for nname, nresult in node_data.items():
11081       assert nname in node_results, "Missing basic data for node %s" % nname
11082       ninfo = node_cfg[nname]
11083
11084       if not (ninfo.offline or ninfo.drained):
11085         nresult.Raise("Can't get data for node %s" % nname)
11086         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11087                                 nname)
11088         remote_info = nresult.payload
11089
11090         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11091                      'vg_size', 'vg_free', 'cpu_total']:
11092           if attr not in remote_info:
11093             raise errors.OpExecError("Node '%s' didn't return attribute"
11094                                      " '%s'" % (nname, attr))
11095           if not isinstance(remote_info[attr], int):
11096             raise errors.OpExecError("Node '%s' returned invalid value"
11097                                      " for '%s': %s" %
11098                                      (nname, attr, remote_info[attr]))
11099         # compute memory used by primary instances
11100         i_p_mem = i_p_up_mem = 0
11101         for iinfo, beinfo in i_list:
11102           if iinfo.primary_node == nname:
11103             i_p_mem += beinfo[constants.BE_MEMORY]
11104             if iinfo.name not in node_iinfo[nname].payload:
11105               i_used_mem = 0
11106             else:
11107               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11108             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11109             remote_info['memory_free'] -= max(0, i_mem_diff)
11110
11111             if iinfo.admin_up:
11112               i_p_up_mem += beinfo[constants.BE_MEMORY]
11113
11114         # compute memory used by instances
11115         pnr_dyn = {
11116           "total_memory": remote_info['memory_total'],
11117           "reserved_memory": remote_info['memory_dom0'],
11118           "free_memory": remote_info['memory_free'],
11119           "total_disk": remote_info['vg_size'],
11120           "free_disk": remote_info['vg_free'],
11121           "total_cpus": remote_info['cpu_total'],
11122           "i_pri_memory": i_p_mem,
11123           "i_pri_up_memory": i_p_up_mem,
11124           }
11125         pnr_dyn.update(node_results[nname])
11126         node_results[nname] = pnr_dyn
11127
11128     return node_results
11129
11130   @staticmethod
11131   def _ComputeInstanceData(cluster_info, i_list):
11132     """Compute global instance data.
11133
11134     """
11135     instance_data = {}
11136     for iinfo, beinfo in i_list:
11137       nic_data = []
11138       for nic in iinfo.nics:
11139         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11140         nic_dict = {"mac": nic.mac,
11141                     "ip": nic.ip,
11142                     "mode": filled_params[constants.NIC_MODE],
11143                     "link": filled_params[constants.NIC_LINK],
11144                    }
11145         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11146           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11147         nic_data.append(nic_dict)
11148       pir = {
11149         "tags": list(iinfo.GetTags()),
11150         "admin_up": iinfo.admin_up,
11151         "vcpus": beinfo[constants.BE_VCPUS],
11152         "memory": beinfo[constants.BE_MEMORY],
11153         "os": iinfo.os,
11154         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11155         "nics": nic_data,
11156         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11157         "disk_template": iinfo.disk_template,
11158         "hypervisor": iinfo.hypervisor,
11159         }
11160       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11161                                                  pir["disks"])
11162       instance_data[iinfo.name] = pir
11163
11164     return instance_data
11165
11166   def _AddNewInstance(self):
11167     """Add new instance data to allocator structure.
11168
11169     This in combination with _AllocatorGetClusterData will create the
11170     correct structure needed as input for the allocator.
11171
11172     The checks for the completeness of the opcode must have already been
11173     done.
11174
11175     """
11176     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11177
11178     if self.disk_template in constants.DTS_NET_MIRROR:
11179       self.required_nodes = 2
11180     else:
11181       self.required_nodes = 1
11182     request = {
11183       "name": self.name,
11184       "disk_template": self.disk_template,
11185       "tags": self.tags,
11186       "os": self.os,
11187       "vcpus": self.vcpus,
11188       "memory": self.mem_size,
11189       "disks": self.disks,
11190       "disk_space_total": disk_space,
11191       "nics": self.nics,
11192       "required_nodes": self.required_nodes,
11193       }
11194     return request
11195
11196   def _AddRelocateInstance(self):
11197     """Add relocate instance data to allocator structure.
11198
11199     This in combination with _IAllocatorGetClusterData will create the
11200     correct structure needed as input for the allocator.
11201
11202     The checks for the completeness of the opcode must have already been
11203     done.
11204
11205     """
11206     instance = self.cfg.GetInstanceInfo(self.name)
11207     if instance is None:
11208       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11209                                    " IAllocator" % self.name)
11210
11211     if instance.disk_template not in constants.DTS_NET_MIRROR:
11212       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11213                                  errors.ECODE_INVAL)
11214
11215     if len(instance.secondary_nodes) != 1:
11216       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11217                                  errors.ECODE_STATE)
11218
11219     self.required_nodes = 1
11220     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11221     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11222
11223     request = {
11224       "name": self.name,
11225       "disk_space_total": disk_space,
11226       "required_nodes": self.required_nodes,
11227       "relocate_from": self.relocate_from,
11228       }
11229     return request
11230
11231   def _AddEvacuateNodes(self):
11232     """Add evacuate nodes data to allocator structure.
11233
11234     """
11235     request = {
11236       "evac_nodes": self.evac_nodes
11237       }
11238     return request
11239
11240   def _BuildInputData(self, fn):
11241     """Build input data structures.
11242
11243     """
11244     self._ComputeClusterData()
11245
11246     request = fn()
11247     request["type"] = self.mode
11248     self.in_data["request"] = request
11249
11250     self.in_text = serializer.Dump(self.in_data)
11251
11252   def Run(self, name, validate=True, call_fn=None):
11253     """Run an instance allocator and return the results.
11254
11255     """
11256     if call_fn is None:
11257       call_fn = self.rpc.call_iallocator_runner
11258
11259     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11260     result.Raise("Failure while running the iallocator script")
11261
11262     self.out_text = result.payload
11263     if validate:
11264       self._ValidateResult()
11265
11266   def _ValidateResult(self):
11267     """Process the allocator results.
11268
11269     This will process and if successful save the result in
11270     self.out_data and the other parameters.
11271
11272     """
11273     try:
11274       rdict = serializer.Load(self.out_text)
11275     except Exception, err:
11276       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11277
11278     if not isinstance(rdict, dict):
11279       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11280
11281     # TODO: remove backwards compatiblity in later versions
11282     if "nodes" in rdict and "result" not in rdict:
11283       rdict["result"] = rdict["nodes"]
11284       del rdict["nodes"]
11285
11286     for key in "success", "info", "result":
11287       if key not in rdict:
11288         raise errors.OpExecError("Can't parse iallocator results:"
11289                                  " missing key '%s'" % key)
11290       setattr(self, key, rdict[key])
11291
11292     if not isinstance(rdict["result"], list):
11293       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11294                                " is not a list")
11295     self.out_data = rdict
11296
11297
11298 class LUTestAllocator(NoHooksLU):
11299   """Run allocator tests.
11300
11301   This LU runs the allocator tests
11302
11303   """
11304   def CheckPrereq(self):
11305     """Check prerequisites.
11306
11307     This checks the opcode parameters depending on the director and mode test.
11308
11309     """
11310     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11311       for attr in ["mem_size", "disks", "disk_template",
11312                    "os", "tags", "nics", "vcpus"]:
11313         if not hasattr(self.op, attr):
11314           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11315                                      attr, errors.ECODE_INVAL)
11316       iname = self.cfg.ExpandInstanceName(self.op.name)
11317       if iname is not None:
11318         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11319                                    iname, errors.ECODE_EXISTS)
11320       if not isinstance(self.op.nics, list):
11321         raise errors.OpPrereqError("Invalid parameter 'nics'",
11322                                    errors.ECODE_INVAL)
11323       if not isinstance(self.op.disks, list):
11324         raise errors.OpPrereqError("Invalid parameter 'disks'",
11325                                    errors.ECODE_INVAL)
11326       for row in self.op.disks:
11327         if (not isinstance(row, dict) or
11328             "size" not in row or
11329             not isinstance(row["size"], int) or
11330             "mode" not in row or
11331             row["mode"] not in ['r', 'w']):
11332           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11333                                      " parameter", errors.ECODE_INVAL)
11334       if self.op.hypervisor is None:
11335         self.op.hypervisor = self.cfg.GetHypervisorType()
11336     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11337       fname = _ExpandInstanceName(self.cfg, self.op.name)
11338       self.op.name = fname
11339       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11340     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11341       if not hasattr(self.op, "evac_nodes"):
11342         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11343                                    " opcode input", errors.ECODE_INVAL)
11344     else:
11345       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11346                                  self.op.mode, errors.ECODE_INVAL)
11347
11348     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11349       if self.op.allocator is None:
11350         raise errors.OpPrereqError("Missing allocator name",
11351                                    errors.ECODE_INVAL)
11352     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11353       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11354                                  self.op.direction, errors.ECODE_INVAL)
11355
11356   def Exec(self, feedback_fn):
11357     """Run the allocator test.
11358
11359     """
11360     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11361       ial = IAllocator(self.cfg, self.rpc,
11362                        mode=self.op.mode,
11363                        name=self.op.name,
11364                        mem_size=self.op.mem_size,
11365                        disks=self.op.disks,
11366                        disk_template=self.op.disk_template,
11367                        os=self.op.os,
11368                        tags=self.op.tags,
11369                        nics=self.op.nics,
11370                        vcpus=self.op.vcpus,
11371                        hypervisor=self.op.hypervisor,
11372                        )
11373     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11374       ial = IAllocator(self.cfg, self.rpc,
11375                        mode=self.op.mode,
11376                        name=self.op.name,
11377                        relocate_from=list(self.relocate_from),
11378                        )
11379     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11380       ial = IAllocator(self.cfg, self.rpc,
11381                        mode=self.op.mode,
11382                        evac_nodes=self.op.evac_nodes)
11383     else:
11384       raise errors.ProgrammerError("Uncatched mode %s in"
11385                                    " LUTestAllocator.Exec", self.op.mode)
11386
11387     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11388       result = ial.in_text
11389     else:
11390       ial.Run(self.op.allocator, validate=False)
11391       result = ial.out_text
11392     return result
11393
11394
11395 #: Query type implementations
11396 _QUERY_IMPL = {
11397   constants.QR_INSTANCE: _InstanceQuery,
11398   constants.QR_NODE: _NodeQuery,
11399   constants.QR_GROUP: _GroupQuery,
11400   }
11401
11402
11403 def _GetQueryImplementation(name):
11404   """Returns the implemtnation for a query type.
11405
11406   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11407
11408   """
11409   try:
11410     return _QUERY_IMPL[name]
11411   except KeyError:
11412     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11413                                errors.ECODE_INVAL)