code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 class ResultWithJobs:
  78   """Data container for LU results with jobs.
  79
  80   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  81   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  82   contained in the C{jobs} attribute and include the job IDs in the opcode
  83   result.
  84
  85   """
  86   def __init__(self, jobs, **kwargs):
  87     """Initializes this class.
  88
  89     Additional return values can be specified as keyword arguments.
  90
  91     @type jobs: list of lists of L{opcode.OpCode}
  92     @param jobs: A list of lists of opcode objects
  93
  94     """
  95     self.jobs = jobs
  96     self.other = kwargs
  97
  98
  99 class LogicalUnit(object):
 100   """Logical Unit base class.
 101
 102   Subclasses must follow these rules:
 103     - implement ExpandNames
 104     - implement CheckPrereq (except when tasklets are used)
 105     - implement Exec (except when tasklets are used)
 106     - implement BuildHooksEnv
 107     - implement BuildHooksNodes
 108     - redefine HPATH and HTYPE
 109     - optionally redefine their run requirements:
 110         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 111
 112   Note that all commands require root permissions.
 113
 114   @ivar dry_run_result: the value (if any) that will be returned to the caller
 115       in dry-run mode (signalled by opcode dry_run parameter)
 116
 117   """
 118   HPATH = None
 119   HTYPE = None
 120   REQ_BGL = True
 121
 122   def __init__(self, processor, op, context, rpc):
 123     """Constructor for LogicalUnit.
 124
 125     This needs to be overridden in derived classes in order to check op
 126     validity.
 127
 128     """
 129     self.proc = processor
 130     self.op = op
 131     self.cfg = context.cfg
 132     self.context = context
 133     self.rpc = rpc
 134     # Dicts used to declare locking needs to mcpu
 135     self.needed_locks = None
 136     self.acquired_locks = {}
 137     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 138     self.add_locks = {}
 139     self.remove_locks = {}
 140     # Used to force good behavior when calling helper functions
 141     self.recalculate_locks = {}
 142     # logging
 143     self.Log = processor.Log # pylint: disable-msg=C0103
 144     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 145     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 146     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 147     # support for dry-run
 148     self.dry_run_result = None
 149     # support for generic debug attribute
 150     if (not hasattr(self.op, "debug_level") or
 151         not isinstance(self.op.debug_level, int)):
 152       self.op.debug_level = 0
 153
 154     # Tasklets
 155     self.tasklets = None
 156
 157     # Validate opcode parameters and set defaults
 158     self.op.Validate(True)
 159
 160     self.CheckArguments()
 161
 162   def CheckArguments(self):
 163     """Check syntactic validity for the opcode arguments.
 164
 165     This method is for doing a simple syntactic check and ensure
 166     validity of opcode parameters, without any cluster-related
 167     checks. While the same can be accomplished in ExpandNames and/or
 168     CheckPrereq, doing these separate is better because:
 169
 170       - ExpandNames is left as as purely a lock-related function
 171       - CheckPrereq is run after we have acquired locks (and possible
 172         waited for them)
 173
 174     The function is allowed to change the self.op attribute so that
 175     later methods can no longer worry about missing parameters.
 176
 177     """
 178     pass
 179
 180   def ExpandNames(self):
 181     """Expand names for this LU.
 182
 183     This method is called before starting to execute the opcode, and it should
 184     update all the parameters of the opcode to their canonical form (e.g. a
 185     short node name must be fully expanded after this method has successfully
 186     completed). This way locking, hooks, logging, etc. can work correctly.
 187
 188     LUs which implement this method must also populate the self.needed_locks
 189     member, as a dict with lock levels as keys, and a list of needed lock names
 190     as values. Rules:
 191
 192       - use an empty dict if you don't need any lock
 193       - if you don't need any lock at a particular level omit that level
 194       - don't put anything for the BGL level
 195       - if you want all locks at a level use locking.ALL_SET as a value
 196
 197     If you need to share locks (rather than acquire them exclusively) at one
 198     level you can modify self.share_locks, setting a true value (usually 1) for
 199     that level. By default locks are not shared.
 200
 201     This function can also define a list of tasklets, which then will be
 202     executed in order instead of the usual LU-level CheckPrereq and Exec
 203     functions, if those are not defined by the LU.
 204
 205     Examples::
 206
 207       # Acquire all nodes and one instance
 208       self.needed_locks = {
 209         locking.LEVEL_NODE: locking.ALL_SET,
 210         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 211       }
 212       # Acquire just two nodes
 213       self.needed_locks = {
 214         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 215       }
 216       # Acquire no locks
 217       self.needed_locks = {} # No, you can't leave it to the default value None
 218
 219     """
 220     # The implementation of this method is mandatory only if the new LU is
 221     # concurrent, so that old LUs don't need to be changed all at the same
 222     # time.
 223     if self.REQ_BGL:
 224       self.needed_locks = {} # Exclusive LUs don't need locks.
 225     else:
 226       raise NotImplementedError
 227
 228   def DeclareLocks(self, level):
 229     """Declare LU locking needs for a level
 230
 231     While most LUs can just declare their locking needs at ExpandNames time,
 232     sometimes there's the need to calculate some locks after having acquired
 233     the ones before. This function is called just before acquiring locks at a
 234     particular level, but after acquiring the ones at lower levels, and permits
 235     such calculations. It can be used to modify self.needed_locks, and by
 236     default it does nothing.
 237
 238     This function is only called if you have something already set in
 239     self.needed_locks for the level.
 240
 241     @param level: Locking level which is going to be locked
 242     @type level: member of ganeti.locking.LEVELS
 243
 244     """
 245
 246   def CheckPrereq(self):
 247     """Check prerequisites for this LU.
 248
 249     This method should check that the prerequisites for the execution
 250     of this LU are fulfilled. It can do internode communication, but
 251     it should be idempotent - no cluster or system changes are
 252     allowed.
 253
 254     The method should raise errors.OpPrereqError in case something is
 255     not fulfilled. Its return value is ignored.
 256
 257     This method should also update all the parameters of the opcode to
 258     their canonical form if it hasn't been done by ExpandNames before.
 259
 260     """
 261     if self.tasklets is not None:
 262       for (idx, tl) in enumerate(self.tasklets):
 263         logging.debug("Checking prerequisites for tasklet %s/%s",
 264                       idx + 1, len(self.tasklets))
 265         tl.CheckPrereq()
 266     else:
 267       pass
 268
 269   def Exec(self, feedback_fn):
 270     """Execute the LU.
 271
 272     This method should implement the actual work. It should raise
 273     errors.OpExecError for failures that are somewhat dealt with in
 274     code, or expected.
 275
 276     """
 277     if self.tasklets is not None:
 278       for (idx, tl) in enumerate(self.tasklets):
 279         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 280         tl.Exec(feedback_fn)
 281     else:
 282       raise NotImplementedError
 283
 284   def BuildHooksEnv(self):
 285     """Build hooks environment for this LU.
 286
 287     @rtype: dict
 288     @return: Dictionary containing the environment that will be used for
 289       running the hooks for this LU. The keys of the dict must not be prefixed
 290       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 291       will extend the environment with additional variables. If no environment
 292       should be defined, an empty dictionary should be returned (not C{None}).
 293     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 294       will not be called.
 295
 296     """
 297     raise NotImplementedError
 298
 299   def BuildHooksNodes(self):
 300     """Build list of nodes to run LU's hooks.
 301
 302     @rtype: tuple; (list, list)
 303     @return: Tuple containing a list of node names on which the hook
 304       should run before the execution and a list of node names on which the
 305       hook should run after the execution. No nodes should be returned as an
 306       empty list (and not None).
 307     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 308       will not be called.
 309
 310     """
 311     raise NotImplementedError
 312
 313   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 314     """Notify the LU about the results of its hooks.
 315
 316     This method is called every time a hooks phase is executed, and notifies
 317     the Logical Unit about the hooks' result. The LU can then use it to alter
 318     its result based on the hooks.  By default the method does nothing and the
 319     previous result is passed back unchanged but any LU can define it if it
 320     wants to use the local cluster hook-scripts somehow.
 321
 322     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 323         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 324     @param hook_results: the results of the multi-node hooks rpc call
 325     @param feedback_fn: function used send feedback back to the caller
 326     @param lu_result: the previous Exec result this LU had, or None
 327         in the PRE phase
 328     @return: the new Exec result, based on the previous result
 329         and hook results
 330
 331     """
 332     # API must be kept, thus we ignore the unused argument and could
 333     # be a function warnings
 334     # pylint: disable-msg=W0613,R0201
 335     return lu_result
 336
 337   def _ExpandAndLockInstance(self):
 338     """Helper function to expand and lock an instance.
 339
 340     Many LUs that work on an instance take its name in self.op.instance_name
 341     and need to expand it and then declare the expanded name for locking. This
 342     function does it, and then updates self.op.instance_name to the expanded
 343     name. It also initializes needed_locks as a dict, if this hasn't been done
 344     before.
 345
 346     """
 347     if self.needed_locks is None:
 348       self.needed_locks = {}
 349     else:
 350       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 351         "_ExpandAndLockInstance called with instance-level locks set"
 352     self.op.instance_name = _ExpandInstanceName(self.cfg,
 353                                                 self.op.instance_name)
 354     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 355
 356   def _LockInstancesNodes(self, primary_only=False):
 357     """Helper function to declare instances' nodes for locking.
 358
 359     This function should be called after locking one or more instances to lock
 360     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 361     with all primary or secondary nodes for instances already locked and
 362     present in self.needed_locks[locking.LEVEL_INSTANCE].
 363
 364     It should be called from DeclareLocks, and for safety only works if
 365     self.recalculate_locks[locking.LEVEL_NODE] is set.
 366
 367     In the future it may grow parameters to just lock some instance's nodes, or
 368     to just lock primaries or secondary nodes, if needed.
 369
 370     If should be called in DeclareLocks in a way similar to::
 371
 372       if level == locking.LEVEL_NODE:
 373         self._LockInstancesNodes()
 374
 375     @type primary_only: boolean
 376     @param primary_only: only lock primary nodes of locked instances
 377
 378     """
 379     assert locking.LEVEL_NODE in self.recalculate_locks, \
 380       "_LockInstancesNodes helper function called with no nodes to recalculate"
 381
 382     # TODO: check if we're really been called with the instance locks held
 383
 384     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 385     # future we might want to have different behaviors depending on the value
 386     # of self.recalculate_locks[locking.LEVEL_NODE]
 387     wanted_nodes = []
 388     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 389       instance = self.context.cfg.GetInstanceInfo(instance_name)
 390       wanted_nodes.append(instance.primary_node)
 391       if not primary_only:
 392         wanted_nodes.extend(instance.secondary_nodes)
 393
 394     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 395       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 396     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 397       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 398
 399     del self.recalculate_locks[locking.LEVEL_NODE]
 400
 401
 402 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 403   """Simple LU which runs no hooks.
 404
 405   This LU is intended as a parent for other LogicalUnits which will
 406   run no hooks, in order to reduce duplicate code.
 407
 408   """
 409   HPATH = None
 410   HTYPE = None
 411
 412   def BuildHooksEnv(self):
 413     """Empty BuildHooksEnv for NoHooksLu.
 414
 415     This just raises an error.
 416
 417     """
 418     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 419
 420   def BuildHooksNodes(self):
 421     """Empty BuildHooksNodes for NoHooksLU.
 422
 423     """
 424     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 425
 426
 427 class Tasklet:
 428   """Tasklet base class.
 429
 430   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 431   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 432   tasklets know nothing about locks.
 433
 434   Subclasses must follow these rules:
 435     - Implement CheckPrereq
 436     - Implement Exec
 437
 438   """
 439   def __init__(self, lu):
 440     self.lu = lu
 441
 442     # Shortcuts
 443     self.cfg = lu.cfg
 444     self.rpc = lu.rpc
 445
 446   def CheckPrereq(self):
 447     """Check prerequisites for this tasklets.
 448
 449     This method should check whether the prerequisites for the execution of
 450     this tasklet are fulfilled. It can do internode communication, but it
 451     should be idempotent - no cluster or system changes are allowed.
 452
 453     The method should raise errors.OpPrereqError in case something is not
 454     fulfilled. Its return value is ignored.
 455
 456     This method should also update all parameters to their canonical form if it
 457     hasn't been done before.
 458
 459     """
 460     pass
 461
 462   def Exec(self, feedback_fn):
 463     """Execute the tasklet.
 464
 465     This method should implement the actual work. It should raise
 466     errors.OpExecError for failures that are somewhat dealt with in code, or
 467     expected.
 468
 469     """
 470     raise NotImplementedError
 471
 472
 473 class _QueryBase:
 474   """Base for query utility classes.
 475
 476   """
 477   #: Attribute holding field definitions
 478   FIELDS = None
 479
 480   def __init__(self, filter_, fields, use_locking):
 481     """Initializes this class.
 482
 483     """
 484     self.use_locking = use_locking
 485
 486     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 487                              namefield="name")
 488     self.requested_data = self.query.RequestedData()
 489     self.names = self.query.RequestedNames()
 490
 491     # Sort only if no names were requested
 492     self.sort_by_name = not self.names
 493
 494     self.do_locking = None
 495     self.wanted = None
 496
 497   def _GetNames(self, lu, all_names, lock_level):
 498     """Helper function to determine names asked for in the query.
 499
 500     """
 501     if self.do_locking:
 502       names = lu.acquired_locks[lock_level]
 503     else:
 504       names = all_names
 505
 506     if self.wanted == locking.ALL_SET:
 507       assert not self.names
 508       # caller didn't specify names, so ordering is not important
 509       return utils.NiceSort(names)
 510
 511     # caller specified names and we must keep the same order
 512     assert self.names
 513     assert not self.do_locking or lu.acquired_locks[lock_level]
 514
 515     missing = set(self.wanted).difference(names)
 516     if missing:
 517       raise errors.OpExecError("Some items were removed before retrieving"
 518                                " their data: %s" % missing)
 519
 520     # Return expanded names
 521     return self.wanted
 522
 523   def ExpandNames(self, lu):
 524     """Expand names for this query.
 525
 526     See L{LogicalUnit.ExpandNames}.
 527
 528     """
 529     raise NotImplementedError()
 530
 531   def DeclareLocks(self, lu, level):
 532     """Declare locks for this query.
 533
 534     See L{LogicalUnit.DeclareLocks}.
 535
 536     """
 537     raise NotImplementedError()
 538
 539   def _GetQueryData(self, lu):
 540     """Collects all data for this query.
 541
 542     @return: Query data object
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def NewStyleQuery(self, lu):
 548     """Collect data and execute query.
 549
 550     """
 551     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 552                                   sort_by_name=self.sort_by_name)
 553
 554   def OldStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return self.query.OldStyleQuery(self._GetQueryData(lu),
 559                                     sort_by_name=self.sort_by_name)
 560
 561
 562 def _GetWantedNodes(lu, nodes):
 563   """Returns list of checked and expanded node names.
 564
 565   @type lu: L{LogicalUnit}
 566   @param lu: the logical unit on whose behalf we execute
 567   @type nodes: list
 568   @param nodes: list of node names or None for all nodes
 569   @rtype: list
 570   @return: the list of nodes, sorted
 571   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 572
 573   """
 574   if nodes:
 575     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 576
 577   return utils.NiceSort(lu.cfg.GetNodeList())
 578
 579
 580 def _GetWantedInstances(lu, instances):
 581   """Returns list of checked and expanded instance names.
 582
 583   @type lu: L{LogicalUnit}
 584   @param lu: the logical unit on whose behalf we execute
 585   @type instances: list
 586   @param instances: list of instance names or None for all instances
 587   @rtype: list
 588   @return: the list of instances, sorted
 589   @raise errors.OpPrereqError: if the instances parameter is wrong type
 590   @raise errors.OpPrereqError: if any of the passed instances is not found
 591
 592   """
 593   if instances:
 594     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 595   else:
 596     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 597   return wanted
 598
 599
 600 def _GetUpdatedParams(old_params, update_dict,
 601                       use_default=True, use_none=False):
 602   """Return the new version of a parameter dictionary.
 603
 604   @type old_params: dict
 605   @param old_params: old parameters
 606   @type update_dict: dict
 607   @param update_dict: dict containing new parameter values, or
 608       constants.VALUE_DEFAULT to reset the parameter to its default
 609       value
 610   @param use_default: boolean
 611   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 612       values as 'to be deleted' values
 613   @param use_none: boolean
 614   @type use_none: whether to recognise C{None} values as 'to be
 615       deleted' values
 616   @rtype: dict
 617   @return: the new parameter dictionary
 618
 619   """
 620   params_copy = copy.deepcopy(old_params)
 621   for key, val in update_dict.iteritems():
 622     if ((use_default and val == constants.VALUE_DEFAULT) or
 623         (use_none and val is None)):
 624       try:
 625         del params_copy[key]
 626       except KeyError:
 627         pass
 628     else:
 629       params_copy[key] = val
 630   return params_copy
 631
 632
 633 def _RunPostHook(lu, node_name):
 634   """Runs the post-hook for an opcode on a single node.
 635
 636   """
 637   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 638   try:
 639     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 640   except:
 641     # pylint: disable-msg=W0702
 642     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 643
 644
 645 def _CheckOutputFields(static, dynamic, selected):
 646   """Checks whether all selected fields are valid.
 647
 648   @type static: L{utils.FieldSet}
 649   @param static: static fields set
 650   @type dynamic: L{utils.FieldSet}
 651   @param dynamic: dynamic fields set
 652
 653   """
 654   f = utils.FieldSet()
 655   f.Extend(static)
 656   f.Extend(dynamic)
 657
 658   delta = f.NonMatching(selected)
 659   if delta:
 660     raise errors.OpPrereqError("Unknown output fields selected: %s"
 661                                % ",".join(delta), errors.ECODE_INVAL)
 662
 663
 664 def _CheckGlobalHvParams(params):
 665   """Validates that given hypervisor params are not global ones.
 666
 667   This will ensure that instances don't get customised versions of
 668   global params.
 669
 670   """
 671   used_globals = constants.HVC_GLOBALS.intersection(params)
 672   if used_globals:
 673     msg = ("The following hypervisor parameters are global and cannot"
 674            " be customized at instance level, please modify them at"
 675            " cluster level: %s" % utils.CommaJoin(used_globals))
 676     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 677
 678
 679 def _CheckNodeOnline(lu, node, msg=None):
 680   """Ensure that a given node is online.
 681
 682   @param lu: the LU on behalf of which we make the check
 683   @param node: the node to check
 684   @param msg: if passed, should be a message to replace the default one
 685   @raise errors.OpPrereqError: if the node is offline
 686
 687   """
 688   if msg is None:
 689     msg = "Can't use offline node"
 690   if lu.cfg.GetNodeInfo(node).offline:
 691     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 692
 693
 694 def _CheckNodeNotDrained(lu, node):
 695   """Ensure that a given node is not drained.
 696
 697   @param lu: the LU on behalf of which we make the check
 698   @param node: the node to check
 699   @raise errors.OpPrereqError: if the node is drained
 700
 701   """
 702   if lu.cfg.GetNodeInfo(node).drained:
 703     raise errors.OpPrereqError("Can't use drained node %s" % node,
 704                                errors.ECODE_STATE)
 705
 706
 707 def _CheckNodeVmCapable(lu, node):
 708   """Ensure that a given node is vm capable.
 709
 710   @param lu: the LU on behalf of which we make the check
 711   @param node: the node to check
 712   @raise errors.OpPrereqError: if the node is not vm capable
 713
 714   """
 715   if not lu.cfg.GetNodeInfo(node).vm_capable:
 716     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 717                                errors.ECODE_STATE)
 718
 719
 720 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 721   """Ensure that a node supports a given OS.
 722
 723   @param lu: the LU on behalf of which we make the check
 724   @param node: the node to check
 725   @param os_name: the OS to query about
 726   @param force_variant: whether to ignore variant errors
 727   @raise errors.OpPrereqError: if the node is not supporting the OS
 728
 729   """
 730   result = lu.rpc.call_os_get(node, os_name)
 731   result.Raise("OS '%s' not in supported OS list for node %s" %
 732                (os_name, node),
 733                prereq=True, ecode=errors.ECODE_INVAL)
 734   if not force_variant:
 735     _CheckOSVariant(result.payload, os_name)
 736
 737
 738 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 739   """Ensure that a node has the given secondary ip.
 740
 741   @type lu: L{LogicalUnit}
 742   @param lu: the LU on behalf of which we make the check
 743   @type node: string
 744   @param node: the node to check
 745   @type secondary_ip: string
 746   @param secondary_ip: the ip to check
 747   @type prereq: boolean
 748   @param prereq: whether to throw a prerequisite or an execute error
 749   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 750   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 751
 752   """
 753   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 754   result.Raise("Failure checking secondary ip on node %s" % node,
 755                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 756   if not result.payload:
 757     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 758            " please fix and re-run this command" % secondary_ip)
 759     if prereq:
 760       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 761     else:
 762       raise errors.OpExecError(msg)
 763
 764
 765 def _GetClusterDomainSecret():
 766   """Reads the cluster domain secret.
 767
 768   """
 769   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 770                                strict=True)
 771
 772
 773 def _CheckInstanceDown(lu, instance, reason):
 774   """Ensure that an instance is not running."""
 775   if instance.admin_up:
 776     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 777                                (instance.name, reason), errors.ECODE_STATE)
 778
 779   pnode = instance.primary_node
 780   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 781   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 782               prereq=True, ecode=errors.ECODE_ENVIRON)
 783
 784   if instance.name in ins_l.payload:
 785     raise errors.OpPrereqError("Instance %s is running, %s" %
 786                                (instance.name, reason), errors.ECODE_STATE)
 787
 788
 789 def _ExpandItemName(fn, name, kind):
 790   """Expand an item name.
 791
 792   @param fn: the function to use for expansion
 793   @param name: requested item name
 794   @param kind: text description ('Node' or 'Instance')
 795   @return: the resolved (full) name
 796   @raise errors.OpPrereqError: if the item is not found
 797
 798   """
 799   full_name = fn(name)
 800   if full_name is None:
 801     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 802                                errors.ECODE_NOENT)
 803   return full_name
 804
 805
 806 def _ExpandNodeName(cfg, name):
 807   """Wrapper over L{_ExpandItemName} for nodes."""
 808   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 809
 810
 811 def _ExpandInstanceName(cfg, name):
 812   """Wrapper over L{_ExpandItemName} for instance."""
 813   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 814
 815
 816 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 817                           memory, vcpus, nics, disk_template, disks,
 818                           bep, hvp, hypervisor_name):
 819   """Builds instance related env variables for hooks
 820
 821   This builds the hook environment from individual variables.
 822
 823   @type name: string
 824   @param name: the name of the instance
 825   @type primary_node: string
 826   @param primary_node: the name of the instance's primary node
 827   @type secondary_nodes: list
 828   @param secondary_nodes: list of secondary nodes as strings
 829   @type os_type: string
 830   @param os_type: the name of the instance's OS
 831   @type status: boolean
 832   @param status: the should_run status of the instance
 833   @type memory: string
 834   @param memory: the memory size of the instance
 835   @type vcpus: string
 836   @param vcpus: the count of VCPUs the instance has
 837   @type nics: list
 838   @param nics: list of tuples (ip, mac, mode, link) representing
 839       the NICs the instance has
 840   @type disk_template: string
 841   @param disk_template: the disk template of the instance
 842   @type disks: list
 843   @param disks: the list of (size, mode) pairs
 844   @type bep: dict
 845   @param bep: the backend parameters for the instance
 846   @type hvp: dict
 847   @param hvp: the hypervisor parameters for the instance
 848   @type hypervisor_name: string
 849   @param hypervisor_name: the hypervisor for the instance
 850   @rtype: dict
 851   @return: the hook environment for this instance
 852
 853   """
 854   if status:
 855     str_status = "up"
 856   else:
 857     str_status = "down"
 858   env = {
 859     "OP_TARGET": name,
 860     "INSTANCE_NAME": name,
 861     "INSTANCE_PRIMARY": primary_node,
 862     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 863     "INSTANCE_OS_TYPE": os_type,
 864     "INSTANCE_STATUS": str_status,
 865     "INSTANCE_MEMORY": memory,
 866     "INSTANCE_VCPUS": vcpus,
 867     "INSTANCE_DISK_TEMPLATE": disk_template,
 868     "INSTANCE_HYPERVISOR": hypervisor_name,
 869   }
 870
 871   if nics:
 872     nic_count = len(nics)
 873     for idx, (ip, mac, mode, link) in enumerate(nics):
 874       if ip is None:
 875         ip = ""
 876       env["INSTANCE_NIC%d_IP" % idx] = ip
 877       env["INSTANCE_NIC%d_MAC" % idx] = mac
 878       env["INSTANCE_NIC%d_MODE" % idx] = mode
 879       env["INSTANCE_NIC%d_LINK" % idx] = link
 880       if mode == constants.NIC_MODE_BRIDGED:
 881         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 882   else:
 883     nic_count = 0
 884
 885   env["INSTANCE_NIC_COUNT"] = nic_count
 886
 887   if disks:
 888     disk_count = len(disks)
 889     for idx, (size, mode) in enumerate(disks):
 890       env["INSTANCE_DISK%d_SIZE" % idx] = size
 891       env["INSTANCE_DISK%d_MODE" % idx] = mode
 892   else:
 893     disk_count = 0
 894
 895   env["INSTANCE_DISK_COUNT"] = disk_count
 896
 897   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 898     for key, value in source.items():
 899       env["INSTANCE_%s_%s" % (kind, key)] = value
 900
 901   return env
 902
 903
 904 def _NICListToTuple(lu, nics):
 905   """Build a list of nic information tuples.
 906
 907   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 908   value in LUInstanceQueryData.
 909
 910   @type lu:  L{LogicalUnit}
 911   @param lu: the logical unit on whose behalf we execute
 912   @type nics: list of L{objects.NIC}
 913   @param nics: list of nics to convert to hooks tuples
 914
 915   """
 916   hooks_nics = []
 917   cluster = lu.cfg.GetClusterInfo()
 918   for nic in nics:
 919     ip = nic.ip
 920     mac = nic.mac
 921     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 922     mode = filled_params[constants.NIC_MODE]
 923     link = filled_params[constants.NIC_LINK]
 924     hooks_nics.append((ip, mac, mode, link))
 925   return hooks_nics
 926
 927
 928 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 929   """Builds instance related env variables for hooks from an object.
 930
 931   @type lu: L{LogicalUnit}
 932   @param lu: the logical unit on whose behalf we execute
 933   @type instance: L{objects.Instance}
 934   @param instance: the instance for which we should build the
 935       environment
 936   @type override: dict
 937   @param override: dictionary with key/values that will override
 938       our values
 939   @rtype: dict
 940   @return: the hook environment dictionary
 941
 942   """
 943   cluster = lu.cfg.GetClusterInfo()
 944   bep = cluster.FillBE(instance)
 945   hvp = cluster.FillHV(instance)
 946   args = {
 947     'name': instance.name,
 948     'primary_node': instance.primary_node,
 949     'secondary_nodes': instance.secondary_nodes,
 950     'os_type': instance.os,
 951     'status': instance.admin_up,
 952     'memory': bep[constants.BE_MEMORY],
 953     'vcpus': bep[constants.BE_VCPUS],
 954     'nics': _NICListToTuple(lu, instance.nics),
 955     'disk_template': instance.disk_template,
 956     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 957     'bep': bep,
 958     'hvp': hvp,
 959     'hypervisor_name': instance.hypervisor,
 960   }
 961   if override:
 962     args.update(override)
 963   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 964
 965
 966 def _AdjustCandidatePool(lu, exceptions):
 967   """Adjust the candidate pool after node operations.
 968
 969   """
 970   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 971   if mod_list:
 972     lu.LogInfo("Promoted nodes to master candidate role: %s",
 973                utils.CommaJoin(node.name for node in mod_list))
 974     for name in mod_list:
 975       lu.context.ReaddNode(name)
 976   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 977   if mc_now > mc_max:
 978     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 979                (mc_now, mc_max))
 980
 981
 982 def _DecideSelfPromotion(lu, exceptions=None):
 983   """Decide whether I should promote myself as a master candidate.
 984
 985   """
 986   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 987   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 988   # the new node will increase mc_max with one, so:
 989   mc_should = min(mc_should + 1, cp_size)
 990   return mc_now < mc_should
 991
 992
 993 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 994   """Check that the brigdes needed by a list of nics exist.
 995
 996   """
 997   cluster = lu.cfg.GetClusterInfo()
 998   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 999   brlist = [params[constants.NIC_LINK] for params in paramslist
1000             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1001   if brlist:
1002     result = lu.rpc.call_bridges_exist(target_node, brlist)
1003     result.Raise("Error checking bridges on destination node '%s'" %
1004                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1005
1006
1007 def _CheckInstanceBridgesExist(lu, instance, node=None):
1008   """Check that the brigdes needed by an instance exist.
1009
1010   """
1011   if node is None:
1012     node = instance.primary_node
1013   _CheckNicsBridgesExist(lu, instance.nics, node)
1014
1015
1016 def _CheckOSVariant(os_obj, name):
1017   """Check whether an OS name conforms to the os variants specification.
1018
1019   @type os_obj: L{objects.OS}
1020   @param os_obj: OS object to check
1021   @type name: string
1022   @param name: OS name passed by the user, to check for validity
1023
1024   """
1025   if not os_obj.supported_variants:
1026     return
1027   variant = objects.OS.GetVariant(name)
1028   if not variant:
1029     raise errors.OpPrereqError("OS name must include a variant",
1030                                errors.ECODE_INVAL)
1031
1032   if variant not in os_obj.supported_variants:
1033     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1034
1035
1036 def _GetNodeInstancesInner(cfg, fn):
1037   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1038
1039
1040 def _GetNodeInstances(cfg, node_name):
1041   """Returns a list of all primary and secondary instances on a node.
1042
1043   """
1044
1045   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1046
1047
1048 def _GetNodePrimaryInstances(cfg, node_name):
1049   """Returns primary instances on a node.
1050
1051   """
1052   return _GetNodeInstancesInner(cfg,
1053                                 lambda inst: node_name == inst.primary_node)
1054
1055
1056 def _GetNodeSecondaryInstances(cfg, node_name):
1057   """Returns secondary instances on a node.
1058
1059   """
1060   return _GetNodeInstancesInner(cfg,
1061                                 lambda inst: node_name in inst.secondary_nodes)
1062
1063
1064 def _GetStorageTypeArgs(cfg, storage_type):
1065   """Returns the arguments for a storage type.
1066
1067   """
1068   # Special case for file storage
1069   if storage_type == constants.ST_FILE:
1070     # storage.FileStorage wants a list of storage directories
1071     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1072
1073   return []
1074
1075
1076 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1077   faulty = []
1078
1079   for dev in instance.disks:
1080     cfg.SetDiskID(dev, node_name)
1081
1082   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1083   result.Raise("Failed to get disk status from node %s" % node_name,
1084                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1085
1086   for idx, bdev_status in enumerate(result.payload):
1087     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1088       faulty.append(idx)
1089
1090   return faulty
1091
1092
1093 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1094   """Check the sanity of iallocator and node arguments and use the
1095   cluster-wide iallocator if appropriate.
1096
1097   Check that at most one of (iallocator, node) is specified. If none is
1098   specified, then the LU's opcode's iallocator slot is filled with the
1099   cluster-wide default iallocator.
1100
1101   @type iallocator_slot: string
1102   @param iallocator_slot: the name of the opcode iallocator slot
1103   @type node_slot: string
1104   @param node_slot: the name of the opcode target node slot
1105
1106   """
1107   node = getattr(lu.op, node_slot, None)
1108   iallocator = getattr(lu.op, iallocator_slot, None)
1109
1110   if node is not None and iallocator is not None:
1111     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1112                                errors.ECODE_INVAL)
1113   elif node is None and iallocator is None:
1114     default_iallocator = lu.cfg.GetDefaultIAllocator()
1115     if default_iallocator:
1116       setattr(lu.op, iallocator_slot, default_iallocator)
1117     else:
1118       raise errors.OpPrereqError("No iallocator or node given and no"
1119                                  " cluster-wide default iallocator found."
1120                                  " Please specify either an iallocator or a"
1121                                  " node, or set a cluster-wide default"
1122                                  " iallocator.")
1123
1124
1125 class LUClusterPostInit(LogicalUnit):
1126   """Logical unit for running hooks after cluster initialization.
1127
1128   """
1129   HPATH = "cluster-init"
1130   HTYPE = constants.HTYPE_CLUSTER
1131
1132   def BuildHooksEnv(self):
1133     """Build hooks env.
1134
1135     """
1136     return {
1137       "OP_TARGET": self.cfg.GetClusterName(),
1138       }
1139
1140   def BuildHooksNodes(self):
1141     """Build hooks nodes.
1142
1143     """
1144     return ([], [self.cfg.GetMasterNode()])
1145
1146   def Exec(self, feedback_fn):
1147     """Nothing to do.
1148
1149     """
1150     return True
1151
1152
1153 class LUClusterDestroy(LogicalUnit):
1154   """Logical unit for destroying the cluster.
1155
1156   """
1157   HPATH = "cluster-destroy"
1158   HTYPE = constants.HTYPE_CLUSTER
1159
1160   def BuildHooksEnv(self):
1161     """Build hooks env.
1162
1163     """
1164     return {
1165       "OP_TARGET": self.cfg.GetClusterName(),
1166       }
1167
1168   def BuildHooksNodes(self):
1169     """Build hooks nodes.
1170
1171     """
1172     return ([], [])
1173
1174   def CheckPrereq(self):
1175     """Check prerequisites.
1176
1177     This checks whether the cluster is empty.
1178
1179     Any errors are signaled by raising errors.OpPrereqError.
1180
1181     """
1182     master = self.cfg.GetMasterNode()
1183
1184     nodelist = self.cfg.GetNodeList()
1185     if len(nodelist) != 1 or nodelist[0] != master:
1186       raise errors.OpPrereqError("There are still %d node(s) in"
1187                                  " this cluster." % (len(nodelist) - 1),
1188                                  errors.ECODE_INVAL)
1189     instancelist = self.cfg.GetInstanceList()
1190     if instancelist:
1191       raise errors.OpPrereqError("There are still %d instance(s) in"
1192                                  " this cluster." % len(instancelist),
1193                                  errors.ECODE_INVAL)
1194
1195   def Exec(self, feedback_fn):
1196     """Destroys the cluster.
1197
1198     """
1199     master = self.cfg.GetMasterNode()
1200
1201     # Run post hooks on master node before it's removed
1202     _RunPostHook(self, master)
1203
1204     result = self.rpc.call_node_stop_master(master, False)
1205     result.Raise("Could not disable the master role")
1206
1207     return master
1208
1209
1210 def _VerifyCertificate(filename):
1211   """Verifies a certificate for LUClusterVerify.
1212
1213   @type filename: string
1214   @param filename: Path to PEM file
1215
1216   """
1217   try:
1218     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1219                                            utils.ReadFile(filename))
1220   except Exception, err: # pylint: disable-msg=W0703
1221     return (LUClusterVerify.ETYPE_ERROR,
1222             "Failed to load X509 certificate %s: %s" % (filename, err))
1223
1224   (errcode, msg) = \
1225     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1226                                 constants.SSL_CERT_EXPIRATION_ERROR)
1227
1228   if msg:
1229     fnamemsg = "While verifying %s: %s" % (filename, msg)
1230   else:
1231     fnamemsg = None
1232
1233   if errcode is None:
1234     return (None, fnamemsg)
1235   elif errcode == utils.CERT_WARNING:
1236     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1237   elif errcode == utils.CERT_ERROR:
1238     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1239
1240   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1241
1242
1243 class LUClusterVerify(LogicalUnit):
1244   """Verifies the cluster status.
1245
1246   """
1247   HPATH = "cluster-verify"
1248   HTYPE = constants.HTYPE_CLUSTER
1249   REQ_BGL = False
1250
1251   TCLUSTER = "cluster"
1252   TNODE = "node"
1253   TINSTANCE = "instance"
1254
1255   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1256   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1257   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1258   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1259   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1260   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1261   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1262   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1263   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1264   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1265   ENODEDRBD = (TNODE, "ENODEDRBD")
1266   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1267   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1268   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1269   ENODEHV = (TNODE, "ENODEHV")
1270   ENODELVM = (TNODE, "ENODELVM")
1271   ENODEN1 = (TNODE, "ENODEN1")
1272   ENODENET = (TNODE, "ENODENET")
1273   ENODEOS = (TNODE, "ENODEOS")
1274   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1275   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1276   ENODERPC = (TNODE, "ENODERPC")
1277   ENODESSH = (TNODE, "ENODESSH")
1278   ENODEVERSION = (TNODE, "ENODEVERSION")
1279   ENODESETUP = (TNODE, "ENODESETUP")
1280   ENODETIME = (TNODE, "ENODETIME")
1281   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1282
1283   ETYPE_FIELD = "code"
1284   ETYPE_ERROR = "ERROR"
1285   ETYPE_WARNING = "WARNING"
1286
1287   _HOOKS_INDENT_RE = re.compile("^", re.M)
1288
1289   class NodeImage(object):
1290     """A class representing the logical and physical status of a node.
1291
1292     @type name: string
1293     @ivar name: the node name to which this object refers
1294     @ivar volumes: a structure as returned from
1295         L{ganeti.backend.GetVolumeList} (runtime)
1296     @ivar instances: a list of running instances (runtime)
1297     @ivar pinst: list of configured primary instances (config)
1298     @ivar sinst: list of configured secondary instances (config)
1299     @ivar sbp: dictionary of {primary-node: list of instances} for all
1300         instances for which this node is secondary (config)
1301     @ivar mfree: free memory, as reported by hypervisor (runtime)
1302     @ivar dfree: free disk, as reported by the node (runtime)
1303     @ivar offline: the offline status (config)
1304     @type rpc_fail: boolean
1305     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1306         not whether the individual keys were correct) (runtime)
1307     @type lvm_fail: boolean
1308     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1309     @type hyp_fail: boolean
1310     @ivar hyp_fail: whether the RPC call didn't return the instance list
1311     @type ghost: boolean
1312     @ivar ghost: whether this is a known node or not (config)
1313     @type os_fail: boolean
1314     @ivar os_fail: whether the RPC call didn't return valid OS data
1315     @type oslist: list
1316     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1317     @type vm_capable: boolean
1318     @ivar vm_capable: whether the node can host instances
1319
1320     """
1321     def __init__(self, offline=False, name=None, vm_capable=True):
1322       self.name = name
1323       self.volumes = {}
1324       self.instances = []
1325       self.pinst = []
1326       self.sinst = []
1327       self.sbp = {}
1328       self.mfree = 0
1329       self.dfree = 0
1330       self.offline = offline
1331       self.vm_capable = vm_capable
1332       self.rpc_fail = False
1333       self.lvm_fail = False
1334       self.hyp_fail = False
1335       self.ghost = False
1336       self.os_fail = False
1337       self.oslist = {}
1338
1339   def ExpandNames(self):
1340     self.needed_locks = {
1341       locking.LEVEL_NODE: locking.ALL_SET,
1342       locking.LEVEL_INSTANCE: locking.ALL_SET,
1343     }
1344     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1345
1346   def _Error(self, ecode, item, msg, *args, **kwargs):
1347     """Format an error message.
1348
1349     Based on the opcode's error_codes parameter, either format a
1350     parseable error code, or a simpler error string.
1351
1352     This must be called only from Exec and functions called from Exec.
1353
1354     """
1355     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1356     itype, etxt = ecode
1357     # first complete the msg
1358     if args:
1359       msg = msg % args
1360     # then format the whole message
1361     if self.op.error_codes:
1362       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1363     else:
1364       if item:
1365         item = " " + item
1366       else:
1367         item = ""
1368       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1369     # and finally report it via the feedback_fn
1370     self._feedback_fn("  - %s" % msg)
1371
1372   def _ErrorIf(self, cond, *args, **kwargs):
1373     """Log an error message if the passed condition is True.
1374
1375     """
1376     cond = bool(cond) or self.op.debug_simulate_errors
1377     if cond:
1378       self._Error(*args, **kwargs)
1379     # do not mark the operation as failed for WARN cases only
1380     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1381       self.bad = self.bad or cond
1382
1383   def _VerifyNode(self, ninfo, nresult):
1384     """Perform some basic validation on data returned from a node.
1385
1386       - check the result data structure is well formed and has all the
1387         mandatory fields
1388       - check ganeti version
1389
1390     @type ninfo: L{objects.Node}
1391     @param ninfo: the node to check
1392     @param nresult: the results from the node
1393     @rtype: boolean
1394     @return: whether overall this call was successful (and we can expect
1395          reasonable values in the respose)
1396
1397     """
1398     node = ninfo.name
1399     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1400
1401     # main result, nresult should be a non-empty dict
1402     test = not nresult or not isinstance(nresult, dict)
1403     _ErrorIf(test, self.ENODERPC, node,
1404                   "unable to verify node: no data returned")
1405     if test:
1406       return False
1407
1408     # compares ganeti version
1409     local_version = constants.PROTOCOL_VERSION
1410     remote_version = nresult.get("version", None)
1411     test = not (remote_version and
1412                 isinstance(remote_version, (list, tuple)) and
1413                 len(remote_version) == 2)
1414     _ErrorIf(test, self.ENODERPC, node,
1415              "connection to node returned invalid data")
1416     if test:
1417       return False
1418
1419     test = local_version != remote_version[0]
1420     _ErrorIf(test, self.ENODEVERSION, node,
1421              "incompatible protocol versions: master %s,"
1422              " node %s", local_version, remote_version[0])
1423     if test:
1424       return False
1425
1426     # node seems compatible, we can actually try to look into its results
1427
1428     # full package version
1429     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1430                   self.ENODEVERSION, node,
1431                   "software version mismatch: master %s, node %s",
1432                   constants.RELEASE_VERSION, remote_version[1],
1433                   code=self.ETYPE_WARNING)
1434
1435     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1436     if ninfo.vm_capable and isinstance(hyp_result, dict):
1437       for hv_name, hv_result in hyp_result.iteritems():
1438         test = hv_result is not None
1439         _ErrorIf(test, self.ENODEHV, node,
1440                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1441
1442     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1443     if ninfo.vm_capable and isinstance(hvp_result, list):
1444       for item, hv_name, hv_result in hvp_result:
1445         _ErrorIf(True, self.ENODEHV, node,
1446                  "hypervisor %s parameter verify failure (source %s): %s",
1447                  hv_name, item, hv_result)
1448
1449     test = nresult.get(constants.NV_NODESETUP,
1450                        ["Missing NODESETUP results"])
1451     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1452              "; ".join(test))
1453
1454     return True
1455
1456   def _VerifyNodeTime(self, ninfo, nresult,
1457                       nvinfo_starttime, nvinfo_endtime):
1458     """Check the node time.
1459
1460     @type ninfo: L{objects.Node}
1461     @param ninfo: the node to check
1462     @param nresult: the remote results for the node
1463     @param nvinfo_starttime: the start time of the RPC call
1464     @param nvinfo_endtime: the end time of the RPC call
1465
1466     """
1467     node = ninfo.name
1468     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1469
1470     ntime = nresult.get(constants.NV_TIME, None)
1471     try:
1472       ntime_merged = utils.MergeTime(ntime)
1473     except (ValueError, TypeError):
1474       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1475       return
1476
1477     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1478       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1479     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1480       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1481     else:
1482       ntime_diff = None
1483
1484     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1485              "Node time diverges by at least %s from master node time",
1486              ntime_diff)
1487
1488   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1489     """Check the node time.
1490
1491     @type ninfo: L{objects.Node}
1492     @param ninfo: the node to check
1493     @param nresult: the remote results for the node
1494     @param vg_name: the configured VG name
1495
1496     """
1497     if vg_name is None:
1498       return
1499
1500     node = ninfo.name
1501     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1502
1503     # checks vg existence and size > 20G
1504     vglist = nresult.get(constants.NV_VGLIST, None)
1505     test = not vglist
1506     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1507     if not test:
1508       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1509                                             constants.MIN_VG_SIZE)
1510       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1511
1512     # check pv names
1513     pvlist = nresult.get(constants.NV_PVLIST, None)
1514     test = pvlist is None
1515     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1516     if not test:
1517       # check that ':' is not present in PV names, since it's a
1518       # special character for lvcreate (denotes the range of PEs to
1519       # use on the PV)
1520       for _, pvname, owner_vg in pvlist:
1521         test = ":" in pvname
1522         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1523                  " '%s' of VG '%s'", pvname, owner_vg)
1524
1525   def _VerifyNodeNetwork(self, ninfo, nresult):
1526     """Check the node time.
1527
1528     @type ninfo: L{objects.Node}
1529     @param ninfo: the node to check
1530     @param nresult: the remote results for the node
1531
1532     """
1533     node = ninfo.name
1534     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1535
1536     test = constants.NV_NODELIST not in nresult
1537     _ErrorIf(test, self.ENODESSH, node,
1538              "node hasn't returned node ssh connectivity data")
1539     if not test:
1540       if nresult[constants.NV_NODELIST]:
1541         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1542           _ErrorIf(True, self.ENODESSH, node,
1543                    "ssh communication with node '%s': %s", a_node, a_msg)
1544
1545     test = constants.NV_NODENETTEST not in nresult
1546     _ErrorIf(test, self.ENODENET, node,
1547              "node hasn't returned node tcp connectivity data")
1548     if not test:
1549       if nresult[constants.NV_NODENETTEST]:
1550         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1551         for anode in nlist:
1552           _ErrorIf(True, self.ENODENET, node,
1553                    "tcp communication with node '%s': %s",
1554                    anode, nresult[constants.NV_NODENETTEST][anode])
1555
1556     test = constants.NV_MASTERIP not in nresult
1557     _ErrorIf(test, self.ENODENET, node,
1558              "node hasn't returned node master IP reachability data")
1559     if not test:
1560       if not nresult[constants.NV_MASTERIP]:
1561         if node == self.master_node:
1562           msg = "the master node cannot reach the master IP (not configured?)"
1563         else:
1564           msg = "cannot reach the master IP"
1565         _ErrorIf(True, self.ENODENET, node, msg)
1566
1567   def _VerifyInstance(self, instance, instanceconfig, node_image,
1568                       diskstatus):
1569     """Verify an instance.
1570
1571     This function checks to see if the required block devices are
1572     available on the instance's node.
1573
1574     """
1575     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1576     node_current = instanceconfig.primary_node
1577
1578     node_vol_should = {}
1579     instanceconfig.MapLVsByNode(node_vol_should)
1580
1581     for node in node_vol_should:
1582       n_img = node_image[node]
1583       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1584         # ignore missing volumes on offline or broken nodes
1585         continue
1586       for volume in node_vol_should[node]:
1587         test = volume not in n_img.volumes
1588         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1589                  "volume %s missing on node %s", volume, node)
1590
1591     if instanceconfig.admin_up:
1592       pri_img = node_image[node_current]
1593       test = instance not in pri_img.instances and not pri_img.offline
1594       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1595                "instance not running on its primary node %s",
1596                node_current)
1597
1598     for node, n_img in node_image.items():
1599       if node != node_current:
1600         test = instance in n_img.instances
1601         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1602                  "instance should not run on node %s", node)
1603
1604     diskdata = [(nname, success, status, idx)
1605                 for (nname, disks) in diskstatus.items()
1606                 for idx, (success, status) in enumerate(disks)]
1607
1608     for nname, success, bdev_status, idx in diskdata:
1609       # the 'ghost node' construction in Exec() ensures that we have a
1610       # node here
1611       snode = node_image[nname]
1612       bad_snode = snode.ghost or snode.offline
1613       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1614                self.EINSTANCEFAULTYDISK, instance,
1615                "couldn't retrieve status for disk/%s on %s: %s",
1616                idx, nname, bdev_status)
1617       _ErrorIf((instanceconfig.admin_up and success and
1618                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1619                self.EINSTANCEFAULTYDISK, instance,
1620                "disk/%s on %s is faulty", idx, nname)
1621
1622   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1623     """Verify if there are any unknown volumes in the cluster.
1624
1625     The .os, .swap and backup volumes are ignored. All other volumes are
1626     reported as unknown.
1627
1628     @type reserved: L{ganeti.utils.FieldSet}
1629     @param reserved: a FieldSet of reserved volume names
1630
1631     """
1632     for node, n_img in node_image.items():
1633       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1634         # skip non-healthy nodes
1635         continue
1636       for volume in n_img.volumes:
1637         test = ((node not in node_vol_should or
1638                 volume not in node_vol_should[node]) and
1639                 not reserved.Matches(volume))
1640         self._ErrorIf(test, self.ENODEORPHANLV, node,
1641                       "volume %s is unknown", volume)
1642
1643   def _VerifyOrphanInstances(self, instancelist, node_image):
1644     """Verify the list of running instances.
1645
1646     This checks what instances are running but unknown to the cluster.
1647
1648     """
1649     for node, n_img in node_image.items():
1650       for o_inst in n_img.instances:
1651         test = o_inst not in instancelist
1652         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1653                       "instance %s on node %s should not exist", o_inst, node)
1654
1655   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1656     """Verify N+1 Memory Resilience.
1657
1658     Check that if one single node dies we can still start all the
1659     instances it was primary for.
1660
1661     """
1662     cluster_info = self.cfg.GetClusterInfo()
1663     for node, n_img in node_image.items():
1664       # This code checks that every node which is now listed as
1665       # secondary has enough memory to host all instances it is
1666       # supposed to should a single other node in the cluster fail.
1667       # FIXME: not ready for failover to an arbitrary node
1668       # FIXME: does not support file-backed instances
1669       # WARNING: we currently take into account down instances as well
1670       # as up ones, considering that even if they're down someone
1671       # might want to start them even in the event of a node failure.
1672       if n_img.offline:
1673         # we're skipping offline nodes from the N+1 warning, since
1674         # most likely we don't have good memory infromation from them;
1675         # we already list instances living on such nodes, and that's
1676         # enough warning
1677         continue
1678       for prinode, instances in n_img.sbp.items():
1679         needed_mem = 0
1680         for instance in instances:
1681           bep = cluster_info.FillBE(instance_cfg[instance])
1682           if bep[constants.BE_AUTO_BALANCE]:
1683             needed_mem += bep[constants.BE_MEMORY]
1684         test = n_img.mfree < needed_mem
1685         self._ErrorIf(test, self.ENODEN1, node,
1686                       "not enough memory to accomodate instance failovers"
1687                       " should node %s fail (%dMiB needed, %dMiB available)",
1688                       prinode, needed_mem, n_img.mfree)
1689
1690   @classmethod
1691   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1692                    (files_all, files_all_opt, files_mc, files_vm)):
1693     """Verifies file checksums collected from all nodes.
1694
1695     @param errorif: Callback for reporting errors
1696     @param nodeinfo: List of L{objects.Node} objects
1697     @param master_node: Name of master node
1698     @param all_nvinfo: RPC results
1699
1700     """
1701     node_names = frozenset(node.name for node in nodeinfo)
1702
1703     assert master_node in node_names
1704     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1705             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1706            "Found file listed in more than one file list"
1707
1708     # Define functions determining which nodes to consider for a file
1709     file2nodefn = dict([(filename, fn)
1710       for (files, fn) in [(files_all, None),
1711                           (files_all_opt, None),
1712                           (files_mc, lambda node: (node.master_candidate or
1713                                                    node.name == master_node)),
1714                           (files_vm, lambda node: node.vm_capable)]
1715       for filename in files])
1716
1717     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1718
1719     for node in nodeinfo:
1720       nresult = all_nvinfo[node.name]
1721
1722       if nresult.fail_msg or not nresult.payload:
1723         node_files = None
1724       else:
1725         node_files = nresult.payload.get(constants.NV_FILELIST, None)
1726
1727       test = not (node_files and isinstance(node_files, dict))
1728       errorif(test, cls.ENODEFILECHECK, node.name,
1729               "Node did not return file checksum data")
1730       if test:
1731         continue
1732
1733       for (filename, checksum) in node_files.items():
1734         # Check if the file should be considered for a node
1735         fn = file2nodefn[filename]
1736         if fn is None or fn(node):
1737           fileinfo[filename].setdefault(checksum, set()).add(node.name)
1738
1739     for (filename, checksums) in fileinfo.items():
1740       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1741
1742       # Nodes having the file
1743       with_file = frozenset(node_name
1744                             for nodes in fileinfo[filename].values()
1745                             for node_name in nodes)
1746
1747       # Nodes missing file
1748       missing_file = node_names - with_file
1749
1750       if filename in files_all_opt:
1751         # All or no nodes
1752         errorif(missing_file and missing_file != node_names,
1753                 cls.ECLUSTERFILECHECK, None,
1754                 "File %s is optional, but it must exist on all or no nodes (not"
1755                 " found on %s)",
1756                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1757       else:
1758         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1759                 "File %s is missing from node(s) %s", filename,
1760                 utils.CommaJoin(utils.NiceSort(missing_file)))
1761
1762       # See if there are multiple versions of the file
1763       test = len(checksums) > 1
1764       if test:
1765         variants = ["variant %s on %s" %
1766                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1767                     for (idx, (checksum, nodes)) in
1768                       enumerate(sorted(checksums.items()))]
1769       else:
1770         variants = []
1771
1772       errorif(test, cls.ECLUSTERFILECHECK, None,
1773               "File %s found with %s different checksums (%s)",
1774               filename, len(checksums), "; ".join(variants))
1775
1776   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1777                       drbd_map):
1778     """Verifies and the node DRBD status.
1779
1780     @type ninfo: L{objects.Node}
1781     @param ninfo: the node to check
1782     @param nresult: the remote results for the node
1783     @param instanceinfo: the dict of instances
1784     @param drbd_helper: the configured DRBD usermode helper
1785     @param drbd_map: the DRBD map as returned by
1786         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1787
1788     """
1789     node = ninfo.name
1790     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1791
1792     if drbd_helper:
1793       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1794       test = (helper_result == None)
1795       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1796                "no drbd usermode helper returned")
1797       if helper_result:
1798         status, payload = helper_result
1799         test = not status
1800         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1801                  "drbd usermode helper check unsuccessful: %s", payload)
1802         test = status and (payload != drbd_helper)
1803         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1804                  "wrong drbd usermode helper: %s", payload)
1805
1806     # compute the DRBD minors
1807     node_drbd = {}
1808     for minor, instance in drbd_map[node].items():
1809       test = instance not in instanceinfo
1810       _ErrorIf(test, self.ECLUSTERCFG, None,
1811                "ghost instance '%s' in temporary DRBD map", instance)
1812         # ghost instance should not be running, but otherwise we
1813         # don't give double warnings (both ghost instance and
1814         # unallocated minor in use)
1815       if test:
1816         node_drbd[minor] = (instance, False)
1817       else:
1818         instance = instanceinfo[instance]
1819         node_drbd[minor] = (instance.name, instance.admin_up)
1820
1821     # and now check them
1822     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1823     test = not isinstance(used_minors, (tuple, list))
1824     _ErrorIf(test, self.ENODEDRBD, node,
1825              "cannot parse drbd status file: %s", str(used_minors))
1826     if test:
1827       # we cannot check drbd status
1828       return
1829
1830     for minor, (iname, must_exist) in node_drbd.items():
1831       test = minor not in used_minors and must_exist
1832       _ErrorIf(test, self.ENODEDRBD, node,
1833                "drbd minor %d of instance %s is not active", minor, iname)
1834     for minor in used_minors:
1835       test = minor not in node_drbd
1836       _ErrorIf(test, self.ENODEDRBD, node,
1837                "unallocated drbd minor %d is in use", minor)
1838
1839   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1840     """Builds the node OS structures.
1841
1842     @type ninfo: L{objects.Node}
1843     @param ninfo: the node to check
1844     @param nresult: the remote results for the node
1845     @param nimg: the node image object
1846
1847     """
1848     node = ninfo.name
1849     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1850
1851     remote_os = nresult.get(constants.NV_OSLIST, None)
1852     test = (not isinstance(remote_os, list) or
1853             not compat.all(isinstance(v, list) and len(v) == 7
1854                            for v in remote_os))
1855
1856     _ErrorIf(test, self.ENODEOS, node,
1857              "node hasn't returned valid OS data")
1858
1859     nimg.os_fail = test
1860
1861     if test:
1862       return
1863
1864     os_dict = {}
1865
1866     for (name, os_path, status, diagnose,
1867          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1868
1869       if name not in os_dict:
1870         os_dict[name] = []
1871
1872       # parameters is a list of lists instead of list of tuples due to
1873       # JSON lacking a real tuple type, fix it:
1874       parameters = [tuple(v) for v in parameters]
1875       os_dict[name].append((os_path, status, diagnose,
1876                             set(variants), set(parameters), set(api_ver)))
1877
1878     nimg.oslist = os_dict
1879
1880   def _VerifyNodeOS(self, ninfo, nimg, base):
1881     """Verifies the node OS list.
1882
1883     @type ninfo: L{objects.Node}
1884     @param ninfo: the node to check
1885     @param nimg: the node image object
1886     @param base: the 'template' node we match against (e.g. from the master)
1887
1888     """
1889     node = ninfo.name
1890     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1891
1892     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1893
1894     for os_name, os_data in nimg.oslist.items():
1895       assert os_data, "Empty OS status for OS %s?!" % os_name
1896       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1897       _ErrorIf(not f_status, self.ENODEOS, node,
1898                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1899       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1900                "OS '%s' has multiple entries (first one shadows the rest): %s",
1901                os_name, utils.CommaJoin([v[0] for v in os_data]))
1902       # this will catched in backend too
1903       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1904                and not f_var, self.ENODEOS, node,
1905                "OS %s with API at least %d does not declare any variant",
1906                os_name, constants.OS_API_V15)
1907       # comparisons with the 'base' image
1908       test = os_name not in base.oslist
1909       _ErrorIf(test, self.ENODEOS, node,
1910                "Extra OS %s not present on reference node (%s)",
1911                os_name, base.name)
1912       if test:
1913         continue
1914       assert base.oslist[os_name], "Base node has empty OS status?"
1915       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1916       if not b_status:
1917         # base OS is invalid, skipping
1918         continue
1919       for kind, a, b in [("API version", f_api, b_api),
1920                          ("variants list", f_var, b_var),
1921                          ("parameters", f_param, b_param)]:
1922         _ErrorIf(a != b, self.ENODEOS, node,
1923                  "OS %s %s differs from reference node %s: %s vs. %s",
1924                  kind, os_name, base.name,
1925                  utils.CommaJoin(a), utils.CommaJoin(b))
1926
1927     # check any missing OSes
1928     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1929     _ErrorIf(missing, self.ENODEOS, node,
1930              "OSes present on reference node %s but missing on this node: %s",
1931              base.name, utils.CommaJoin(missing))
1932
1933   def _VerifyOob(self, ninfo, nresult):
1934     """Verifies out of band functionality of a node.
1935
1936     @type ninfo: L{objects.Node}
1937     @param ninfo: the node to check
1938     @param nresult: the remote results for the node
1939
1940     """
1941     node = ninfo.name
1942     # We just have to verify the paths on master and/or master candidates
1943     # as the oob helper is invoked on the master
1944     if ((ninfo.master_candidate or ninfo.master_capable) and
1945         constants.NV_OOB_PATHS in nresult):
1946       for path_result in nresult[constants.NV_OOB_PATHS]:
1947         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1948
1949   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1950     """Verifies and updates the node volume data.
1951
1952     This function will update a L{NodeImage}'s internal structures
1953     with data from the remote call.
1954
1955     @type ninfo: L{objects.Node}
1956     @param ninfo: the node to check
1957     @param nresult: the remote results for the node
1958     @param nimg: the node image object
1959     @param vg_name: the configured VG name
1960
1961     """
1962     node = ninfo.name
1963     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1964
1965     nimg.lvm_fail = True
1966     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1967     if vg_name is None:
1968       pass
1969     elif isinstance(lvdata, basestring):
1970       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1971                utils.SafeEncode(lvdata))
1972     elif not isinstance(lvdata, dict):
1973       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1974     else:
1975       nimg.volumes = lvdata
1976       nimg.lvm_fail = False
1977
1978   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1979     """Verifies and updates the node instance list.
1980
1981     If the listing was successful, then updates this node's instance
1982     list. Otherwise, it marks the RPC call as failed for the instance
1983     list key.
1984
1985     @type ninfo: L{objects.Node}
1986     @param ninfo: the node to check
1987     @param nresult: the remote results for the node
1988     @param nimg: the node image object
1989
1990     """
1991     idata = nresult.get(constants.NV_INSTANCELIST, None)
1992     test = not isinstance(idata, list)
1993     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1994                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1995     if test:
1996       nimg.hyp_fail = True
1997     else:
1998       nimg.instances = idata
1999
2000   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2001     """Verifies and computes a node information map
2002
2003     @type ninfo: L{objects.Node}
2004     @param ninfo: the node to check
2005     @param nresult: the remote results for the node
2006     @param nimg: the node image object
2007     @param vg_name: the configured VG name
2008
2009     """
2010     node = ninfo.name
2011     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2012
2013     # try to read free memory (from the hypervisor)
2014     hv_info = nresult.get(constants.NV_HVINFO, None)
2015     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2016     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2017     if not test:
2018       try:
2019         nimg.mfree = int(hv_info["memory_free"])
2020       except (ValueError, TypeError):
2021         _ErrorIf(True, self.ENODERPC, node,
2022                  "node returned invalid nodeinfo, check hypervisor")
2023
2024     # FIXME: devise a free space model for file based instances as well
2025     if vg_name is not None:
2026       test = (constants.NV_VGLIST not in nresult or
2027               vg_name not in nresult[constants.NV_VGLIST])
2028       _ErrorIf(test, self.ENODELVM, node,
2029                "node didn't return data for the volume group '%s'"
2030                " - it is either missing or broken", vg_name)
2031       if not test:
2032         try:
2033           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2034         except (ValueError, TypeError):
2035           _ErrorIf(True, self.ENODERPC, node,
2036                    "node returned invalid LVM info, check LVM status")
2037
2038   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2039     """Gets per-disk status information for all instances.
2040
2041     @type nodelist: list of strings
2042     @param nodelist: Node names
2043     @type node_image: dict of (name, L{objects.Node})
2044     @param node_image: Node objects
2045     @type instanceinfo: dict of (name, L{objects.Instance})
2046     @param instanceinfo: Instance objects
2047     @rtype: {instance: {node: [(succes, payload)]}}
2048     @return: a dictionary of per-instance dictionaries with nodes as
2049         keys and disk information as values; the disk information is a
2050         list of tuples (success, payload)
2051
2052     """
2053     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2054
2055     node_disks = {}
2056     node_disks_devonly = {}
2057     diskless_instances = set()
2058     diskless = constants.DT_DISKLESS
2059
2060     for nname in nodelist:
2061       node_instances = list(itertools.chain(node_image[nname].pinst,
2062                                             node_image[nname].sinst))
2063       diskless_instances.update(inst for inst in node_instances
2064                                 if instanceinfo[inst].disk_template == diskless)
2065       disks = [(inst, disk)
2066                for inst in node_instances
2067                for disk in instanceinfo[inst].disks]
2068
2069       if not disks:
2070         # No need to collect data
2071         continue
2072
2073       node_disks[nname] = disks
2074
2075       # Creating copies as SetDiskID below will modify the objects and that can
2076       # lead to incorrect data returned from nodes
2077       devonly = [dev.Copy() for (_, dev) in disks]
2078
2079       for dev in devonly:
2080         self.cfg.SetDiskID(dev, nname)
2081
2082       node_disks_devonly[nname] = devonly
2083
2084     assert len(node_disks) == len(node_disks_devonly)
2085
2086     # Collect data from all nodes with disks
2087     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2088                                                           node_disks_devonly)
2089
2090     assert len(result) == len(node_disks)
2091
2092     instdisk = {}
2093
2094     for (nname, nres) in result.items():
2095       disks = node_disks[nname]
2096
2097       if nres.offline:
2098         # No data from this node
2099         data = len(disks) * [(False, "node offline")]
2100       else:
2101         msg = nres.fail_msg
2102         _ErrorIf(msg, self.ENODERPC, nname,
2103                  "while getting disk information: %s", msg)
2104         if msg:
2105           # No data from this node
2106           data = len(disks) * [(False, msg)]
2107         else:
2108           data = []
2109           for idx, i in enumerate(nres.payload):
2110             if isinstance(i, (tuple, list)) and len(i) == 2:
2111               data.append(i)
2112             else:
2113               logging.warning("Invalid result from node %s, entry %d: %s",
2114                               nname, idx, i)
2115               data.append((False, "Invalid result from the remote node"))
2116
2117       for ((inst, _), status) in zip(disks, data):
2118         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2119
2120     # Add empty entries for diskless instances.
2121     for inst in diskless_instances:
2122       assert inst not in instdisk
2123       instdisk[inst] = {}
2124
2125     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2126                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2127                       compat.all(isinstance(s, (tuple, list)) and
2128                                  len(s) == 2 for s in statuses)
2129                       for inst, nnames in instdisk.items()
2130                       for nname, statuses in nnames.items())
2131     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2132
2133     return instdisk
2134
2135   def _VerifyHVP(self, hvp_data):
2136     """Verifies locally the syntax of the hypervisor parameters.
2137
2138     """
2139     for item, hv_name, hv_params in hvp_data:
2140       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2141              (item, hv_name))
2142       try:
2143         hv_class = hypervisor.GetHypervisor(hv_name)
2144         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2145         hv_class.CheckParameterSyntax(hv_params)
2146       except errors.GenericError, err:
2147         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2148
2149   def BuildHooksEnv(self):
2150     """Build hooks env.
2151
2152     Cluster-Verify hooks just ran in the post phase and their failure makes
2153     the output be logged in the verify output and the verification to fail.
2154
2155     """
2156     cfg = self.cfg
2157
2158     env = {
2159       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2160       }
2161
2162     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2163                for node in cfg.GetAllNodesInfo().values())
2164
2165     return env
2166
2167   def BuildHooksNodes(self):
2168     """Build hooks nodes.
2169
2170     """
2171     return ([], self.cfg.GetNodeList())
2172
2173   def Exec(self, feedback_fn):
2174     """Verify integrity of cluster, performing various test on nodes.
2175
2176     """
2177     # This method has too many local variables. pylint: disable-msg=R0914
2178     self.bad = False
2179     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2180     verbose = self.op.verbose
2181     self._feedback_fn = feedback_fn
2182     feedback_fn("* Verifying global settings")
2183     for msg in self.cfg.VerifyConfig():
2184       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2185
2186     # Check the cluster certificates
2187     for cert_filename in constants.ALL_CERT_FILES:
2188       (errcode, msg) = _VerifyCertificate(cert_filename)
2189       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2190
2191     vg_name = self.cfg.GetVGName()
2192     drbd_helper = self.cfg.GetDRBDHelper()
2193     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2194     cluster = self.cfg.GetClusterInfo()
2195     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2196     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2197     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2198     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2199     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2200                         for iname in instancelist)
2201     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2202     i_non_redundant = [] # Non redundant instances
2203     i_non_a_balanced = [] # Non auto-balanced instances
2204     n_offline = 0 # Count of offline nodes
2205     n_drained = 0 # Count of nodes being drained
2206     node_vol_should = {}
2207
2208     # FIXME: verify OS list
2209
2210     # File verification
2211     filemap = _ComputeAncillaryFiles(cluster, False)
2212
2213     # do local checksums
2214     master_node = self.master_node = self.cfg.GetMasterNode()
2215     master_ip = self.cfg.GetMasterIP()
2216
2217     # Compute the set of hypervisor parameters
2218     hvp_data = []
2219     for hv_name in hypervisors:
2220       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2221     for os_name, os_hvp in cluster.os_hvp.items():
2222       for hv_name, hv_params in os_hvp.items():
2223         if not hv_params:
2224           continue
2225         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2226         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2227     # TODO: collapse identical parameter values in a single one
2228     for instance in instanceinfo.values():
2229       if not instance.hvparams:
2230         continue
2231       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2232                        cluster.FillHV(instance)))
2233     # and verify them locally
2234     self._VerifyHVP(hvp_data)
2235
2236     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2237     node_verify_param = {
2238       constants.NV_FILELIST:
2239         utils.UniqueSequence(filename
2240                              for files in filemap
2241                              for filename in files),
2242       constants.NV_NODELIST: [node.name for node in nodeinfo
2243                               if not node.offline],
2244       constants.NV_HYPERVISOR: hypervisors,
2245       constants.NV_HVPARAMS: hvp_data,
2246       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2247                                   node.secondary_ip) for node in nodeinfo
2248                                  if not node.offline],
2249       constants.NV_INSTANCELIST: hypervisors,
2250       constants.NV_VERSION: None,
2251       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2252       constants.NV_NODESETUP: None,
2253       constants.NV_TIME: None,
2254       constants.NV_MASTERIP: (master_node, master_ip),
2255       constants.NV_OSLIST: None,
2256       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2257       }
2258
2259     if vg_name is not None:
2260       node_verify_param[constants.NV_VGLIST] = None
2261       node_verify_param[constants.NV_LVLIST] = vg_name
2262       node_verify_param[constants.NV_PVLIST] = [vg_name]
2263       node_verify_param[constants.NV_DRBDLIST] = None
2264
2265     if drbd_helper:
2266       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2267
2268     # Build our expected cluster state
2269     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2270                                                  name=node.name,
2271                                                  vm_capable=node.vm_capable))
2272                       for node in nodeinfo)
2273
2274     # Gather OOB paths
2275     oob_paths = []
2276     for node in nodeinfo:
2277       path = _SupportsOob(self.cfg, node)
2278       if path and path not in oob_paths:
2279         oob_paths.append(path)
2280
2281     if oob_paths:
2282       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2283
2284     for instance in instancelist:
2285       inst_config = instanceinfo[instance]
2286
2287       for nname in inst_config.all_nodes:
2288         if nname not in node_image:
2289           # ghost node
2290           gnode = self.NodeImage(name=nname)
2291           gnode.ghost = True
2292           node_image[nname] = gnode
2293
2294       inst_config.MapLVsByNode(node_vol_should)
2295
2296       pnode = inst_config.primary_node
2297       node_image[pnode].pinst.append(instance)
2298
2299       for snode in inst_config.secondary_nodes:
2300         nimg = node_image[snode]
2301         nimg.sinst.append(instance)
2302         if pnode not in nimg.sbp:
2303           nimg.sbp[pnode] = []
2304         nimg.sbp[pnode].append(instance)
2305
2306     # At this point, we have the in-memory data structures complete,
2307     # except for the runtime information, which we'll gather next
2308
2309     # Due to the way our RPC system works, exact response times cannot be
2310     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2311     # time before and after executing the request, we can at least have a time
2312     # window.
2313     nvinfo_starttime = time.time()
2314     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2315                                            self.cfg.GetClusterName())
2316     nvinfo_endtime = time.time()
2317
2318     all_drbd_map = self.cfg.ComputeDRBDMap()
2319
2320     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2321     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2322
2323     feedback_fn("* Verifying configuration file consistency")
2324     self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2325
2326     feedback_fn("* Verifying node status")
2327
2328     refos_img = None
2329
2330     for node_i in nodeinfo:
2331       node = node_i.name
2332       nimg = node_image[node]
2333
2334       if node_i.offline:
2335         if verbose:
2336           feedback_fn("* Skipping offline node %s" % (node,))
2337         n_offline += 1
2338         continue
2339
2340       if node == master_node:
2341         ntype = "master"
2342       elif node_i.master_candidate:
2343         ntype = "master candidate"
2344       elif node_i.drained:
2345         ntype = "drained"
2346         n_drained += 1
2347       else:
2348         ntype = "regular"
2349       if verbose:
2350         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2351
2352       msg = all_nvinfo[node].fail_msg
2353       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2354       if msg:
2355         nimg.rpc_fail = True
2356         continue
2357
2358       nresult = all_nvinfo[node].payload
2359
2360       nimg.call_ok = self._VerifyNode(node_i, nresult)
2361       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2362       self._VerifyNodeNetwork(node_i, nresult)
2363       self._VerifyOob(node_i, nresult)
2364
2365       if nimg.vm_capable:
2366         self._VerifyNodeLVM(node_i, nresult, vg_name)
2367         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2368                              all_drbd_map)
2369
2370         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2371         self._UpdateNodeInstances(node_i, nresult, nimg)
2372         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2373         self._UpdateNodeOS(node_i, nresult, nimg)
2374         if not nimg.os_fail:
2375           if refos_img is None:
2376             refos_img = nimg
2377           self._VerifyNodeOS(node_i, nimg, refos_img)
2378
2379     feedback_fn("* Verifying instance status")
2380     for instance in instancelist:
2381       if verbose:
2382         feedback_fn("* Verifying instance %s" % instance)
2383       inst_config = instanceinfo[instance]
2384       self._VerifyInstance(instance, inst_config, node_image,
2385                            instdisk[instance])
2386       inst_nodes_offline = []
2387
2388       pnode = inst_config.primary_node
2389       pnode_img = node_image[pnode]
2390       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2391                self.ENODERPC, pnode, "instance %s, connection to"
2392                " primary node failed", instance)
2393
2394       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2395                self.EINSTANCEBADNODE, instance,
2396                "instance is marked as running and lives on offline node %s",
2397                inst_config.primary_node)
2398
2399       # If the instance is non-redundant we cannot survive losing its primary
2400       # node, so we are not N+1 compliant. On the other hand we have no disk
2401       # templates with more than one secondary so that situation is not well
2402       # supported either.
2403       # FIXME: does not support file-backed instances
2404       if not inst_config.secondary_nodes:
2405         i_non_redundant.append(instance)
2406
2407       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2408                instance, "instance has multiple secondary nodes: %s",
2409                utils.CommaJoin(inst_config.secondary_nodes),
2410                code=self.ETYPE_WARNING)
2411
2412       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2413         pnode = inst_config.primary_node
2414         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2415         instance_groups = {}
2416
2417         for node in instance_nodes:
2418           instance_groups.setdefault(nodeinfo_byname[node].group,
2419                                      []).append(node)
2420
2421         pretty_list = [
2422           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2423           # Sort so that we always list the primary node first.
2424           for group, nodes in sorted(instance_groups.items(),
2425                                      key=lambda (_, nodes): pnode in nodes,
2426                                      reverse=True)]
2427
2428         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2429                       instance, "instance has primary and secondary nodes in"
2430                       " different groups: %s", utils.CommaJoin(pretty_list),
2431                       code=self.ETYPE_WARNING)
2432
2433       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2434         i_non_a_balanced.append(instance)
2435
2436       for snode in inst_config.secondary_nodes:
2437         s_img = node_image[snode]
2438         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2439                  "instance %s, connection to secondary node failed", instance)
2440
2441         if s_img.offline:
2442           inst_nodes_offline.append(snode)
2443
2444       # warn that the instance lives on offline nodes
2445       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2446                "instance has offline secondary node(s) %s",
2447                utils.CommaJoin(inst_nodes_offline))
2448       # ... or ghost/non-vm_capable nodes
2449       for node in inst_config.all_nodes:
2450         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2451                  "instance lives on ghost node %s", node)
2452         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2453                  instance, "instance lives on non-vm_capable node %s", node)
2454
2455     feedback_fn("* Verifying orphan volumes")
2456     reserved = utils.FieldSet(*cluster.reserved_lvs)
2457     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2458
2459     feedback_fn("* Verifying orphan instances")
2460     self._VerifyOrphanInstances(instancelist, node_image)
2461
2462     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2463       feedback_fn("* Verifying N+1 Memory redundancy")
2464       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2465
2466     feedback_fn("* Other Notes")
2467     if i_non_redundant:
2468       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2469                   % len(i_non_redundant))
2470
2471     if i_non_a_balanced:
2472       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2473                   % len(i_non_a_balanced))
2474
2475     if n_offline:
2476       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2477
2478     if n_drained:
2479       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2480
2481     return not self.bad
2482
2483   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2484     """Analyze the post-hooks' result
2485
2486     This method analyses the hook result, handles it, and sends some
2487     nicely-formatted feedback back to the user.
2488
2489     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2490         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2491     @param hooks_results: the results of the multi-node hooks rpc call
2492     @param feedback_fn: function used send feedback back to the caller
2493     @param lu_result: previous Exec result
2494     @return: the new Exec result, based on the previous result
2495         and hook results
2496
2497     """
2498     # We only really run POST phase hooks, and are only interested in
2499     # their results
2500     if phase == constants.HOOKS_PHASE_POST:
2501       # Used to change hooks' output to proper indentation
2502       feedback_fn("* Hooks Results")
2503       assert hooks_results, "invalid result from hooks"
2504
2505       for node_name in hooks_results:
2506         res = hooks_results[node_name]
2507         msg = res.fail_msg
2508         test = msg and not res.offline
2509         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2510                       "Communication failure in hooks execution: %s", msg)
2511         if res.offline or msg:
2512           # No need to investigate payload if node is offline or gave an error.
2513           # override manually lu_result here as _ErrorIf only
2514           # overrides self.bad
2515           lu_result = 1
2516           continue
2517         for script, hkr, output in res.payload:
2518           test = hkr == constants.HKR_FAIL
2519           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2520                         "Script %s failed, output:", script)
2521           if test:
2522             output = self._HOOKS_INDENT_RE.sub('      ', output)
2523             feedback_fn("%s" % output)
2524             lu_result = 0
2525
2526       return lu_result
2527
2528
2529 class LUClusterVerifyDisks(NoHooksLU):
2530   """Verifies the cluster disks status.
2531
2532   """
2533   REQ_BGL = False
2534
2535   def ExpandNames(self):
2536     self.needed_locks = {
2537       locking.LEVEL_NODE: locking.ALL_SET,
2538       locking.LEVEL_INSTANCE: locking.ALL_SET,
2539     }
2540     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2541
2542   def Exec(self, feedback_fn):
2543     """Verify integrity of cluster disks.
2544
2545     @rtype: tuple of three items
2546     @return: a tuple of (dict of node-to-node_error, list of instances
2547         which need activate-disks, dict of instance: (node, volume) for
2548         missing volumes
2549
2550     """
2551     result = res_nodes, res_instances, res_missing = {}, [], {}
2552
2553     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2554     instances = self.cfg.GetAllInstancesInfo().values()
2555
2556     nv_dict = {}
2557     for inst in instances:
2558       inst_lvs = {}
2559       if not inst.admin_up:
2560         continue
2561       inst.MapLVsByNode(inst_lvs)
2562       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2563       for node, vol_list in inst_lvs.iteritems():
2564         for vol in vol_list:
2565           nv_dict[(node, vol)] = inst
2566
2567     if not nv_dict:
2568       return result
2569
2570     node_lvs = self.rpc.call_lv_list(nodes, [])
2571     for node, node_res in node_lvs.items():
2572       if node_res.offline:
2573         continue
2574       msg = node_res.fail_msg
2575       if msg:
2576         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2577         res_nodes[node] = msg
2578         continue
2579
2580       lvs = node_res.payload
2581       for lv_name, (_, _, lv_online) in lvs.items():
2582         inst = nv_dict.pop((node, lv_name), None)
2583         if (not lv_online and inst is not None
2584             and inst.name not in res_instances):
2585           res_instances.append(inst.name)
2586
2587     # any leftover items in nv_dict are missing LVs, let's arrange the
2588     # data better
2589     for key, inst in nv_dict.iteritems():
2590       if inst.name not in res_missing:
2591         res_missing[inst.name] = []
2592       res_missing[inst.name].append(key)
2593
2594     return result
2595
2596
2597 class LUClusterRepairDiskSizes(NoHooksLU):
2598   """Verifies the cluster disks sizes.
2599
2600   """
2601   REQ_BGL = False
2602
2603   def ExpandNames(self):
2604     if self.op.instances:
2605       self.wanted_names = []
2606       for name in self.op.instances:
2607         full_name = _ExpandInstanceName(self.cfg, name)
2608         self.wanted_names.append(full_name)
2609       self.needed_locks = {
2610         locking.LEVEL_NODE: [],
2611         locking.LEVEL_INSTANCE: self.wanted_names,
2612         }
2613       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2614     else:
2615       self.wanted_names = None
2616       self.needed_locks = {
2617         locking.LEVEL_NODE: locking.ALL_SET,
2618         locking.LEVEL_INSTANCE: locking.ALL_SET,
2619         }
2620     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2621
2622   def DeclareLocks(self, level):
2623     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2624       self._LockInstancesNodes(primary_only=True)
2625
2626   def CheckPrereq(self):
2627     """Check prerequisites.
2628
2629     This only checks the optional instance list against the existing names.
2630
2631     """
2632     if self.wanted_names is None:
2633       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2634
2635     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2636                              in self.wanted_names]
2637
2638   def _EnsureChildSizes(self, disk):
2639     """Ensure children of the disk have the needed disk size.
2640
2641     This is valid mainly for DRBD8 and fixes an issue where the
2642     children have smaller disk size.
2643
2644     @param disk: an L{ganeti.objects.Disk} object
2645
2646     """
2647     if disk.dev_type == constants.LD_DRBD8:
2648       assert disk.children, "Empty children for DRBD8?"
2649       fchild = disk.children[0]
2650       mismatch = fchild.size < disk.size
2651       if mismatch:
2652         self.LogInfo("Child disk has size %d, parent %d, fixing",
2653                      fchild.size, disk.size)
2654         fchild.size = disk.size
2655
2656       # and we recurse on this child only, not on the metadev
2657       return self._EnsureChildSizes(fchild) or mismatch
2658     else:
2659       return False
2660
2661   def Exec(self, feedback_fn):
2662     """Verify the size of cluster disks.
2663
2664     """
2665     # TODO: check child disks too
2666     # TODO: check differences in size between primary/secondary nodes
2667     per_node_disks = {}
2668     for instance in self.wanted_instances:
2669       pnode = instance.primary_node
2670       if pnode not in per_node_disks:
2671         per_node_disks[pnode] = []
2672       for idx, disk in enumerate(instance.disks):
2673         per_node_disks[pnode].append((instance, idx, disk))
2674
2675     changed = []
2676     for node, dskl in per_node_disks.items():
2677       newl = [v[2].Copy() for v in dskl]
2678       for dsk in newl:
2679         self.cfg.SetDiskID(dsk, node)
2680       result = self.rpc.call_blockdev_getsize(node, newl)
2681       if result.fail_msg:
2682         self.LogWarning("Failure in blockdev_getsize call to node"
2683                         " %s, ignoring", node)
2684         continue
2685       if len(result.payload) != len(dskl):
2686         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2687                         " result.payload=%s", node, len(dskl), result.payload)
2688         self.LogWarning("Invalid result from node %s, ignoring node results",
2689                         node)
2690         continue
2691       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2692         if size is None:
2693           self.LogWarning("Disk %d of instance %s did not return size"
2694                           " information, ignoring", idx, instance.name)
2695           continue
2696         if not isinstance(size, (int, long)):
2697           self.LogWarning("Disk %d of instance %s did not return valid"
2698                           " size information, ignoring", idx, instance.name)
2699           continue
2700         size = size >> 20
2701         if size != disk.size:
2702           self.LogInfo("Disk %d of instance %s has mismatched size,"
2703                        " correcting: recorded %d, actual %d", idx,
2704                        instance.name, disk.size, size)
2705           disk.size = size
2706           self.cfg.Update(instance, feedback_fn)
2707           changed.append((instance.name, idx, size))
2708         if self._EnsureChildSizes(disk):
2709           self.cfg.Update(instance, feedback_fn)
2710           changed.append((instance.name, idx, disk.size))
2711     return changed
2712
2713
2714 class LUClusterRename(LogicalUnit):
2715   """Rename the cluster.
2716
2717   """
2718   HPATH = "cluster-rename"
2719   HTYPE = constants.HTYPE_CLUSTER
2720
2721   def BuildHooksEnv(self):
2722     """Build hooks env.
2723
2724     """
2725     return {
2726       "OP_TARGET": self.cfg.GetClusterName(),
2727       "NEW_NAME": self.op.name,
2728       }
2729
2730   def BuildHooksNodes(self):
2731     """Build hooks nodes.
2732
2733     """
2734     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2735
2736   def CheckPrereq(self):
2737     """Verify that the passed name is a valid one.
2738
2739     """
2740     hostname = netutils.GetHostname(name=self.op.name,
2741                                     family=self.cfg.GetPrimaryIPFamily())
2742
2743     new_name = hostname.name
2744     self.ip = new_ip = hostname.ip
2745     old_name = self.cfg.GetClusterName()
2746     old_ip = self.cfg.GetMasterIP()
2747     if new_name == old_name and new_ip == old_ip:
2748       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2749                                  " cluster has changed",
2750                                  errors.ECODE_INVAL)
2751     if new_ip != old_ip:
2752       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2753         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2754                                    " reachable on the network" %
2755                                    new_ip, errors.ECODE_NOTUNIQUE)
2756
2757     self.op.name = new_name
2758
2759   def Exec(self, feedback_fn):
2760     """Rename the cluster.
2761
2762     """
2763     clustername = self.op.name
2764     ip = self.ip
2765
2766     # shutdown the master IP
2767     master = self.cfg.GetMasterNode()
2768     result = self.rpc.call_node_stop_master(master, False)
2769     result.Raise("Could not disable the master role")
2770
2771     try:
2772       cluster = self.cfg.GetClusterInfo()
2773       cluster.cluster_name = clustername
2774       cluster.master_ip = ip
2775       self.cfg.Update(cluster, feedback_fn)
2776
2777       # update the known hosts file
2778       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2779       node_list = self.cfg.GetOnlineNodeList()
2780       try:
2781         node_list.remove(master)
2782       except ValueError:
2783         pass
2784       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2785     finally:
2786       result = self.rpc.call_node_start_master(master, False, False)
2787       msg = result.fail_msg
2788       if msg:
2789         self.LogWarning("Could not re-enable the master role on"
2790                         " the master, please restart manually: %s", msg)
2791
2792     return clustername
2793
2794
2795 class LUClusterSetParams(LogicalUnit):
2796   """Change the parameters of the cluster.
2797
2798   """
2799   HPATH = "cluster-modify"
2800   HTYPE = constants.HTYPE_CLUSTER
2801   REQ_BGL = False
2802
2803   def CheckArguments(self):
2804     """Check parameters
2805
2806     """
2807     if self.op.uid_pool:
2808       uidpool.CheckUidPool(self.op.uid_pool)
2809
2810     if self.op.add_uids:
2811       uidpool.CheckUidPool(self.op.add_uids)
2812
2813     if self.op.remove_uids:
2814       uidpool.CheckUidPool(self.op.remove_uids)
2815
2816   def ExpandNames(self):
2817     # FIXME: in the future maybe other cluster params won't require checking on
2818     # all nodes to be modified.
2819     self.needed_locks = {
2820       locking.LEVEL_NODE: locking.ALL_SET,
2821     }
2822     self.share_locks[locking.LEVEL_NODE] = 1
2823
2824   def BuildHooksEnv(self):
2825     """Build hooks env.
2826
2827     """
2828     return {
2829       "OP_TARGET": self.cfg.GetClusterName(),
2830       "NEW_VG_NAME": self.op.vg_name,
2831       }
2832
2833   def BuildHooksNodes(self):
2834     """Build hooks nodes.
2835
2836     """
2837     mn = self.cfg.GetMasterNode()
2838     return ([mn], [mn])
2839
2840   def CheckPrereq(self):
2841     """Check prerequisites.
2842
2843     This checks whether the given params don't conflict and
2844     if the given volume group is valid.
2845
2846     """
2847     if self.op.vg_name is not None and not self.op.vg_name:
2848       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2849         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2850                                    " instances exist", errors.ECODE_INVAL)
2851
2852     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2853       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2854         raise errors.OpPrereqError("Cannot disable drbd helper while"
2855                                    " drbd-based instances exist",
2856                                    errors.ECODE_INVAL)
2857
2858     node_list = self.acquired_locks[locking.LEVEL_NODE]
2859
2860     # if vg_name not None, checks given volume group on all nodes
2861     if self.op.vg_name:
2862       vglist = self.rpc.call_vg_list(node_list)
2863       for node in node_list:
2864         msg = vglist[node].fail_msg
2865         if msg:
2866           # ignoring down node
2867           self.LogWarning("Error while gathering data on node %s"
2868                           " (ignoring node): %s", node, msg)
2869           continue
2870         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2871                                               self.op.vg_name,
2872                                               constants.MIN_VG_SIZE)
2873         if vgstatus:
2874           raise errors.OpPrereqError("Error on node '%s': %s" %
2875                                      (node, vgstatus), errors.ECODE_ENVIRON)
2876
2877     if self.op.drbd_helper:
2878       # checks given drbd helper on all nodes
2879       helpers = self.rpc.call_drbd_helper(node_list)
2880       for node in node_list:
2881         ninfo = self.cfg.GetNodeInfo(node)
2882         if ninfo.offline:
2883           self.LogInfo("Not checking drbd helper on offline node %s", node)
2884           continue
2885         msg = helpers[node].fail_msg
2886         if msg:
2887           raise errors.OpPrereqError("Error checking drbd helper on node"
2888                                      " '%s': %s" % (node, msg),
2889                                      errors.ECODE_ENVIRON)
2890         node_helper = helpers[node].payload
2891         if node_helper != self.op.drbd_helper:
2892           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2893                                      (node, node_helper), errors.ECODE_ENVIRON)
2894
2895     self.cluster = cluster = self.cfg.GetClusterInfo()
2896     # validate params changes
2897     if self.op.beparams:
2898       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2899       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2900
2901     if self.op.ndparams:
2902       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2903       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2904
2905       # TODO: we need a more general way to handle resetting
2906       # cluster-level parameters to default values
2907       if self.new_ndparams["oob_program"] == "":
2908         self.new_ndparams["oob_program"] = \
2909             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2910
2911     if self.op.nicparams:
2912       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2913       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2914       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2915       nic_errors = []
2916
2917       # check all instances for consistency
2918       for instance in self.cfg.GetAllInstancesInfo().values():
2919         for nic_idx, nic in enumerate(instance.nics):
2920           params_copy = copy.deepcopy(nic.nicparams)
2921           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2922
2923           # check parameter syntax
2924           try:
2925             objects.NIC.CheckParameterSyntax(params_filled)
2926           except errors.ConfigurationError, err:
2927             nic_errors.append("Instance %s, nic/%d: %s" %
2928                               (instance.name, nic_idx, err))
2929
2930           # if we're moving instances to routed, check that they have an ip
2931           target_mode = params_filled[constants.NIC_MODE]
2932           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2933             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2934                               (instance.name, nic_idx))
2935       if nic_errors:
2936         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2937                                    "\n".join(nic_errors))
2938
2939     # hypervisor list/parameters
2940     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2941     if self.op.hvparams:
2942       for hv_name, hv_dict in self.op.hvparams.items():
2943         if hv_name not in self.new_hvparams:
2944           self.new_hvparams[hv_name] = hv_dict
2945         else:
2946           self.new_hvparams[hv_name].update(hv_dict)
2947
2948     # os hypervisor parameters
2949     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2950     if self.op.os_hvp:
2951       for os_name, hvs in self.op.os_hvp.items():
2952         if os_name not in self.new_os_hvp:
2953           self.new_os_hvp[os_name] = hvs
2954         else:
2955           for hv_name, hv_dict in hvs.items():
2956             if hv_name not in self.new_os_hvp[os_name]:
2957               self.new_os_hvp[os_name][hv_name] = hv_dict
2958             else:
2959               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2960
2961     # os parameters
2962     self.new_osp = objects.FillDict(cluster.osparams, {})
2963     if self.op.osparams:
2964       for os_name, osp in self.op.osparams.items():
2965         if os_name not in self.new_osp:
2966           self.new_osp[os_name] = {}
2967
2968         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2969                                                   use_none=True)
2970
2971         if not self.new_osp[os_name]:
2972           # we removed all parameters
2973           del self.new_osp[os_name]
2974         else:
2975           # check the parameter validity (remote check)
2976           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2977                          os_name, self.new_osp[os_name])
2978
2979     # changes to the hypervisor list
2980     if self.op.enabled_hypervisors is not None:
2981       self.hv_list = self.op.enabled_hypervisors
2982       for hv in self.hv_list:
2983         # if the hypervisor doesn't already exist in the cluster
2984         # hvparams, we initialize it to empty, and then (in both
2985         # cases) we make sure to fill the defaults, as we might not
2986         # have a complete defaults list if the hypervisor wasn't
2987         # enabled before
2988         if hv not in new_hvp:
2989           new_hvp[hv] = {}
2990         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2991         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2992     else:
2993       self.hv_list = cluster.enabled_hypervisors
2994
2995     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2996       # either the enabled list has changed, or the parameters have, validate
2997       for hv_name, hv_params in self.new_hvparams.items():
2998         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2999             (self.op.enabled_hypervisors and
3000              hv_name in self.op.enabled_hypervisors)):
3001           # either this is a new hypervisor, or its parameters have changed
3002           hv_class = hypervisor.GetHypervisor(hv_name)
3003           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3004           hv_class.CheckParameterSyntax(hv_params)
3005           _CheckHVParams(self, node_list, hv_name, hv_params)
3006
3007     if self.op.os_hvp:
3008       # no need to check any newly-enabled hypervisors, since the
3009       # defaults have already been checked in the above code-block
3010       for os_name, os_hvp in self.new_os_hvp.items():
3011         for hv_name, hv_params in os_hvp.items():
3012           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3013           # we need to fill in the new os_hvp on top of the actual hv_p
3014           cluster_defaults = self.new_hvparams.get(hv_name, {})
3015           new_osp = objects.FillDict(cluster_defaults, hv_params)
3016           hv_class = hypervisor.GetHypervisor(hv_name)
3017           hv_class.CheckParameterSyntax(new_osp)
3018           _CheckHVParams(self, node_list, hv_name, new_osp)
3019
3020     if self.op.default_iallocator:
3021       alloc_script = utils.FindFile(self.op.default_iallocator,
3022                                     constants.IALLOCATOR_SEARCH_PATH,
3023                                     os.path.isfile)
3024       if alloc_script is None:
3025         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3026                                    " specified" % self.op.default_iallocator,
3027                                    errors.ECODE_INVAL)
3028
3029   def Exec(self, feedback_fn):
3030     """Change the parameters of the cluster.
3031
3032     """
3033     if self.op.vg_name is not None:
3034       new_volume = self.op.vg_name
3035       if not new_volume:
3036         new_volume = None
3037       if new_volume != self.cfg.GetVGName():
3038         self.cfg.SetVGName(new_volume)
3039       else:
3040         feedback_fn("Cluster LVM configuration already in desired"
3041                     " state, not changing")
3042     if self.op.drbd_helper is not None:
3043       new_helper = self.op.drbd_helper
3044       if not new_helper:
3045         new_helper = None
3046       if new_helper != self.cfg.GetDRBDHelper():
3047         self.cfg.SetDRBDHelper(new_helper)
3048       else:
3049         feedback_fn("Cluster DRBD helper already in desired state,"
3050                     " not changing")
3051     if self.op.hvparams:
3052       self.cluster.hvparams = self.new_hvparams
3053     if self.op.os_hvp:
3054       self.cluster.os_hvp = self.new_os_hvp
3055     if self.op.enabled_hypervisors is not None:
3056       self.cluster.hvparams = self.new_hvparams
3057       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3058     if self.op.beparams:
3059       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3060     if self.op.nicparams:
3061       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3062     if self.op.osparams:
3063       self.cluster.osparams = self.new_osp
3064     if self.op.ndparams:
3065       self.cluster.ndparams = self.new_ndparams
3066
3067     if self.op.candidate_pool_size is not None:
3068       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3069       # we need to update the pool size here, otherwise the save will fail
3070       _AdjustCandidatePool(self, [])
3071
3072     if self.op.maintain_node_health is not None:
3073       self.cluster.maintain_node_health = self.op.maintain_node_health
3074
3075     if self.op.prealloc_wipe_disks is not None:
3076       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3077
3078     if self.op.add_uids is not None:
3079       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3080
3081     if self.op.remove_uids is not None:
3082       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3083
3084     if self.op.uid_pool is not None:
3085       self.cluster.uid_pool = self.op.uid_pool
3086
3087     if self.op.default_iallocator is not None:
3088       self.cluster.default_iallocator = self.op.default_iallocator
3089
3090     if self.op.reserved_lvs is not None:
3091       self.cluster.reserved_lvs = self.op.reserved_lvs
3092
3093     def helper_os(aname, mods, desc):
3094       desc += " OS list"
3095       lst = getattr(self.cluster, aname)
3096       for key, val in mods:
3097         if key == constants.DDM_ADD:
3098           if val in lst:
3099             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3100           else:
3101             lst.append(val)
3102         elif key == constants.DDM_REMOVE:
3103           if val in lst:
3104             lst.remove(val)
3105           else:
3106             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3107         else:
3108           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3109
3110     if self.op.hidden_os:
3111       helper_os("hidden_os", self.op.hidden_os, "hidden")
3112
3113     if self.op.blacklisted_os:
3114       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3115
3116     if self.op.master_netdev:
3117       master = self.cfg.GetMasterNode()
3118       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3119                   self.cluster.master_netdev)
3120       result = self.rpc.call_node_stop_master(master, False)
3121       result.Raise("Could not disable the master ip")
3122       feedback_fn("Changing master_netdev from %s to %s" %
3123                   (self.cluster.master_netdev, self.op.master_netdev))
3124       self.cluster.master_netdev = self.op.master_netdev
3125
3126     self.cfg.Update(self.cluster, feedback_fn)
3127
3128     if self.op.master_netdev:
3129       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3130                   self.op.master_netdev)
3131       result = self.rpc.call_node_start_master(master, False, False)
3132       if result.fail_msg:
3133         self.LogWarning("Could not re-enable the master ip on"
3134                         " the master, please restart manually: %s",
3135                         result.fail_msg)
3136
3137
3138 def _UploadHelper(lu, nodes, fname):
3139   """Helper for uploading a file and showing warnings.
3140
3141   """
3142   if os.path.exists(fname):
3143     result = lu.rpc.call_upload_file(nodes, fname)
3144     for to_node, to_result in result.items():
3145       msg = to_result.fail_msg
3146       if msg:
3147         msg = ("Copy of file %s to node %s failed: %s" %
3148                (fname, to_node, msg))
3149         lu.proc.LogWarning(msg)
3150
3151
3152 def _ComputeAncillaryFiles(cluster, redist):
3153   """Compute files external to Ganeti which need to be consistent.
3154
3155   @type redist: boolean
3156   @param redist: Whether to include files which need to be redistributed
3157
3158   """
3159   # Compute files for all nodes
3160   files_all = set([
3161     constants.SSH_KNOWN_HOSTS_FILE,
3162     constants.CONFD_HMAC_KEY,
3163     constants.CLUSTER_DOMAIN_SECRET_FILE,
3164     ])
3165
3166   if not redist:
3167     files_all.update(constants.ALL_CERT_FILES)
3168     files_all.update(ssconf.SimpleStore().GetFileList())
3169
3170   if cluster.modify_etc_hosts:
3171     files_all.add(constants.ETC_HOSTS)
3172
3173   # Files which must either exist on all nodes or on none
3174   files_all_opt = set([
3175     constants.RAPI_USERS_FILE,
3176     ])
3177
3178   # Files which should only be on master candidates
3179   files_mc = set()
3180   if not redist:
3181     files_mc.add(constants.CLUSTER_CONF_FILE)
3182
3183   # Files which should only be on VM-capable nodes
3184   files_vm = set(filename
3185     for hv_name in cluster.enabled_hypervisors
3186     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3187
3188   # Filenames must be unique
3189   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3190           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3191          "Found file listed in more than one file list"
3192
3193   return (files_all, files_all_opt, files_mc, files_vm)
3194
3195
3196 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3197   """Distribute additional files which are part of the cluster configuration.
3198
3199   ConfigWriter takes care of distributing the config and ssconf files, but
3200   there are more files which should be distributed to all nodes. This function
3201   makes sure those are copied.
3202
3203   @param lu: calling logical unit
3204   @param additional_nodes: list of nodes not in the config to distribute to
3205   @type additional_vm: boolean
3206   @param additional_vm: whether the additional nodes are vm-capable or not
3207
3208   """
3209   # Gather target nodes
3210   cluster = lu.cfg.GetClusterInfo()
3211   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3212
3213   online_nodes = lu.cfg.GetOnlineNodeList()
3214   vm_nodes = lu.cfg.GetVmCapableNodeList()
3215
3216   if additional_nodes is not None:
3217     online_nodes.extend(additional_nodes)
3218     if additional_vm:
3219       vm_nodes.extend(additional_nodes)
3220
3221   # Never distribute to master node
3222   for nodelist in [online_nodes, vm_nodes]:
3223     if master_info.name in nodelist:
3224       nodelist.remove(master_info.name)
3225
3226   # Gather file lists
3227   (files_all, files_all_opt, files_mc, files_vm) = \
3228     _ComputeAncillaryFiles(cluster, True)
3229
3230   # Never re-distribute configuration file from here
3231   assert not (constants.CLUSTER_CONF_FILE in files_all or
3232               constants.CLUSTER_CONF_FILE in files_vm)
3233   assert not files_mc, "Master candidates not handled in this function"
3234
3235   filemap = [
3236     (online_nodes, files_all),
3237     (online_nodes, files_all_opt),
3238     (vm_nodes, files_vm),
3239     ]
3240
3241   # Upload the files
3242   for (node_list, files) in filemap:
3243     for fname in files:
3244       _UploadHelper(lu, node_list, fname)
3245
3246
3247 class LUClusterRedistConf(NoHooksLU):
3248   """Force the redistribution of cluster configuration.
3249
3250   This is a very simple LU.
3251
3252   """
3253   REQ_BGL = False
3254
3255   def ExpandNames(self):
3256     self.needed_locks = {
3257       locking.LEVEL_NODE: locking.ALL_SET,
3258     }
3259     self.share_locks[locking.LEVEL_NODE] = 1
3260
3261   def Exec(self, feedback_fn):
3262     """Redistribute the configuration.
3263
3264     """
3265     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3266     _RedistributeAncillaryFiles(self)
3267
3268
3269 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3270   """Sleep and poll for an instance's disk to sync.
3271
3272   """
3273   if not instance.disks or disks is not None and not disks:
3274     return True
3275
3276   disks = _ExpandCheckDisks(instance, disks)
3277
3278   if not oneshot:
3279     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3280
3281   node = instance.primary_node
3282
3283   for dev in disks:
3284     lu.cfg.SetDiskID(dev, node)
3285
3286   # TODO: Convert to utils.Retry
3287
3288   retries = 0
3289   degr_retries = 10 # in seconds, as we sleep 1 second each time
3290   while True:
3291     max_time = 0
3292     done = True
3293     cumul_degraded = False
3294     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3295     msg = rstats.fail_msg
3296     if msg:
3297       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3298       retries += 1
3299       if retries >= 10:
3300         raise errors.RemoteError("Can't contact node %s for mirror data,"
3301                                  " aborting." % node)
3302       time.sleep(6)
3303       continue
3304     rstats = rstats.payload
3305     retries = 0
3306     for i, mstat in enumerate(rstats):
3307       if mstat is None:
3308         lu.LogWarning("Can't compute data for node %s/%s",
3309                            node, disks[i].iv_name)
3310         continue
3311
3312       cumul_degraded = (cumul_degraded or
3313                         (mstat.is_degraded and mstat.sync_percent is None))
3314       if mstat.sync_percent is not None:
3315         done = False
3316         if mstat.estimated_time is not None:
3317           rem_time = ("%s remaining (estimated)" %
3318                       utils.FormatSeconds(mstat.estimated_time))
3319           max_time = mstat.estimated_time
3320         else:
3321           rem_time = "no time estimate"
3322         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3323                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3324
3325     # if we're done but degraded, let's do a few small retries, to
3326     # make sure we see a stable and not transient situation; therefore
3327     # we force restart of the loop
3328     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3329       logging.info("Degraded disks found, %d retries left", degr_retries)
3330       degr_retries -= 1
3331       time.sleep(1)
3332       continue
3333
3334     if done or oneshot:
3335       break
3336
3337     time.sleep(min(60, max_time))
3338
3339   if done:
3340     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3341   return not cumul_degraded
3342
3343
3344 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3345   """Check that mirrors are not degraded.
3346
3347   The ldisk parameter, if True, will change the test from the
3348   is_degraded attribute (which represents overall non-ok status for
3349   the device(s)) to the ldisk (representing the local storage status).
3350
3351   """
3352   lu.cfg.SetDiskID(dev, node)
3353
3354   result = True
3355
3356   if on_primary or dev.AssembleOnSecondary():
3357     rstats = lu.rpc.call_blockdev_find(node, dev)
3358     msg = rstats.fail_msg
3359     if msg:
3360       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3361       result = False
3362     elif not rstats.payload:
3363       lu.LogWarning("Can't find disk on node %s", node)
3364       result = False
3365     else:
3366       if ldisk:
3367         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3368       else:
3369         result = result and not rstats.payload.is_degraded
3370
3371   if dev.children:
3372     for child in dev.children:
3373       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3374
3375   return result
3376
3377
3378 class LUOobCommand(NoHooksLU):
3379   """Logical unit for OOB handling.
3380
3381   """
3382   REG_BGL = False
3383   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3384
3385   def CheckPrereq(self):
3386     """Check prerequisites.
3387
3388     This checks:
3389      - the node exists in the configuration
3390      - OOB is supported
3391
3392     Any errors are signaled by raising errors.OpPrereqError.
3393
3394     """
3395     self.nodes = []
3396     self.master_node = self.cfg.GetMasterNode()
3397
3398     assert self.op.power_delay >= 0.0
3399
3400     if self.op.node_names:
3401       if self.op.command in self._SKIP_MASTER:
3402         if self.master_node in self.op.node_names:
3403           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3404           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3405
3406           if master_oob_handler:
3407             additional_text = ("Run '%s %s %s' if you want to operate on the"
3408                                " master regardless") % (master_oob_handler,
3409                                                         self.op.command,
3410                                                         self.master_node)
3411           else:
3412             additional_text = "The master node does not support out-of-band"
3413
3414           raise errors.OpPrereqError(("Operating on the master node %s is not"
3415                                       " allowed for %s\n%s") %
3416                                      (self.master_node, self.op.command,
3417                                       additional_text), errors.ECODE_INVAL)
3418     else:
3419       self.op.node_names = self.cfg.GetNodeList()
3420       if self.op.command in self._SKIP_MASTER:
3421         self.op.node_names.remove(self.master_node)
3422
3423     if self.op.command in self._SKIP_MASTER:
3424       assert self.master_node not in self.op.node_names
3425
3426     for node_name in self.op.node_names:
3427       node = self.cfg.GetNodeInfo(node_name)
3428
3429       if node is None:
3430         raise errors.OpPrereqError("Node %s not found" % node_name,
3431                                    errors.ECODE_NOENT)
3432       else:
3433         self.nodes.append(node)
3434
3435       if (not self.op.ignore_status and
3436           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3437         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3438                                     " not marked offline") % node_name,
3439                                    errors.ECODE_STATE)
3440
3441   def ExpandNames(self):
3442     """Gather locks we need.
3443
3444     """
3445     if self.op.node_names:
3446       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3447                             for name in self.op.node_names]
3448       lock_names = self.op.node_names
3449     else:
3450       lock_names = locking.ALL_SET
3451
3452     self.needed_locks = {
3453       locking.LEVEL_NODE: lock_names,
3454       }
3455
3456   def Exec(self, feedback_fn):
3457     """Execute OOB and return result if we expect any.
3458
3459     """
3460     master_node = self.master_node
3461     ret = []
3462
3463     for idx, node in enumerate(self.nodes):
3464       node_entry = [(constants.RS_NORMAL, node.name)]
3465       ret.append(node_entry)
3466
3467       oob_program = _SupportsOob(self.cfg, node)
3468
3469       if not oob_program:
3470         node_entry.append((constants.RS_UNAVAIL, None))
3471         continue
3472
3473       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3474                    self.op.command, oob_program, node.name)
3475       result = self.rpc.call_run_oob(master_node, oob_program,
3476                                      self.op.command, node.name,
3477                                      self.op.timeout)
3478
3479       if result.fail_msg:
3480         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3481                         node.name, result.fail_msg)
3482         node_entry.append((constants.RS_NODATA, None))
3483       else:
3484         try:
3485           self._CheckPayload(result)
3486         except errors.OpExecError, err:
3487           self.LogWarning("The payload returned by '%s' is not valid: %s",
3488                           node.name, err)
3489           node_entry.append((constants.RS_NODATA, None))
3490         else:
3491           if self.op.command == constants.OOB_HEALTH:
3492             # For health we should log important events
3493             for item, status in result.payload:
3494               if status in [constants.OOB_STATUS_WARNING,
3495                             constants.OOB_STATUS_CRITICAL]:
3496                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3497                                 node.name, item, status)
3498
3499           if self.op.command == constants.OOB_POWER_ON:
3500             node.powered = True
3501           elif self.op.command == constants.OOB_POWER_OFF:
3502             node.powered = False
3503           elif self.op.command == constants.OOB_POWER_STATUS:
3504             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3505             if powered != node.powered:
3506               logging.warning(("Recorded power state (%s) of node '%s' does not"
3507                                " match actual power state (%s)"), node.powered,
3508                               node.name, powered)
3509
3510           # For configuration changing commands we should update the node
3511           if self.op.command in (constants.OOB_POWER_ON,
3512                                  constants.OOB_POWER_OFF):
3513             self.cfg.Update(node, feedback_fn)
3514
3515           node_entry.append((constants.RS_NORMAL, result.payload))
3516
3517           if (self.op.command == constants.OOB_POWER_ON and
3518               idx < len(self.nodes) - 1):
3519             time.sleep(self.op.power_delay)
3520
3521     return ret
3522
3523   def _CheckPayload(self, result):
3524     """Checks if the payload is valid.
3525
3526     @param result: RPC result
3527     @raises errors.OpExecError: If payload is not valid
3528
3529     """
3530     errs = []
3531     if self.op.command == constants.OOB_HEALTH:
3532       if not isinstance(result.payload, list):
3533         errs.append("command 'health' is expected to return a list but got %s" %
3534                     type(result.payload))
3535       else:
3536         for item, status in result.payload:
3537           if status not in constants.OOB_STATUSES:
3538             errs.append("health item '%s' has invalid status '%s'" %
3539                         (item, status))
3540
3541     if self.op.command == constants.OOB_POWER_STATUS:
3542       if not isinstance(result.payload, dict):
3543         errs.append("power-status is expected to return a dict but got %s" %
3544                     type(result.payload))
3545
3546     if self.op.command in [
3547         constants.OOB_POWER_ON,
3548         constants.OOB_POWER_OFF,
3549         constants.OOB_POWER_CYCLE,
3550         ]:
3551       if result.payload is not None:
3552         errs.append("%s is expected to not return payload but got '%s'" %
3553                     (self.op.command, result.payload))
3554
3555     if errs:
3556       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3557                                utils.CommaJoin(errs))
3558
3559 class _OsQuery(_QueryBase):
3560   FIELDS = query.OS_FIELDS
3561
3562   def ExpandNames(self, lu):
3563     # Lock all nodes in shared mode
3564     # Temporary removal of locks, should be reverted later
3565     # TODO: reintroduce locks when they are lighter-weight
3566     lu.needed_locks = {}
3567     #self.share_locks[locking.LEVEL_NODE] = 1
3568     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3569
3570     # The following variables interact with _QueryBase._GetNames
3571     if self.names:
3572       self.wanted = self.names
3573     else:
3574       self.wanted = locking.ALL_SET
3575
3576     self.do_locking = self.use_locking
3577
3578   def DeclareLocks(self, lu, level):
3579     pass
3580
3581   @staticmethod
3582   def _DiagnoseByOS(rlist):
3583     """Remaps a per-node return list into an a per-os per-node dictionary
3584
3585     @param rlist: a map with node names as keys and OS objects as values
3586
3587     @rtype: dict
3588     @return: a dictionary with osnames as keys and as value another
3589         map, with nodes as keys and tuples of (path, status, diagnose,
3590         variants, parameters, api_versions) as values, eg::
3591
3592           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3593                                      (/srv/..., False, "invalid api")],
3594                            "node2": [(/srv/..., True, "", [], [])]}
3595           }
3596
3597     """
3598     all_os = {}
3599     # we build here the list of nodes that didn't fail the RPC (at RPC
3600     # level), so that nodes with a non-responding node daemon don't
3601     # make all OSes invalid
3602     good_nodes = [node_name for node_name in rlist
3603                   if not rlist[node_name].fail_msg]
3604     for node_name, nr in rlist.items():
3605       if nr.fail_msg or not nr.payload:
3606         continue
3607       for (name, path, status, diagnose, variants,
3608            params, api_versions) in nr.payload:
3609         if name not in all_os:
3610           # build a list of nodes for this os containing empty lists
3611           # for each node in node_list
3612           all_os[name] = {}
3613           for nname in good_nodes:
3614             all_os[name][nname] = []
3615         # convert params from [name, help] to (name, help)
3616         params = [tuple(v) for v in params]
3617         all_os[name][node_name].append((path, status, diagnose,
3618                                         variants, params, api_versions))
3619     return all_os
3620
3621   def _GetQueryData(self, lu):
3622     """Computes the list of nodes and their attributes.
3623
3624     """
3625     # Locking is not used
3626     assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3627
3628     valid_nodes = [node.name
3629                    for node in lu.cfg.GetAllNodesInfo().values()
3630                    if not node.offline and node.vm_capable]
3631     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3632     cluster = lu.cfg.GetClusterInfo()
3633
3634     data = {}
3635
3636     for (os_name, os_data) in pol.items():
3637       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3638                           hidden=(os_name in cluster.hidden_os),
3639                           blacklisted=(os_name in cluster.blacklisted_os))
3640
3641       variants = set()
3642       parameters = set()
3643       api_versions = set()
3644
3645       for idx, osl in enumerate(os_data.values()):
3646         info.valid = bool(info.valid and osl and osl[0][1])
3647         if not info.valid:
3648           break
3649
3650         (node_variants, node_params, node_api) = osl[0][3:6]
3651         if idx == 0:
3652           # First entry
3653           variants.update(node_variants)
3654           parameters.update(node_params)
3655           api_versions.update(node_api)
3656         else:
3657           # Filter out inconsistent values
3658           variants.intersection_update(node_variants)
3659           parameters.intersection_update(node_params)
3660           api_versions.intersection_update(node_api)
3661
3662       info.variants = list(variants)
3663       info.parameters = list(parameters)
3664       info.api_versions = list(api_versions)
3665
3666       data[os_name] = info
3667
3668     # Prepare data in requested order
3669     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3670             if name in data]
3671
3672
3673 class LUOsDiagnose(NoHooksLU):
3674   """Logical unit for OS diagnose/query.
3675
3676   """
3677   REQ_BGL = False
3678
3679   @staticmethod
3680   def _BuildFilter(fields, names):
3681     """Builds a filter for querying OSes.
3682
3683     """
3684     name_filter = qlang.MakeSimpleFilter("name", names)
3685
3686     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3687     # respective field is not requested
3688     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3689                      for fname in ["hidden", "blacklisted"]
3690                      if fname not in fields]
3691     if "valid" not in fields:
3692       status_filter.append([qlang.OP_TRUE, "valid"])
3693
3694     if status_filter:
3695       status_filter.insert(0, qlang.OP_AND)
3696     else:
3697       status_filter = None
3698
3699     if name_filter and status_filter:
3700       return [qlang.OP_AND, name_filter, status_filter]
3701     elif name_filter:
3702       return name_filter
3703     else:
3704       return status_filter
3705
3706   def CheckArguments(self):
3707     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3708                        self.op.output_fields, False)
3709
3710   def ExpandNames(self):
3711     self.oq.ExpandNames(self)
3712
3713   def Exec(self, feedback_fn):
3714     return self.oq.OldStyleQuery(self)
3715
3716
3717 class LUNodeRemove(LogicalUnit):
3718   """Logical unit for removing a node.
3719
3720   """
3721   HPATH = "node-remove"
3722   HTYPE = constants.HTYPE_NODE
3723
3724   def BuildHooksEnv(self):
3725     """Build hooks env.
3726
3727     This doesn't run on the target node in the pre phase as a failed
3728     node would then be impossible to remove.
3729
3730     """
3731     return {
3732       "OP_TARGET": self.op.node_name,
3733       "NODE_NAME": self.op.node_name,
3734       }
3735
3736   def BuildHooksNodes(self):
3737     """Build hooks nodes.
3738
3739     """
3740     all_nodes = self.cfg.GetNodeList()
3741     try:
3742       all_nodes.remove(self.op.node_name)
3743     except ValueError:
3744       logging.warning("Node '%s', which is about to be removed, was not found"
3745                       " in the list of all nodes", self.op.node_name)
3746     return (all_nodes, all_nodes)
3747
3748   def CheckPrereq(self):
3749     """Check prerequisites.
3750
3751     This checks:
3752      - the node exists in the configuration
3753      - it does not have primary or secondary instances
3754      - it's not the master
3755
3756     Any errors are signaled by raising errors.OpPrereqError.
3757
3758     """
3759     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3760     node = self.cfg.GetNodeInfo(self.op.node_name)
3761     assert node is not None
3762
3763     instance_list = self.cfg.GetInstanceList()
3764
3765     masternode = self.cfg.GetMasterNode()
3766     if node.name == masternode:
3767       raise errors.OpPrereqError("Node is the master node,"
3768                                  " you need to failover first.",
3769                                  errors.ECODE_INVAL)
3770
3771     for instance_name in instance_list:
3772       instance = self.cfg.GetInstanceInfo(instance_name)
3773       if node.name in instance.all_nodes:
3774         raise errors.OpPrereqError("Instance %s is still running on the node,"
3775                                    " please remove first." % instance_name,
3776                                    errors.ECODE_INVAL)
3777     self.op.node_name = node.name
3778     self.node = node
3779
3780   def Exec(self, feedback_fn):
3781     """Removes the node from the cluster.
3782
3783     """
3784     node = self.node
3785     logging.info("Stopping the node daemon and removing configs from node %s",
3786                  node.name)
3787
3788     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3789
3790     # Promote nodes to master candidate as needed
3791     _AdjustCandidatePool(self, exceptions=[node.name])
3792     self.context.RemoveNode(node.name)
3793
3794     # Run post hooks on the node before it's removed
3795     _RunPostHook(self, node.name)
3796
3797     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3798     msg = result.fail_msg
3799     if msg:
3800       self.LogWarning("Errors encountered on the remote node while leaving"
3801                       " the cluster: %s", msg)
3802
3803     # Remove node from our /etc/hosts
3804     if self.cfg.GetClusterInfo().modify_etc_hosts:
3805       master_node = self.cfg.GetMasterNode()
3806       result = self.rpc.call_etc_hosts_modify(master_node,
3807                                               constants.ETC_HOSTS_REMOVE,
3808                                               node.name, None)
3809       result.Raise("Can't update hosts file with new host data")
3810       _RedistributeAncillaryFiles(self)
3811
3812
3813 class _NodeQuery(_QueryBase):
3814   FIELDS = query.NODE_FIELDS
3815
3816   def ExpandNames(self, lu):
3817     lu.needed_locks = {}
3818     lu.share_locks[locking.LEVEL_NODE] = 1
3819
3820     if self.names:
3821       self.wanted = _GetWantedNodes(lu, self.names)
3822     else:
3823       self.wanted = locking.ALL_SET
3824
3825     self.do_locking = (self.use_locking and
3826                        query.NQ_LIVE in self.requested_data)
3827
3828     if self.do_locking:
3829       # if we don't request only static fields, we need to lock the nodes
3830       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3831
3832   def DeclareLocks(self, lu, level):
3833     pass
3834
3835   def _GetQueryData(self, lu):
3836     """Computes the list of nodes and their attributes.
3837
3838     """
3839     all_info = lu.cfg.GetAllNodesInfo()
3840
3841     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3842
3843     # Gather data as requested
3844     if query.NQ_LIVE in self.requested_data:
3845       # filter out non-vm_capable nodes
3846       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3847
3848       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3849                                         lu.cfg.GetHypervisorType())
3850       live_data = dict((name, nresult.payload)
3851                        for (name, nresult) in node_data.items()
3852                        if not nresult.fail_msg and nresult.payload)
3853     else:
3854       live_data = None
3855
3856     if query.NQ_INST in self.requested_data:
3857       node_to_primary = dict([(name, set()) for name in nodenames])
3858       node_to_secondary = dict([(name, set()) for name in nodenames])
3859
3860       inst_data = lu.cfg.GetAllInstancesInfo()
3861
3862       for inst in inst_data.values():
3863         if inst.primary_node in node_to_primary:
3864           node_to_primary[inst.primary_node].add(inst.name)
3865         for secnode in inst.secondary_nodes:
3866           if secnode in node_to_secondary:
3867             node_to_secondary[secnode].add(inst.name)
3868     else:
3869       node_to_primary = None
3870       node_to_secondary = None
3871
3872     if query.NQ_OOB in self.requested_data:
3873       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3874                          for name, node in all_info.iteritems())
3875     else:
3876       oob_support = None
3877
3878     if query.NQ_GROUP in self.requested_data:
3879       groups = lu.cfg.GetAllNodeGroupsInfo()
3880     else:
3881       groups = {}
3882
3883     return query.NodeQueryData([all_info[name] for name in nodenames],
3884                                live_data, lu.cfg.GetMasterNode(),
3885                                node_to_primary, node_to_secondary, groups,
3886                                oob_support, lu.cfg.GetClusterInfo())
3887
3888
3889 class LUNodeQuery(NoHooksLU):
3890   """Logical unit for querying nodes.
3891
3892   """
3893   # pylint: disable-msg=W0142
3894   REQ_BGL = False
3895
3896   def CheckArguments(self):
3897     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3898                          self.op.output_fields, self.op.use_locking)
3899
3900   def ExpandNames(self):
3901     self.nq.ExpandNames(self)
3902
3903   def Exec(self, feedback_fn):
3904     return self.nq.OldStyleQuery(self)
3905
3906
3907 class LUNodeQueryvols(NoHooksLU):
3908   """Logical unit for getting volumes on node(s).
3909
3910   """
3911   REQ_BGL = False
3912   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3913   _FIELDS_STATIC = utils.FieldSet("node")
3914
3915   def CheckArguments(self):
3916     _CheckOutputFields(static=self._FIELDS_STATIC,
3917                        dynamic=self._FIELDS_DYNAMIC,
3918                        selected=self.op.output_fields)
3919
3920   def ExpandNames(self):
3921     self.needed_locks = {}
3922     self.share_locks[locking.LEVEL_NODE] = 1
3923     if not self.op.nodes:
3924       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3925     else:
3926       self.needed_locks[locking.LEVEL_NODE] = \
3927         _GetWantedNodes(self, self.op.nodes)
3928
3929   def Exec(self, feedback_fn):
3930     """Computes the list of nodes and their attributes.
3931
3932     """
3933     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3934     volumes = self.rpc.call_node_volumes(nodenames)
3935
3936     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3937              in self.cfg.GetInstanceList()]
3938
3939     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3940
3941     output = []
3942     for node in nodenames:
3943       nresult = volumes[node]
3944       if nresult.offline:
3945         continue
3946       msg = nresult.fail_msg
3947       if msg:
3948         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3949         continue
3950
3951       node_vols = nresult.payload[:]
3952       node_vols.sort(key=lambda vol: vol['dev'])
3953
3954       for vol in node_vols:
3955         node_output = []
3956         for field in self.op.output_fields:
3957           if field == "node":
3958             val = node
3959           elif field == "phys":
3960             val = vol['dev']
3961           elif field == "vg":
3962             val = vol['vg']
3963           elif field == "name":
3964             val = vol['name']
3965           elif field == "size":
3966             val = int(float(vol['size']))
3967           elif field == "instance":
3968             for inst in ilist:
3969               if node not in lv_by_node[inst]:
3970                 continue
3971               if vol['name'] in lv_by_node[inst][node]:
3972                 val = inst.name
3973                 break
3974             else:
3975               val = '-'
3976           else:
3977             raise errors.ParameterError(field)
3978           node_output.append(str(val))
3979
3980         output.append(node_output)
3981
3982     return output
3983
3984
3985 class LUNodeQueryStorage(NoHooksLU):
3986   """Logical unit for getting information on storage units on node(s).
3987
3988   """
3989   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3990   REQ_BGL = False
3991
3992   def CheckArguments(self):
3993     _CheckOutputFields(static=self._FIELDS_STATIC,
3994                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3995                        selected=self.op.output_fields)
3996
3997   def ExpandNames(self):
3998     self.needed_locks = {}
3999     self.share_locks[locking.LEVEL_NODE] = 1
4000
4001     if self.op.nodes:
4002       self.needed_locks[locking.LEVEL_NODE] = \
4003         _GetWantedNodes(self, self.op.nodes)
4004     else:
4005       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4006
4007   def Exec(self, feedback_fn):
4008     """Computes the list of nodes and their attributes.
4009
4010     """
4011     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4012
4013     # Always get name to sort by
4014     if constants.SF_NAME in self.op.output_fields:
4015       fields = self.op.output_fields[:]
4016     else:
4017       fields = [constants.SF_NAME] + self.op.output_fields
4018
4019     # Never ask for node or type as it's only known to the LU
4020     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4021       while extra in fields:
4022         fields.remove(extra)
4023
4024     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4025     name_idx = field_idx[constants.SF_NAME]
4026
4027     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4028     data = self.rpc.call_storage_list(self.nodes,
4029                                       self.op.storage_type, st_args,
4030                                       self.op.name, fields)
4031
4032     result = []
4033
4034     for node in utils.NiceSort(self.nodes):
4035       nresult = data[node]
4036       if nresult.offline:
4037         continue
4038
4039       msg = nresult.fail_msg
4040       if msg:
4041         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4042         continue
4043
4044       rows = dict([(row[name_idx], row) for row in nresult.payload])
4045
4046       for name in utils.NiceSort(rows.keys()):
4047         row = rows[name]
4048
4049         out = []
4050
4051         for field in self.op.output_fields:
4052           if field == constants.SF_NODE:
4053             val = node
4054           elif field == constants.SF_TYPE:
4055             val = self.op.storage_type
4056           elif field in field_idx:
4057             val = row[field_idx[field]]
4058           else:
4059             raise errors.ParameterError(field)
4060
4061           out.append(val)
4062
4063         result.append(out)
4064
4065     return result
4066
4067
4068 class _InstanceQuery(_QueryBase):
4069   FIELDS = query.INSTANCE_FIELDS
4070
4071   def ExpandNames(self, lu):
4072     lu.needed_locks = {}
4073     lu.share_locks[locking.LEVEL_INSTANCE] = 1
4074     lu.share_locks[locking.LEVEL_NODE] = 1
4075
4076     if self.names:
4077       self.wanted = _GetWantedInstances(lu, self.names)
4078     else:
4079       self.wanted = locking.ALL_SET
4080
4081     self.do_locking = (self.use_locking and
4082                        query.IQ_LIVE in self.requested_data)
4083     if self.do_locking:
4084       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4085       lu.needed_locks[locking.LEVEL_NODE] = []
4086       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4087
4088   def DeclareLocks(self, lu, level):
4089     if level == locking.LEVEL_NODE and self.do_locking:
4090       lu._LockInstancesNodes() # pylint: disable-msg=W0212
4091
4092   def _GetQueryData(self, lu):
4093     """Computes the list of instances and their attributes.
4094
4095     """
4096     cluster = lu.cfg.GetClusterInfo()
4097     all_info = lu.cfg.GetAllInstancesInfo()
4098
4099     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4100
4101     instance_list = [all_info[name] for name in instance_names]
4102     nodes = frozenset(itertools.chain(*(inst.all_nodes
4103                                         for inst in instance_list)))
4104     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4105     bad_nodes = []
4106     offline_nodes = []
4107     wrongnode_inst = set()
4108
4109     # Gather data as requested
4110     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4111       live_data = {}
4112       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4113       for name in nodes:
4114         result = node_data[name]
4115         if result.offline:
4116           # offline nodes will be in both lists
4117           assert result.fail_msg
4118           offline_nodes.append(name)
4119         if result.fail_msg:
4120           bad_nodes.append(name)
4121         elif result.payload:
4122           for inst in result.payload:
4123             if inst in all_info:
4124               if all_info[inst].primary_node == name:
4125                 live_data.update(result.payload)
4126               else:
4127                 wrongnode_inst.add(inst)
4128             else:
4129               # orphan instance; we don't list it here as we don't
4130               # handle this case yet in the output of instance listing
4131               logging.warning("Orphan instance '%s' found on node %s",
4132                               inst, name)
4133         # else no instance is alive
4134     else:
4135       live_data = {}
4136
4137     if query.IQ_DISKUSAGE in self.requested_data:
4138       disk_usage = dict((inst.name,
4139                          _ComputeDiskSize(inst.disk_template,
4140                                           [{constants.IDISK_SIZE: disk.size}
4141                                            for disk in inst.disks]))
4142                         for inst in instance_list)
4143     else:
4144       disk_usage = None
4145
4146     if query.IQ_CONSOLE in self.requested_data:
4147       consinfo = {}
4148       for inst in instance_list:
4149         if inst.name in live_data:
4150           # Instance is running
4151           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4152         else:
4153           consinfo[inst.name] = None
4154       assert set(consinfo.keys()) == set(instance_names)
4155     else:
4156       consinfo = None
4157
4158     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4159                                    disk_usage, offline_nodes, bad_nodes,
4160                                    live_data, wrongnode_inst, consinfo)
4161
4162
4163 class LUQuery(NoHooksLU):
4164   """Query for resources/items of a certain kind.
4165
4166   """
4167   # pylint: disable-msg=W0142
4168   REQ_BGL = False
4169
4170   def CheckArguments(self):
4171     qcls = _GetQueryImplementation(self.op.what)
4172
4173     self.impl = qcls(self.op.filter, self.op.fields, False)
4174
4175   def ExpandNames(self):
4176     self.impl.ExpandNames(self)
4177
4178   def DeclareLocks(self, level):
4179     self.impl.DeclareLocks(self, level)
4180
4181   def Exec(self, feedback_fn):
4182     return self.impl.NewStyleQuery(self)
4183
4184
4185 class LUQueryFields(NoHooksLU):
4186   """Query for resources/items of a certain kind.
4187
4188   """
4189   # pylint: disable-msg=W0142
4190   REQ_BGL = False
4191
4192   def CheckArguments(self):
4193     self.qcls = _GetQueryImplementation(self.op.what)
4194
4195   def ExpandNames(self):
4196     self.needed_locks = {}
4197
4198   def Exec(self, feedback_fn):
4199     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4200
4201
4202 class LUNodeModifyStorage(NoHooksLU):
4203   """Logical unit for modifying a storage volume on a node.
4204
4205   """
4206   REQ_BGL = False
4207
4208   def CheckArguments(self):
4209     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4210
4211     storage_type = self.op.storage_type
4212
4213     try:
4214       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4215     except KeyError:
4216       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4217                                  " modified" % storage_type,
4218                                  errors.ECODE_INVAL)
4219
4220     diff = set(self.op.changes.keys()) - modifiable
4221     if diff:
4222       raise errors.OpPrereqError("The following fields can not be modified for"
4223                                  " storage units of type '%s': %r" %
4224                                  (storage_type, list(diff)),
4225                                  errors.ECODE_INVAL)
4226
4227   def ExpandNames(self):
4228     self.needed_locks = {
4229       locking.LEVEL_NODE: self.op.node_name,
4230       }
4231
4232   def Exec(self, feedback_fn):
4233     """Computes the list of nodes and their attributes.
4234
4235     """
4236     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4237     result = self.rpc.call_storage_modify(self.op.node_name,
4238                                           self.op.storage_type, st_args,
4239                                           self.op.name, self.op.changes)
4240     result.Raise("Failed to modify storage unit '%s' on %s" %
4241                  (self.op.name, self.op.node_name))
4242
4243
4244 class LUNodeAdd(LogicalUnit):
4245   """Logical unit for adding node to the cluster.
4246
4247   """
4248   HPATH = "node-add"
4249   HTYPE = constants.HTYPE_NODE
4250   _NFLAGS = ["master_capable", "vm_capable"]
4251
4252   def CheckArguments(self):
4253     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4254     # validate/normalize the node name
4255     self.hostname = netutils.GetHostname(name=self.op.node_name,
4256                                          family=self.primary_ip_family)
4257     self.op.node_name = self.hostname.name
4258     if self.op.readd and self.op.group:
4259       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4260                                  " being readded", errors.ECODE_INVAL)
4261
4262   def BuildHooksEnv(self):
4263     """Build hooks env.
4264
4265     This will run on all nodes before, and on all nodes + the new node after.
4266
4267     """
4268     return {
4269       "OP_TARGET": self.op.node_name,
4270       "NODE_NAME": self.op.node_name,
4271       "NODE_PIP": self.op.primary_ip,
4272       "NODE_SIP": self.op.secondary_ip,
4273       "MASTER_CAPABLE": str(self.op.master_capable),
4274       "VM_CAPABLE": str(self.op.vm_capable),
4275       }
4276
4277   def BuildHooksNodes(self):
4278     """Build hooks nodes.
4279
4280     """
4281     # Exclude added node
4282     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4283     post_nodes = pre_nodes + [self.op.node_name, ]
4284
4285     return (pre_nodes, post_nodes)
4286
4287   def CheckPrereq(self):
4288     """Check prerequisites.
4289
4290     This checks:
4291      - the new node is not already in the config
4292      - it is resolvable
4293      - its parameters (single/dual homed) matches the cluster
4294
4295     Any errors are signaled by raising errors.OpPrereqError.
4296
4297     """
4298     cfg = self.cfg
4299     hostname = self.hostname
4300     node = hostname.name
4301     primary_ip = self.op.primary_ip = hostname.ip
4302     if self.op.secondary_ip is None:
4303       if self.primary_ip_family == netutils.IP6Address.family:
4304         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4305                                    " IPv4 address must be given as secondary",
4306                                    errors.ECODE_INVAL)
4307       self.op.secondary_ip = primary_ip
4308
4309     secondary_ip = self.op.secondary_ip
4310     if not netutils.IP4Address.IsValid(secondary_ip):
4311       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4312                                  " address" % secondary_ip, errors.ECODE_INVAL)
4313
4314     node_list = cfg.GetNodeList()
4315     if not self.op.readd and node in node_list:
4316       raise errors.OpPrereqError("Node %s is already in the configuration" %
4317                                  node, errors.ECODE_EXISTS)
4318     elif self.op.readd and node not in node_list:
4319       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4320                                  errors.ECODE_NOENT)
4321
4322     self.changed_primary_ip = False
4323
4324     for existing_node_name in node_list:
4325       existing_node = cfg.GetNodeInfo(existing_node_name)
4326
4327       if self.op.readd and node == existing_node_name:
4328         if existing_node.secondary_ip != secondary_ip:
4329           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4330                                      " address configuration as before",
4331                                      errors.ECODE_INVAL)
4332         if existing_node.primary_ip != primary_ip:
4333           self.changed_primary_ip = True
4334
4335         continue
4336
4337       if (existing_node.primary_ip == primary_ip or
4338           existing_node.secondary_ip == primary_ip or
4339           existing_node.primary_ip == secondary_ip or
4340           existing_node.secondary_ip == secondary_ip):
4341         raise errors.OpPrereqError("New node ip address(es) conflict with"
4342                                    " existing node %s" % existing_node.name,
4343                                    errors.ECODE_NOTUNIQUE)
4344
4345     # After this 'if' block, None is no longer a valid value for the
4346     # _capable op attributes
4347     if self.op.readd:
4348       old_node = self.cfg.GetNodeInfo(node)
4349       assert old_node is not None, "Can't retrieve locked node %s" % node
4350       for attr in self._NFLAGS:
4351         if getattr(self.op, attr) is None:
4352           setattr(self.op, attr, getattr(old_node, attr))
4353     else:
4354       for attr in self._NFLAGS:
4355         if getattr(self.op, attr) is None:
4356           setattr(self.op, attr, True)
4357
4358     if self.op.readd and not self.op.vm_capable:
4359       pri, sec = cfg.GetNodeInstances(node)
4360       if pri or sec:
4361         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4362                                    " flag set to false, but it already holds"
4363                                    " instances" % node,
4364                                    errors.ECODE_STATE)
4365
4366     # check that the type of the node (single versus dual homed) is the
4367     # same as for the master
4368     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4369     master_singlehomed = myself.secondary_ip == myself.primary_ip
4370     newbie_singlehomed = secondary_ip == primary_ip
4371     if master_singlehomed != newbie_singlehomed:
4372       if master_singlehomed:
4373         raise errors.OpPrereqError("The master has no secondary ip but the"
4374                                    " new node has one",
4375                                    errors.ECODE_INVAL)
4376       else:
4377         raise errors.OpPrereqError("The master has a secondary ip but the"
4378                                    " new node doesn't have one",
4379                                    errors.ECODE_INVAL)
4380
4381     # checks reachability
4382     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4383       raise errors.OpPrereqError("Node not reachable by ping",
4384                                  errors.ECODE_ENVIRON)
4385
4386     if not newbie_singlehomed:
4387       # check reachability from my secondary ip to newbie's secondary ip
4388       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4389                            source=myself.secondary_ip):
4390         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4391                                    " based ping to node daemon port",
4392                                    errors.ECODE_ENVIRON)
4393
4394     if self.op.readd:
4395       exceptions = [node]
4396     else:
4397       exceptions = []
4398
4399     if self.op.master_capable:
4400       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4401     else:
4402       self.master_candidate = False
4403
4404     if self.op.readd:
4405       self.new_node = old_node
4406     else:
4407       node_group = cfg.LookupNodeGroup(self.op.group)
4408       self.new_node = objects.Node(name=node,
4409                                    primary_ip=primary_ip,
4410                                    secondary_ip=secondary_ip,
4411                                    master_candidate=self.master_candidate,
4412                                    offline=False, drained=False,
4413                                    group=node_group)
4414
4415     if self.op.ndparams:
4416       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4417
4418   def Exec(self, feedback_fn):
4419     """Adds the new node to the cluster.
4420
4421     """
4422     new_node = self.new_node
4423     node = new_node.name
4424
4425     # We adding a new node so we assume it's powered
4426     new_node.powered = True
4427
4428     # for re-adds, reset the offline/drained/master-candidate flags;
4429     # we need to reset here, otherwise offline would prevent RPC calls
4430     # later in the procedure; this also means that if the re-add
4431     # fails, we are left with a non-offlined, broken node
4432     if self.op.readd:
4433       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4434       self.LogInfo("Readding a node, the offline/drained flags were reset")
4435       # if we demote the node, we do cleanup later in the procedure
4436       new_node.master_candidate = self.master_candidate
4437       if self.changed_primary_ip:
4438         new_node.primary_ip = self.op.primary_ip
4439
4440     # copy the master/vm_capable flags
4441     for attr in self._NFLAGS:
4442       setattr(new_node, attr, getattr(self.op, attr))
4443
4444     # notify the user about any possible mc promotion
4445     if new_node.master_candidate:
4446       self.LogInfo("Node will be a master candidate")
4447
4448     if self.op.ndparams:
4449       new_node.ndparams = self.op.ndparams
4450     else:
4451       new_node.ndparams = {}
4452
4453     # check connectivity
4454     result = self.rpc.call_version([node])[node]
4455     result.Raise("Can't get version information from node %s" % node)
4456     if constants.PROTOCOL_VERSION == result.payload:
4457       logging.info("Communication to node %s fine, sw version %s match",
4458                    node, result.payload)
4459     else:
4460       raise errors.OpExecError("Version mismatch master version %s,"
4461                                " node version %s" %
4462                                (constants.PROTOCOL_VERSION, result.payload))
4463
4464     # Add node to our /etc/hosts, and add key to known_hosts
4465     if self.cfg.GetClusterInfo().modify_etc_hosts:
4466       master_node = self.cfg.GetMasterNode()
4467       result = self.rpc.call_etc_hosts_modify(master_node,
4468                                               constants.ETC_HOSTS_ADD,
4469                                               self.hostname.name,
4470                                               self.hostname.ip)
4471       result.Raise("Can't update hosts file with new host data")
4472
4473     if new_node.secondary_ip != new_node.primary_ip:
4474       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4475                                False)
4476
4477     node_verify_list = [self.cfg.GetMasterNode()]
4478     node_verify_param = {
4479       constants.NV_NODELIST: [node],
4480       # TODO: do a node-net-test as well?
4481     }
4482
4483     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4484                                        self.cfg.GetClusterName())
4485     for verifier in node_verify_list:
4486       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4487       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4488       if nl_payload:
4489         for failed in nl_payload:
4490           feedback_fn("ssh/hostname verification failed"
4491                       " (checking from %s): %s" %
4492                       (verifier, nl_payload[failed]))
4493         raise errors.OpExecError("ssh/hostname verification failed.")
4494
4495     if self.op.readd:
4496       _RedistributeAncillaryFiles(self)
4497       self.context.ReaddNode(new_node)
4498       # make sure we redistribute the config
4499       self.cfg.Update(new_node, feedback_fn)
4500       # and make sure the new node will not have old files around
4501       if not new_node.master_candidate:
4502         result = self.rpc.call_node_demote_from_mc(new_node.name)
4503         msg = result.fail_msg
4504         if msg:
4505           self.LogWarning("Node failed to demote itself from master"
4506                           " candidate status: %s" % msg)
4507     else:
4508       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4509                                   additional_vm=self.op.vm_capable)
4510       self.context.AddNode(new_node, self.proc.GetECId())
4511
4512
4513 class LUNodeSetParams(LogicalUnit):
4514   """Modifies the parameters of a node.
4515
4516   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4517       to the node role (as _ROLE_*)
4518   @cvar _R2F: a dictionary from node role to tuples of flags
4519   @cvar _FLAGS: a list of attribute names corresponding to the flags
4520
4521   """
4522   HPATH = "node-modify"
4523   HTYPE = constants.HTYPE_NODE
4524   REQ_BGL = False
4525   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4526   _F2R = {
4527     (True, False, False): _ROLE_CANDIDATE,
4528     (False, True, False): _ROLE_DRAINED,
4529     (False, False, True): _ROLE_OFFLINE,
4530     (False, False, False): _ROLE_REGULAR,
4531     }
4532   _R2F = dict((v, k) for k, v in _F2R.items())
4533   _FLAGS = ["master_candidate", "drained", "offline"]
4534
4535   def CheckArguments(self):
4536     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4537     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4538                 self.op.master_capable, self.op.vm_capable,
4539                 self.op.secondary_ip, self.op.ndparams]
4540     if all_mods.count(None) == len(all_mods):
4541       raise errors.OpPrereqError("Please pass at least one modification",
4542                                  errors.ECODE_INVAL)
4543     if all_mods.count(True) > 1:
4544       raise errors.OpPrereqError("Can't set the node into more than one"
4545                                  " state at the same time",
4546                                  errors.ECODE_INVAL)
4547
4548     # Boolean value that tells us whether we might be demoting from MC
4549     self.might_demote = (self.op.master_candidate == False or
4550                          self.op.offline == True or
4551                          self.op.drained == True or
4552                          self.op.master_capable == False)
4553
4554     if self.op.secondary_ip:
4555       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4556         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4557                                    " address" % self.op.secondary_ip,
4558                                    errors.ECODE_INVAL)
4559
4560     self.lock_all = self.op.auto_promote and self.might_demote
4561     self.lock_instances = self.op.secondary_ip is not None
4562
4563   def ExpandNames(self):
4564     if self.lock_all:
4565       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4566     else:
4567       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4568
4569     if self.lock_instances:
4570       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4571
4572   def DeclareLocks(self, level):
4573     # If we have locked all instances, before waiting to lock nodes, release
4574     # all the ones living on nodes unrelated to the current operation.
4575     if level == locking.LEVEL_NODE and self.lock_instances:
4576       instances_release = []
4577       instances_keep = []
4578       self.affected_instances = []
4579       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4580         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4581           instance = self.context.cfg.GetInstanceInfo(instance_name)
4582           i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4583           if i_mirrored and self.op.node_name in instance.all_nodes:
4584             instances_keep.append(instance_name)
4585             self.affected_instances.append(instance)
4586           else:
4587             instances_release.append(instance_name)
4588         if instances_release:
4589           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4590           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4591
4592   def BuildHooksEnv(self):
4593     """Build hooks env.
4594
4595     This runs on the master node.
4596
4597     """
4598     return {
4599       "OP_TARGET": self.op.node_name,
4600       "MASTER_CANDIDATE": str(self.op.master_candidate),
4601       "OFFLINE": str(self.op.offline),
4602       "DRAINED": str(self.op.drained),
4603       "MASTER_CAPABLE": str(self.op.master_capable),
4604       "VM_CAPABLE": str(self.op.vm_capable),
4605       }
4606
4607   def BuildHooksNodes(self):
4608     """Build hooks nodes.
4609
4610     """
4611     nl = [self.cfg.GetMasterNode(), self.op.node_name]
4612     return (nl, nl)
4613
4614   def CheckPrereq(self):
4615     """Check prerequisites.
4616
4617     This only checks the instance list against the existing names.
4618
4619     """
4620     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4621
4622     if (self.op.master_candidate is not None or
4623         self.op.drained is not None or
4624         self.op.offline is not None):
4625       # we can't change the master's node flags
4626       if self.op.node_name == self.cfg.GetMasterNode():
4627         raise errors.OpPrereqError("The master role can be changed"
4628                                    " only via master-failover",
4629                                    errors.ECODE_INVAL)
4630
4631     if self.op.master_candidate and not node.master_capable:
4632       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4633                                  " it a master candidate" % node.name,
4634                                  errors.ECODE_STATE)
4635
4636     if self.op.vm_capable == False:
4637       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4638       if ipri or isec:
4639         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4640                                    " the vm_capable flag" % node.name,
4641                                    errors.ECODE_STATE)
4642
4643     if node.master_candidate and self.might_demote and not self.lock_all:
4644       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4645       # check if after removing the current node, we're missing master
4646       # candidates
4647       (mc_remaining, mc_should, _) = \
4648           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4649       if mc_remaining < mc_should:
4650         raise errors.OpPrereqError("Not enough master candidates, please"
4651                                    " pass auto promote option to allow"
4652                                    " promotion", errors.ECODE_STATE)
4653
4654     self.old_flags = old_flags = (node.master_candidate,
4655                                   node.drained, node.offline)
4656     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4657     self.old_role = old_role = self._F2R[old_flags]
4658
4659     # Check for ineffective changes
4660     for attr in self._FLAGS:
4661       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4662         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4663         setattr(self.op, attr, None)
4664
4665     # Past this point, any flag change to False means a transition
4666     # away from the respective state, as only real changes are kept
4667
4668     # TODO: We might query the real power state if it supports OOB
4669     if _SupportsOob(self.cfg, node):
4670       if self.op.offline is False and not (node.powered or
4671                                            self.op.powered == True):
4672         raise errors.OpPrereqError(("Please power on node %s first before you"
4673                                     " can reset offline state") %
4674                                    self.op.node_name)
4675     elif self.op.powered is not None:
4676       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4677                                   " which does not support out-of-band"
4678                                   " handling") % self.op.node_name)
4679
4680     # If we're being deofflined/drained, we'll MC ourself if needed
4681     if (self.op.drained == False or self.op.offline == False or
4682         (self.op.master_capable and not node.master_capable)):
4683       if _DecideSelfPromotion(self):
4684         self.op.master_candidate = True
4685         self.LogInfo("Auto-promoting node to master candidate")
4686
4687     # If we're no longer master capable, we'll demote ourselves from MC
4688     if self.op.master_capable == False and node.master_candidate:
4689       self.LogInfo("Demoting from master candidate")
4690       self.op.master_candidate = False
4691
4692     # Compute new role
4693     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4694     if self.op.master_candidate:
4695       new_role = self._ROLE_CANDIDATE
4696     elif self.op.drained:
4697       new_role = self._ROLE_DRAINED
4698     elif self.op.offline:
4699       new_role = self._ROLE_OFFLINE
4700     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4701       # False is still in new flags, which means we're un-setting (the
4702       # only) True flag
4703       new_role = self._ROLE_REGULAR
4704     else: # no new flags, nothing, keep old role
4705       new_role = old_role
4706
4707     self.new_role = new_role
4708
4709     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4710       # Trying to transition out of offline status
4711       result = self.rpc.call_version([node.name])[node.name]
4712       if result.fail_msg:
4713         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4714                                    " to report its version: %s" %
4715                                    (node.name, result.fail_msg),
4716                                    errors.ECODE_STATE)
4717       else:
4718         self.LogWarning("Transitioning node from offline to online state"
4719                         " without using re-add. Please make sure the node"
4720                         " is healthy!")
4721
4722     if self.op.secondary_ip:
4723       # Ok even without locking, because this can't be changed by any LU
4724       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4725       master_singlehomed = master.secondary_ip == master.primary_ip
4726       if master_singlehomed and self.op.secondary_ip:
4727         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4728                                    " homed cluster", errors.ECODE_INVAL)
4729
4730       if node.offline:
4731         if self.affected_instances:
4732           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4733                                      " node has instances (%s) configured"
4734                                      " to use it" % self.affected_instances)
4735       else:
4736         # On online nodes, check that no instances are running, and that
4737         # the node has the new ip and we can reach it.
4738         for instance in self.affected_instances:
4739           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4740
4741         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4742         if master.name != node.name:
4743           # check reachability from master secondary ip to new secondary ip
4744           if not netutils.TcpPing(self.op.secondary_ip,
4745                                   constants.DEFAULT_NODED_PORT,
4746                                   source=master.secondary_ip):
4747             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4748                                        " based ping to node daemon port",
4749                                        errors.ECODE_ENVIRON)
4750
4751     if self.op.ndparams:
4752       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4753       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4754       self.new_ndparams = new_ndparams
4755
4756   def Exec(self, feedback_fn):
4757     """Modifies a node.
4758
4759     """
4760     node = self.node
4761     old_role = self.old_role
4762     new_role = self.new_role
4763
4764     result = []
4765
4766     if self.op.ndparams:
4767       node.ndparams = self.new_ndparams
4768
4769     if self.op.powered is not None:
4770       node.powered = self.op.powered
4771
4772     for attr in ["master_capable", "vm_capable"]:
4773       val = getattr(self.op, attr)
4774       if val is not None:
4775         setattr(node, attr, val)
4776         result.append((attr, str(val)))
4777
4778     if new_role != old_role:
4779       # Tell the node to demote itself, if no longer MC and not offline
4780       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4781         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4782         if msg:
4783           self.LogWarning("Node failed to demote itself: %s", msg)
4784
4785       new_flags = self._R2F[new_role]
4786       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4787         if of != nf:
4788           result.append((desc, str(nf)))
4789       (node.master_candidate, node.drained, node.offline) = new_flags
4790
4791       # we locked all nodes, we adjust the CP before updating this node
4792       if self.lock_all:
4793         _AdjustCandidatePool(self, [node.name])
4794
4795     if self.op.secondary_ip:
4796       node.secondary_ip = self.op.secondary_ip
4797       result.append(("secondary_ip", self.op.secondary_ip))
4798
4799     # this will trigger configuration file update, if needed
4800     self.cfg.Update(node, feedback_fn)
4801
4802     # this will trigger job queue propagation or cleanup if the mc
4803     # flag changed
4804     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4805       self.context.ReaddNode(node)
4806
4807     return result
4808
4809
4810 class LUNodePowercycle(NoHooksLU):
4811   """Powercycles a node.
4812
4813   """
4814   REQ_BGL = False
4815
4816   def CheckArguments(self):
4817     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4818     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4819       raise errors.OpPrereqError("The node is the master and the force"
4820                                  " parameter was not set",
4821                                  errors.ECODE_INVAL)
4822
4823   def ExpandNames(self):
4824     """Locking for PowercycleNode.
4825
4826     This is a last-resort option and shouldn't block on other
4827     jobs. Therefore, we grab no locks.
4828
4829     """
4830     self.needed_locks = {}
4831
4832   def Exec(self, feedback_fn):
4833     """Reboots a node.
4834
4835     """
4836     result = self.rpc.call_node_powercycle(self.op.node_name,
4837                                            self.cfg.GetHypervisorType())
4838     result.Raise("Failed to schedule the reboot")
4839     return result.payload
4840
4841
4842 class LUClusterQuery(NoHooksLU):
4843   """Query cluster configuration.
4844
4845   """
4846   REQ_BGL = False
4847
4848   def ExpandNames(self):
4849     self.needed_locks = {}
4850
4851   def Exec(self, feedback_fn):
4852     """Return cluster config.
4853
4854     """
4855     cluster = self.cfg.GetClusterInfo()
4856     os_hvp = {}
4857
4858     # Filter just for enabled hypervisors
4859     for os_name, hv_dict in cluster.os_hvp.items():
4860       os_hvp[os_name] = {}
4861       for hv_name, hv_params in hv_dict.items():
4862         if hv_name in cluster.enabled_hypervisors:
4863           os_hvp[os_name][hv_name] = hv_params
4864
4865     # Convert ip_family to ip_version
4866     primary_ip_version = constants.IP4_VERSION
4867     if cluster.primary_ip_family == netutils.IP6Address.family:
4868       primary_ip_version = constants.IP6_VERSION
4869
4870     result = {
4871       "software_version": constants.RELEASE_VERSION,
4872       "protocol_version": constants.PROTOCOL_VERSION,
4873       "config_version": constants.CONFIG_VERSION,
4874       "os_api_version": max(constants.OS_API_VERSIONS),
4875       "export_version": constants.EXPORT_VERSION,
4876       "architecture": (platform.architecture()[0], platform.machine()),
4877       "name": cluster.cluster_name,
4878       "master": cluster.master_node,
4879       "default_hypervisor": cluster.enabled_hypervisors[0],
4880       "enabled_hypervisors": cluster.enabled_hypervisors,
4881       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4882                         for hypervisor_name in cluster.enabled_hypervisors]),
4883       "os_hvp": os_hvp,
4884       "beparams": cluster.beparams,
4885       "osparams": cluster.osparams,
4886       "nicparams": cluster.nicparams,
4887       "ndparams": cluster.ndparams,
4888       "candidate_pool_size": cluster.candidate_pool_size,
4889       "master_netdev": cluster.master_netdev,
4890       "volume_group_name": cluster.volume_group_name,
4891       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4892       "file_storage_dir": cluster.file_storage_dir,
4893       "shared_file_storage_dir": cluster.shared_file_storage_dir,
4894       "maintain_node_health": cluster.maintain_node_health,
4895       "ctime": cluster.ctime,
4896       "mtime": cluster.mtime,
4897       "uuid": cluster.uuid,
4898       "tags": list(cluster.GetTags()),
4899       "uid_pool": cluster.uid_pool,
4900       "default_iallocator": cluster.default_iallocator,
4901       "reserved_lvs": cluster.reserved_lvs,
4902       "primary_ip_version": primary_ip_version,
4903       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4904       "hidden_os": cluster.hidden_os,
4905       "blacklisted_os": cluster.blacklisted_os,
4906       }
4907
4908     return result
4909
4910
4911 class LUClusterConfigQuery(NoHooksLU):
4912   """Return configuration values.
4913
4914   """
4915   REQ_BGL = False
4916   _FIELDS_DYNAMIC = utils.FieldSet()
4917   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4918                                   "watcher_pause", "volume_group_name")
4919
4920   def CheckArguments(self):
4921     _CheckOutputFields(static=self._FIELDS_STATIC,
4922                        dynamic=self._FIELDS_DYNAMIC,
4923                        selected=self.op.output_fields)
4924
4925   def ExpandNames(self):
4926     self.needed_locks = {}
4927
4928   def Exec(self, feedback_fn):
4929     """Dump a representation of the cluster config to the standard output.
4930
4931     """
4932     values = []
4933     for field in self.op.output_fields:
4934       if field == "cluster_name":
4935         entry = self.cfg.GetClusterName()
4936       elif field == "master_node":
4937         entry = self.cfg.GetMasterNode()
4938       elif field == "drain_flag":
4939         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4940       elif field == "watcher_pause":
4941         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4942       elif field == "volume_group_name":
4943         entry = self.cfg.GetVGName()
4944       else:
4945         raise errors.ParameterError(field)
4946       values.append(entry)
4947     return values
4948
4949
4950 class LUInstanceActivateDisks(NoHooksLU):
4951   """Bring up an instance's disks.
4952
4953   """
4954   REQ_BGL = False
4955
4956   def ExpandNames(self):
4957     self._ExpandAndLockInstance()
4958     self.needed_locks[locking.LEVEL_NODE] = []
4959     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4960
4961   def DeclareLocks(self, level):
4962     if level == locking.LEVEL_NODE:
4963       self._LockInstancesNodes()
4964
4965   def CheckPrereq(self):
4966     """Check prerequisites.
4967
4968     This checks that the instance is in the cluster.
4969
4970     """
4971     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4972     assert self.instance is not None, \
4973       "Cannot retrieve locked instance %s" % self.op.instance_name
4974     _CheckNodeOnline(self, self.instance.primary_node)
4975
4976   def Exec(self, feedback_fn):
4977     """Activate the disks.
4978
4979     """
4980     disks_ok, disks_info = \
4981               _AssembleInstanceDisks(self, self.instance,
4982                                      ignore_size=self.op.ignore_size)
4983     if not disks_ok:
4984       raise errors.OpExecError("Cannot activate block devices")
4985
4986     return disks_info
4987
4988
4989 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4990                            ignore_size=False):
4991   """Prepare the block devices for an instance.
4992
4993   This sets up the block devices on all nodes.
4994
4995   @type lu: L{LogicalUnit}
4996   @param lu: the logical unit on whose behalf we execute
4997   @type instance: L{objects.Instance}
4998   @param instance: the instance for whose disks we assemble
4999   @type disks: list of L{objects.Disk} or None
5000   @param disks: which disks to assemble (or all, if None)
5001   @type ignore_secondaries: boolean
5002   @param ignore_secondaries: if true, errors on secondary nodes
5003       won't result in an error return from the function
5004   @type ignore_size: boolean
5005   @param ignore_size: if true, the current known size of the disk
5006       will not be used during the disk activation, useful for cases
5007       when the size is wrong
5008   @return: False if the operation failed, otherwise a list of
5009       (host, instance_visible_name, node_visible_name)
5010       with the mapping from node devices to instance devices
5011
5012   """
5013   device_info = []
5014   disks_ok = True
5015   iname = instance.name
5016   disks = _ExpandCheckDisks(instance, disks)
5017
5018   # With the two passes mechanism we try to reduce the window of
5019   # opportunity for the race condition of switching DRBD to primary
5020   # before handshaking occured, but we do not eliminate it
5021
5022   # The proper fix would be to wait (with some limits) until the
5023   # connection has been made and drbd transitions from WFConnection
5024   # into any other network-connected state (Connected, SyncTarget,
5025   # SyncSource, etc.)
5026
5027   # 1st pass, assemble on all nodes in secondary mode
5028   for idx, inst_disk in enumerate(disks):
5029     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5030       if ignore_size:
5031         node_disk = node_disk.Copy()
5032         node_disk.UnsetSize()
5033       lu.cfg.SetDiskID(node_disk, node)
5034       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5035       msg = result.fail_msg
5036       if msg:
5037         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5038                            " (is_primary=False, pass=1): %s",
5039                            inst_disk.iv_name, node, msg)
5040         if not ignore_secondaries:
5041           disks_ok = False
5042
5043   # FIXME: race condition on drbd migration to primary
5044
5045   # 2nd pass, do only the primary node
5046   for idx, inst_disk in enumerate(disks):
5047     dev_path = None
5048
5049     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5050       if node != instance.primary_node:
5051         continue
5052       if ignore_size:
5053         node_disk = node_disk.Copy()
5054         node_disk.UnsetSize()
5055       lu.cfg.SetDiskID(node_disk, node)
5056       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5057       msg = result.fail_msg
5058       if msg:
5059         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5060                            " (is_primary=True, pass=2): %s",
5061                            inst_disk.iv_name, node, msg)
5062         disks_ok = False
5063       else:
5064         dev_path = result.payload
5065
5066     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5067
5068   # leave the disks configured for the primary node
5069   # this is a workaround that would be fixed better by
5070   # improving the logical/physical id handling
5071   for disk in disks:
5072     lu.cfg.SetDiskID(disk, instance.primary_node)
5073
5074   return disks_ok, device_info
5075
5076
5077 def _StartInstanceDisks(lu, instance, force):
5078   """Start the disks of an instance.
5079
5080   """
5081   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5082                                            ignore_secondaries=force)
5083   if not disks_ok:
5084     _ShutdownInstanceDisks(lu, instance)
5085     if force is not None and not force:
5086       lu.proc.LogWarning("", hint="If the message above refers to a"
5087                          " secondary node,"
5088                          " you can retry the operation using '--force'.")
5089     raise errors.OpExecError("Disk consistency error")
5090
5091
5092 class LUInstanceDeactivateDisks(NoHooksLU):
5093   """Shutdown an instance's disks.
5094
5095   """
5096   REQ_BGL = False
5097
5098   def ExpandNames(self):
5099     self._ExpandAndLockInstance()
5100     self.needed_locks[locking.LEVEL_NODE] = []
5101     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5102
5103   def DeclareLocks(self, level):
5104     if level == locking.LEVEL_NODE:
5105       self._LockInstancesNodes()
5106
5107   def CheckPrereq(self):
5108     """Check prerequisites.
5109
5110     This checks that the instance is in the cluster.
5111
5112     """
5113     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5114     assert self.instance is not None, \
5115       "Cannot retrieve locked instance %s" % self.op.instance_name
5116
5117   def Exec(self, feedback_fn):
5118     """Deactivate the disks
5119
5120     """
5121     instance = self.instance
5122     if self.op.force:
5123       _ShutdownInstanceDisks(self, instance)
5124     else:
5125       _SafeShutdownInstanceDisks(self, instance)
5126
5127
5128 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5129   """Shutdown block devices of an instance.
5130
5131   This function checks if an instance is running, before calling
5132   _ShutdownInstanceDisks.
5133
5134   """
5135   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5136   _ShutdownInstanceDisks(lu, instance, disks=disks)
5137
5138
5139 def _ExpandCheckDisks(instance, disks):
5140   """Return the instance disks selected by the disks list
5141
5142   @type disks: list of L{objects.Disk} or None
5143   @param disks: selected disks
5144   @rtype: list of L{objects.Disk}
5145   @return: selected instance disks to act on
5146
5147   """
5148   if disks is None:
5149     return instance.disks
5150   else:
5151     if not set(disks).issubset(instance.disks):
5152       raise errors.ProgrammerError("Can only act on disks belonging to the"
5153                                    " target instance")
5154     return disks
5155
5156
5157 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5158   """Shutdown block devices of an instance.
5159
5160   This does the shutdown on all nodes of the instance.
5161
5162   If the ignore_primary is false, errors on the primary node are
5163   ignored.
5164
5165   """
5166   all_result = True
5167   disks = _ExpandCheckDisks(instance, disks)
5168
5169   for disk in disks:
5170     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5171       lu.cfg.SetDiskID(top_disk, node)
5172       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5173       msg = result.fail_msg
5174       if msg:
5175         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5176                       disk.iv_name, node, msg)
5177         if ((node == instance.primary_node and not ignore_primary) or
5178             (node != instance.primary_node and not result.offline)):
5179           all_result = False
5180   return all_result
5181
5182
5183 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5184   """Checks if a node has enough free memory.
5185
5186   This function check if a given node has the needed amount of free
5187   memory. In case the node has less memory or we cannot get the
5188   information from the node, this function raise an OpPrereqError
5189   exception.
5190
5191   @type lu: C{LogicalUnit}
5192   @param lu: a logical unit from which we get configuration data
5193   @type node: C{str}
5194   @param node: the node to check
5195   @type reason: C{str}
5196   @param reason: string to use in the error message
5197   @type requested: C{int}
5198   @param requested: the amount of memory in MiB to check for
5199   @type hypervisor_name: C{str}
5200   @param hypervisor_name: the hypervisor to ask for memory stats
5201   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5202       we cannot check the node
5203
5204   """
5205   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5206   nodeinfo[node].Raise("Can't get data from node %s" % node,
5207                        prereq=True, ecode=errors.ECODE_ENVIRON)
5208   free_mem = nodeinfo[node].payload.get('memory_free', None)
5209   if not isinstance(free_mem, int):
5210     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5211                                " was '%s'" % (node, free_mem),
5212                                errors.ECODE_ENVIRON)
5213   if requested > free_mem:
5214     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5215                                " needed %s MiB, available %s MiB" %
5216                                (node, reason, requested, free_mem),
5217                                errors.ECODE_NORES)
5218
5219
5220 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5221   """Checks if nodes have enough free disk space in the all VGs.
5222
5223   This function check if all given nodes have the needed amount of
5224   free disk. In case any node has less disk or we cannot get the
5225   information from the node, this function raise an OpPrereqError
5226   exception.
5227
5228   @type lu: C{LogicalUnit}
5229   @param lu: a logical unit from which we get configuration data
5230   @type nodenames: C{list}
5231   @param nodenames: the list of node names to check
5232   @type req_sizes: C{dict}
5233   @param req_sizes: the hash of vg and corresponding amount of disk in
5234       MiB to check for
5235   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5236       or we cannot check the node
5237
5238   """
5239   for vg, req_size in req_sizes.items():
5240     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5241
5242
5243 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5244   """Checks if nodes have enough free disk space in the specified VG.
5245
5246   This function check if all given nodes have the needed amount of
5247   free disk. In case any node has less disk or we cannot get the
5248   information from the node, this function raise an OpPrereqError
5249   exception.
5250
5251   @type lu: C{LogicalUnit}
5252   @param lu: a logical unit from which we get configuration data
5253   @type nodenames: C{list}
5254   @param nodenames: the list of node names to check
5255   @type vg: C{str}
5256   @param vg: the volume group to check
5257   @type requested: C{int}
5258   @param requested: the amount of disk in MiB to check for
5259   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5260       or we cannot check the node
5261
5262   """
5263   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5264   for node in nodenames:
5265     info = nodeinfo[node]
5266     info.Raise("Cannot get current information from node %s" % node,
5267                prereq=True, ecode=errors.ECODE_ENVIRON)
5268     vg_free = info.payload.get("vg_free", None)
5269     if not isinstance(vg_free, int):
5270       raise errors.OpPrereqError("Can't compute free disk space on node"
5271                                  " %s for vg %s, result was '%s'" %
5272                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5273     if requested > vg_free:
5274       raise errors.OpPrereqError("Not enough disk space on target node %s"
5275                                  " vg %s: required %d MiB, available %d MiB" %
5276                                  (node, vg, requested, vg_free),
5277                                  errors.ECODE_NORES)
5278
5279
5280 class LUInstanceStartup(LogicalUnit):
5281   """Starts an instance.
5282
5283   """
5284   HPATH = "instance-start"
5285   HTYPE = constants.HTYPE_INSTANCE
5286   REQ_BGL = False
5287
5288   def CheckArguments(self):
5289     # extra beparams
5290     if self.op.beparams:
5291       # fill the beparams dict
5292       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5293
5294   def ExpandNames(self):
5295     self._ExpandAndLockInstance()
5296
5297   def BuildHooksEnv(self):
5298     """Build hooks env.
5299
5300     This runs on master, primary and secondary nodes of the instance.
5301
5302     """
5303     env = {
5304       "FORCE": self.op.force,
5305       }
5306
5307     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5308
5309     return env
5310
5311   def BuildHooksNodes(self):
5312     """Build hooks nodes.
5313
5314     """
5315     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5316     return (nl, nl)
5317
5318   def CheckPrereq(self):
5319     """Check prerequisites.
5320
5321     This checks that the instance is in the cluster.
5322
5323     """
5324     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5325     assert self.instance is not None, \
5326       "Cannot retrieve locked instance %s" % self.op.instance_name
5327
5328     # extra hvparams
5329     if self.op.hvparams:
5330       # check hypervisor parameter syntax (locally)
5331       cluster = self.cfg.GetClusterInfo()
5332       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5333       filled_hvp = cluster.FillHV(instance)
5334       filled_hvp.update(self.op.hvparams)
5335       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5336       hv_type.CheckParameterSyntax(filled_hvp)
5337       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5338
5339     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5340
5341     if self.primary_offline and self.op.ignore_offline_nodes:
5342       self.proc.LogWarning("Ignoring offline primary node")
5343
5344       if self.op.hvparams or self.op.beparams:
5345         self.proc.LogWarning("Overridden parameters are ignored")
5346     else:
5347       _CheckNodeOnline(self, instance.primary_node)
5348
5349       bep = self.cfg.GetClusterInfo().FillBE(instance)
5350
5351       # check bridges existence
5352       _CheckInstanceBridgesExist(self, instance)
5353
5354       remote_info = self.rpc.call_instance_info(instance.primary_node,
5355                                                 instance.name,
5356                                                 instance.hypervisor)
5357       remote_info.Raise("Error checking node %s" % instance.primary_node,
5358                         prereq=True, ecode=errors.ECODE_ENVIRON)
5359       if not remote_info.payload: # not running already
5360         _CheckNodeFreeMemory(self, instance.primary_node,
5361                              "starting instance %s" % instance.name,
5362                              bep[constants.BE_MEMORY], instance.hypervisor)
5363
5364   def Exec(self, feedback_fn):
5365     """Start the instance.
5366
5367     """
5368     instance = self.instance
5369     force = self.op.force
5370
5371     self.cfg.MarkInstanceUp(instance.name)
5372
5373     if self.primary_offline:
5374       assert self.op.ignore_offline_nodes
5375       self.proc.LogInfo("Primary node offline, marked instance as started")
5376     else:
5377       node_current = instance.primary_node
5378
5379       _StartInstanceDisks(self, instance, force)
5380
5381       result = self.rpc.call_instance_start(node_current, instance,
5382                                             self.op.hvparams, self.op.beparams)
5383       msg = result.fail_msg
5384       if msg:
5385         _ShutdownInstanceDisks(self, instance)
5386         raise errors.OpExecError("Could not start instance: %s" % msg)
5387
5388
5389 class LUInstanceReboot(LogicalUnit):
5390   """Reboot an instance.
5391
5392   """
5393   HPATH = "instance-reboot"
5394   HTYPE = constants.HTYPE_INSTANCE
5395   REQ_BGL = False
5396
5397   def ExpandNames(self):
5398     self._ExpandAndLockInstance()
5399
5400   def BuildHooksEnv(self):
5401     """Build hooks env.
5402
5403     This runs on master, primary and secondary nodes of the instance.
5404
5405     """
5406     env = {
5407       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5408       "REBOOT_TYPE": self.op.reboot_type,
5409       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5410       }
5411
5412     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5413
5414     return env
5415
5416   def BuildHooksNodes(self):
5417     """Build hooks nodes.
5418
5419     """
5420     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5421     return (nl, nl)
5422
5423   def CheckPrereq(self):
5424     """Check prerequisites.
5425
5426     This checks that the instance is in the cluster.
5427
5428     """
5429     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5430     assert self.instance is not None, \
5431       "Cannot retrieve locked instance %s" % self.op.instance_name
5432
5433     _CheckNodeOnline(self, instance.primary_node)
5434
5435     # check bridges existence
5436     _CheckInstanceBridgesExist(self, instance)
5437
5438   def Exec(self, feedback_fn):
5439     """Reboot the instance.
5440
5441     """
5442     instance = self.instance
5443     ignore_secondaries = self.op.ignore_secondaries
5444     reboot_type = self.op.reboot_type
5445
5446     remote_info = self.rpc.call_instance_info(instance.primary_node,
5447                                               instance.name,
5448                                               instance.hypervisor)
5449     remote_info.Raise("Error checking node %s" % instance.primary_node)
5450     instance_running = bool(remote_info.payload)
5451
5452     node_current = instance.primary_node
5453
5454     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5455                                             constants.INSTANCE_REBOOT_HARD]:
5456       for disk in instance.disks:
5457         self.cfg.SetDiskID(disk, node_current)
5458       result = self.rpc.call_instance_reboot(node_current, instance,
5459                                              reboot_type,
5460                                              self.op.shutdown_timeout)
5461       result.Raise("Could not reboot instance")
5462     else:
5463       if instance_running:
5464         result = self.rpc.call_instance_shutdown(node_current, instance,
5465                                                  self.op.shutdown_timeout)
5466         result.Raise("Could not shutdown instance for full reboot")
5467         _ShutdownInstanceDisks(self, instance)
5468       else:
5469         self.LogInfo("Instance %s was already stopped, starting now",
5470                      instance.name)
5471       _StartInstanceDisks(self, instance, ignore_secondaries)
5472       result = self.rpc.call_instance_start(node_current, instance, None, None)
5473       msg = result.fail_msg
5474       if msg:
5475         _ShutdownInstanceDisks(self, instance)
5476         raise errors.OpExecError("Could not start instance for"
5477                                  " full reboot: %s" % msg)
5478
5479     self.cfg.MarkInstanceUp(instance.name)
5480
5481
5482 class LUInstanceShutdown(LogicalUnit):
5483   """Shutdown an instance.
5484
5485   """
5486   HPATH = "instance-stop"
5487   HTYPE = constants.HTYPE_INSTANCE
5488   REQ_BGL = False
5489
5490   def ExpandNames(self):
5491     self._ExpandAndLockInstance()
5492
5493   def BuildHooksEnv(self):
5494     """Build hooks env.
5495
5496     This runs on master, primary and secondary nodes of the instance.
5497
5498     """
5499     env = _BuildInstanceHookEnvByObject(self, self.instance)
5500     env["TIMEOUT"] = self.op.timeout
5501     return env
5502
5503   def BuildHooksNodes(self):
5504     """Build hooks nodes.
5505
5506     """
5507     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5508     return (nl, nl)
5509
5510   def CheckPrereq(self):
5511     """Check prerequisites.
5512
5513     This checks that the instance is in the cluster.
5514
5515     """
5516     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5517     assert self.instance is not None, \
5518       "Cannot retrieve locked instance %s" % self.op.instance_name
5519
5520     self.primary_offline = \
5521       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5522
5523     if self.primary_offline and self.op.ignore_offline_nodes:
5524       self.proc.LogWarning("Ignoring offline primary node")
5525     else:
5526       _CheckNodeOnline(self, self.instance.primary_node)
5527
5528   def Exec(self, feedback_fn):
5529     """Shutdown the instance.
5530
5531     """
5532     instance = self.instance
5533     node_current = instance.primary_node
5534     timeout = self.op.timeout
5535
5536     self.cfg.MarkInstanceDown(instance.name)
5537
5538     if self.primary_offline:
5539       assert self.op.ignore_offline_nodes
5540       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5541     else:
5542       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5543       msg = result.fail_msg
5544       if msg:
5545         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5546
5547       _ShutdownInstanceDisks(self, instance)
5548
5549
5550 class LUInstanceReinstall(LogicalUnit):
5551   """Reinstall an instance.
5552
5553   """
5554   HPATH = "instance-reinstall"
5555   HTYPE = constants.HTYPE_INSTANCE
5556   REQ_BGL = False
5557
5558   def ExpandNames(self):
5559     self._ExpandAndLockInstance()
5560
5561   def BuildHooksEnv(self):
5562     """Build hooks env.
5563
5564     This runs on master, primary and secondary nodes of the instance.
5565
5566     """
5567     return _BuildInstanceHookEnvByObject(self, self.instance)
5568
5569   def BuildHooksNodes(self):
5570     """Build hooks nodes.
5571
5572     """
5573     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5574     return (nl, nl)
5575
5576   def CheckPrereq(self):
5577     """Check prerequisites.
5578
5579     This checks that the instance is in the cluster and is not running.
5580
5581     """
5582     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5583     assert instance is not None, \
5584       "Cannot retrieve locked instance %s" % self.op.instance_name
5585     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5586                      " offline, cannot reinstall")
5587     for node in instance.secondary_nodes:
5588       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5589                        " cannot reinstall")
5590
5591     if instance.disk_template == constants.DT_DISKLESS:
5592       raise errors.OpPrereqError("Instance '%s' has no disks" %
5593                                  self.op.instance_name,
5594                                  errors.ECODE_INVAL)
5595     _CheckInstanceDown(self, instance, "cannot reinstall")
5596
5597     if self.op.os_type is not None:
5598       # OS verification
5599       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5600       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5601       instance_os = self.op.os_type
5602     else:
5603       instance_os = instance.os
5604
5605     nodelist = list(instance.all_nodes)
5606
5607     if self.op.osparams:
5608       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5609       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5610       self.os_inst = i_osdict # the new dict (without defaults)
5611     else:
5612       self.os_inst = None
5613
5614     self.instance = instance
5615
5616   def Exec(self, feedback_fn):
5617     """Reinstall the instance.
5618
5619     """
5620     inst = self.instance
5621
5622     if self.op.os_type is not None:
5623       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5624       inst.os = self.op.os_type
5625       # Write to configuration
5626       self.cfg.Update(inst, feedback_fn)
5627
5628     _StartInstanceDisks(self, inst, None)
5629     try:
5630       feedback_fn("Running the instance OS create scripts...")
5631       # FIXME: pass debug option from opcode to backend
5632       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5633                                              self.op.debug_level,
5634                                              osparams=self.os_inst)
5635       result.Raise("Could not install OS for instance %s on node %s" %
5636                    (inst.name, inst.primary_node))
5637     finally:
5638       _ShutdownInstanceDisks(self, inst)
5639
5640
5641 class LUInstanceRecreateDisks(LogicalUnit):
5642   """Recreate an instance's missing disks.
5643
5644   """
5645   HPATH = "instance-recreate-disks"
5646   HTYPE = constants.HTYPE_INSTANCE
5647   REQ_BGL = False
5648
5649   def ExpandNames(self):
5650     self._ExpandAndLockInstance()
5651
5652   def BuildHooksEnv(self):
5653     """Build hooks env.
5654
5655     This runs on master, primary and secondary nodes of the instance.
5656
5657     """
5658     return _BuildInstanceHookEnvByObject(self, self.instance)
5659
5660   def BuildHooksNodes(self):
5661     """Build hooks nodes.
5662
5663     """
5664     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5665     return (nl, nl)
5666
5667   def CheckPrereq(self):
5668     """Check prerequisites.
5669
5670     This checks that the instance is in the cluster and is not running.
5671
5672     """
5673     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674     assert instance is not None, \
5675       "Cannot retrieve locked instance %s" % self.op.instance_name
5676     _CheckNodeOnline(self, instance.primary_node)
5677
5678     if instance.disk_template == constants.DT_DISKLESS:
5679       raise errors.OpPrereqError("Instance '%s' has no disks" %
5680                                  self.op.instance_name, errors.ECODE_INVAL)
5681     _CheckInstanceDown(self, instance, "cannot recreate disks")
5682
5683     if not self.op.disks:
5684       self.op.disks = range(len(instance.disks))
5685     else:
5686       for idx in self.op.disks:
5687         if idx >= len(instance.disks):
5688           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5689                                      errors.ECODE_INVAL)
5690
5691     self.instance = instance
5692
5693   def Exec(self, feedback_fn):
5694     """Recreate the disks.
5695
5696     """
5697     to_skip = []
5698     for idx, _ in enumerate(self.instance.disks):
5699       if idx not in self.op.disks: # disk idx has not been passed in
5700         to_skip.append(idx)
5701         continue
5702
5703     _CreateDisks(self, self.instance, to_skip=to_skip)
5704
5705
5706 class LUInstanceRename(LogicalUnit):
5707   """Rename an instance.
5708
5709   """
5710   HPATH = "instance-rename"
5711   HTYPE = constants.HTYPE_INSTANCE
5712
5713   def CheckArguments(self):
5714     """Check arguments.
5715
5716     """
5717     if self.op.ip_check and not self.op.name_check:
5718       # TODO: make the ip check more flexible and not depend on the name check
5719       raise errors.OpPrereqError("Cannot do ip check without a name check",
5720                                  errors.ECODE_INVAL)
5721
5722   def BuildHooksEnv(self):
5723     """Build hooks env.
5724
5725     This runs on master, primary and secondary nodes of the instance.
5726
5727     """
5728     env = _BuildInstanceHookEnvByObject(self, self.instance)
5729     env["INSTANCE_NEW_NAME"] = self.op.new_name
5730     return env
5731
5732   def BuildHooksNodes(self):
5733     """Build hooks nodes.
5734
5735     """
5736     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5737     return (nl, nl)
5738
5739   def CheckPrereq(self):
5740     """Check prerequisites.
5741
5742     This checks that the instance is in the cluster and is not running.
5743
5744     """
5745     self.op.instance_name = _ExpandInstanceName(self.cfg,
5746                                                 self.op.instance_name)
5747     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5748     assert instance is not None
5749     _CheckNodeOnline(self, instance.primary_node)
5750     _CheckInstanceDown(self, instance, "cannot rename")
5751     self.instance = instance
5752
5753     new_name = self.op.new_name
5754     if self.op.name_check:
5755       hostname = netutils.GetHostname(name=new_name)
5756       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5757                    hostname.name)
5758       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5759         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5760                                     " same as given hostname '%s'") %
5761                                     (hostname.name, self.op.new_name),
5762                                     errors.ECODE_INVAL)
5763       new_name = self.op.new_name = hostname.name
5764       if (self.op.ip_check and
5765           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5766         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5767                                    (hostname.ip, new_name),
5768                                    errors.ECODE_NOTUNIQUE)
5769
5770     instance_list = self.cfg.GetInstanceList()
5771     if new_name in instance_list and new_name != instance.name:
5772       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5773                                  new_name, errors.ECODE_EXISTS)
5774
5775   def Exec(self, feedback_fn):
5776     """Rename the instance.
5777
5778     """
5779     inst = self.instance
5780     old_name = inst.name
5781
5782     rename_file_storage = False
5783     if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5784         self.op.new_name != inst.name):
5785       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5786       rename_file_storage = True
5787
5788     self.cfg.RenameInstance(inst.name, self.op.new_name)
5789     # Change the instance lock. This is definitely safe while we hold the BGL.
5790     # Otherwise the new lock would have to be added in acquired mode.
5791     assert self.REQ_BGL
5792     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5793     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5794
5795     # re-read the instance from the configuration after rename
5796     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5797
5798     if rename_file_storage:
5799       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5800       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5801                                                      old_file_storage_dir,
5802                                                      new_file_storage_dir)
5803       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5804                    " (but the instance has been renamed in Ganeti)" %
5805                    (inst.primary_node, old_file_storage_dir,
5806                     new_file_storage_dir))
5807
5808     _StartInstanceDisks(self, inst, None)
5809     try:
5810       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5811                                                  old_name, self.op.debug_level)
5812       msg = result.fail_msg
5813       if msg:
5814         msg = ("Could not run OS rename script for instance %s on node %s"
5815                " (but the instance has been renamed in Ganeti): %s" %
5816                (inst.name, inst.primary_node, msg))
5817         self.proc.LogWarning(msg)
5818     finally:
5819       _ShutdownInstanceDisks(self, inst)
5820
5821     return inst.name
5822
5823
5824 class LUInstanceRemove(LogicalUnit):
5825   """Remove an instance.
5826
5827   """
5828   HPATH = "instance-remove"
5829   HTYPE = constants.HTYPE_INSTANCE
5830   REQ_BGL = False
5831
5832   def ExpandNames(self):
5833     self._ExpandAndLockInstance()
5834     self.needed_locks[locking.LEVEL_NODE] = []
5835     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5836
5837   def DeclareLocks(self, level):
5838     if level == locking.LEVEL_NODE:
5839       self._LockInstancesNodes()
5840
5841   def BuildHooksEnv(self):
5842     """Build hooks env.
5843
5844     This runs on master, primary and secondary nodes of the instance.
5845
5846     """
5847     env = _BuildInstanceHookEnvByObject(self, self.instance)
5848     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5849     return env
5850
5851   def BuildHooksNodes(self):
5852     """Build hooks nodes.
5853
5854     """
5855     nl = [self.cfg.GetMasterNode()]
5856     nl_post = list(self.instance.all_nodes) + nl
5857     return (nl, nl_post)
5858
5859   def CheckPrereq(self):
5860     """Check prerequisites.
5861
5862     This checks that the instance is in the cluster.
5863
5864     """
5865     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5866     assert self.instance is not None, \
5867       "Cannot retrieve locked instance %s" % self.op.instance_name
5868
5869   def Exec(self, feedback_fn):
5870     """Remove the instance.
5871
5872     """
5873     instance = self.instance
5874     logging.info("Shutting down instance %s on node %s",
5875                  instance.name, instance.primary_node)
5876
5877     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5878                                              self.op.shutdown_timeout)
5879     msg = result.fail_msg
5880     if msg:
5881       if self.op.ignore_failures:
5882         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5883       else:
5884         raise errors.OpExecError("Could not shutdown instance %s on"
5885                                  " node %s: %s" %
5886                                  (instance.name, instance.primary_node, msg))
5887
5888     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5889
5890
5891 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5892   """Utility function to remove an instance.
5893
5894   """
5895   logging.info("Removing block devices for instance %s", instance.name)
5896
5897   if not _RemoveDisks(lu, instance):
5898     if not ignore_failures:
5899       raise errors.OpExecError("Can't remove instance's disks")
5900     feedback_fn("Warning: can't remove instance's disks")
5901
5902   logging.info("Removing instance %s out of cluster config", instance.name)
5903
5904   lu.cfg.RemoveInstance(instance.name)
5905
5906   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5907     "Instance lock removal conflict"
5908
5909   # Remove lock for the instance
5910   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5911
5912
5913 class LUInstanceQuery(NoHooksLU):
5914   """Logical unit for querying instances.
5915
5916   """
5917   # pylint: disable-msg=W0142
5918   REQ_BGL = False
5919
5920   def CheckArguments(self):
5921     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5922                              self.op.output_fields, self.op.use_locking)
5923
5924   def ExpandNames(self):
5925     self.iq.ExpandNames(self)
5926
5927   def DeclareLocks(self, level):
5928     self.iq.DeclareLocks(self, level)
5929
5930   def Exec(self, feedback_fn):
5931     return self.iq.OldStyleQuery(self)
5932
5933
5934 class LUInstanceFailover(LogicalUnit):
5935   """Failover an instance.
5936
5937   """
5938   HPATH = "instance-failover"
5939   HTYPE = constants.HTYPE_INSTANCE
5940   REQ_BGL = False
5941
5942   def CheckArguments(self):
5943     """Check the arguments.
5944
5945     """
5946     self.iallocator = getattr(self.op, "iallocator", None)
5947     self.target_node = getattr(self.op, "target_node", None)
5948
5949   def ExpandNames(self):
5950     self._ExpandAndLockInstance()
5951
5952     if self.op.target_node is not None:
5953       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5954
5955     self.needed_locks[locking.LEVEL_NODE] = []
5956     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5957
5958     ignore_consistency = self.op.ignore_consistency
5959     shutdown_timeout = self.op.shutdown_timeout
5960     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5961                                        cleanup=False,
5962                                        iallocator=self.op.iallocator,
5963                                        target_node=self.op.target_node,
5964                                        failover=True,
5965                                        ignore_consistency=ignore_consistency,
5966                                        shutdown_timeout=shutdown_timeout)
5967     self.tasklets = [self._migrater]
5968
5969   def DeclareLocks(self, level):
5970     if level == locking.LEVEL_NODE:
5971       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5972       if instance.disk_template in constants.DTS_EXT_MIRROR:
5973         if self.op.target_node is None:
5974           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5975         else:
5976           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5977                                                    self.op.target_node]
5978         del self.recalculate_locks[locking.LEVEL_NODE]
5979       else:
5980         self._LockInstancesNodes()
5981
5982   def BuildHooksEnv(self):
5983     """Build hooks env.
5984
5985     This runs on master, primary and secondary nodes of the instance.
5986
5987     """
5988     instance = self._migrater.instance
5989     source_node = instance.primary_node
5990     target_node = self._migrater.target_node
5991     env = {
5992       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5993       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5994       "OLD_PRIMARY": source_node,
5995       "NEW_PRIMARY": target_node,
5996       }
5997
5998     if instance.disk_template in constants.DTS_INT_MIRROR:
5999       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6000       env["NEW_SECONDARY"] = source_node
6001     else:
6002       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6003
6004     env.update(_BuildInstanceHookEnvByObject(self, instance))
6005
6006     return env
6007
6008   def BuildHooksNodes(self):
6009     """Build hooks nodes.
6010
6011     """
6012     instance = self._migrater.instance
6013     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6014     return (nl, nl + [instance.primary_node])
6015
6016
6017 class LUInstanceMigrate(LogicalUnit):
6018   """Migrate an instance.
6019
6020   This is migration without shutting down, compared to the failover,
6021   which is done with shutdown.
6022
6023   """
6024   HPATH = "instance-migrate"
6025   HTYPE = constants.HTYPE_INSTANCE
6026   REQ_BGL = False
6027
6028   def ExpandNames(self):
6029     self._ExpandAndLockInstance()
6030
6031     if self.op.target_node is not None:
6032       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6033
6034     self.needed_locks[locking.LEVEL_NODE] = []
6035     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6036
6037     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6038                                        cleanup=self.op.cleanup,
6039                                        iallocator=self.op.iallocator,
6040                                        target_node=self.op.target_node,
6041                                        failover=False,
6042                                        fallback=self.op.allow_failover)
6043     self.tasklets = [self._migrater]
6044
6045   def DeclareLocks(self, level):
6046     if level == locking.LEVEL_NODE:
6047       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6048       if instance.disk_template in constants.DTS_EXT_MIRROR:
6049         if self.op.target_node is None:
6050           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6051         else:
6052           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6053                                                    self.op.target_node]
6054         del self.recalculate_locks[locking.LEVEL_NODE]
6055       else:
6056         self._LockInstancesNodes()
6057
6058   def BuildHooksEnv(self):
6059     """Build hooks env.
6060
6061     This runs on master, primary and secondary nodes of the instance.
6062
6063     """
6064     instance = self._migrater.instance
6065     source_node = instance.primary_node
6066     target_node = self._migrater.target_node
6067     env = _BuildInstanceHookEnvByObject(self, instance)
6068     env.update({
6069       "MIGRATE_LIVE": self._migrater.live,
6070       "MIGRATE_CLEANUP": self.op.cleanup,
6071       "OLD_PRIMARY": source_node,
6072       "NEW_PRIMARY": target_node,
6073       })
6074
6075     if instance.disk_template in constants.DTS_INT_MIRROR:
6076       env["OLD_SECONDARY"] = target_node
6077       env["NEW_SECONDARY"] = source_node
6078     else:
6079       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6080
6081     return env
6082
6083   def BuildHooksNodes(self):
6084     """Build hooks nodes.
6085
6086     """
6087     instance = self._migrater.instance
6088     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6089     return (nl, nl + [instance.primary_node])
6090
6091
6092 class LUInstanceMove(LogicalUnit):
6093   """Move an instance by data-copying.
6094
6095   """
6096   HPATH = "instance-move"
6097   HTYPE = constants.HTYPE_INSTANCE
6098   REQ_BGL = False
6099
6100   def ExpandNames(self):
6101     self._ExpandAndLockInstance()
6102     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6103     self.op.target_node = target_node
6104     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6105     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6106
6107   def DeclareLocks(self, level):
6108     if level == locking.LEVEL_NODE:
6109       self._LockInstancesNodes(primary_only=True)
6110
6111   def BuildHooksEnv(self):
6112     """Build hooks env.
6113
6114     This runs on master, primary and secondary nodes of the instance.
6115
6116     """
6117     env = {
6118       "TARGET_NODE": self.op.target_node,
6119       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6120       }
6121     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6122     return env
6123
6124   def BuildHooksNodes(self):
6125     """Build hooks nodes.
6126
6127     """
6128     nl = [
6129       self.cfg.GetMasterNode(),
6130       self.instance.primary_node,
6131       self.op.target_node,
6132       ]
6133     return (nl, nl)
6134
6135   def CheckPrereq(self):
6136     """Check prerequisites.
6137
6138     This checks that the instance is in the cluster.
6139
6140     """
6141     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6142     assert self.instance is not None, \
6143       "Cannot retrieve locked instance %s" % self.op.instance_name
6144
6145     node = self.cfg.GetNodeInfo(self.op.target_node)
6146     assert node is not None, \
6147       "Cannot retrieve locked node %s" % self.op.target_node
6148
6149     self.target_node = target_node = node.name
6150
6151     if target_node == instance.primary_node:
6152       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6153                                  (instance.name, target_node),
6154                                  errors.ECODE_STATE)
6155
6156     bep = self.cfg.GetClusterInfo().FillBE(instance)
6157
6158     for idx, dsk in enumerate(instance.disks):
6159       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6160         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6161                                    " cannot copy" % idx, errors.ECODE_STATE)
6162
6163     _CheckNodeOnline(self, target_node)
6164     _CheckNodeNotDrained(self, target_node)
6165     _CheckNodeVmCapable(self, target_node)
6166
6167     if instance.admin_up:
6168       # check memory requirements on the secondary node
6169       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6170                            instance.name, bep[constants.BE_MEMORY],
6171                            instance.hypervisor)
6172     else:
6173       self.LogInfo("Not checking memory on the secondary node as"
6174                    " instance will not be started")
6175
6176     # check bridge existance
6177     _CheckInstanceBridgesExist(self, instance, node=target_node)
6178
6179   def Exec(self, feedback_fn):
6180     """Move an instance.
6181
6182     The move is done by shutting it down on its present node, copying
6183     the data over (slow) and starting it on the new node.
6184
6185     """
6186     instance = self.instance
6187
6188     source_node = instance.primary_node
6189     target_node = self.target_node
6190
6191     self.LogInfo("Shutting down instance %s on source node %s",
6192                  instance.name, source_node)
6193
6194     result = self.rpc.call_instance_shutdown(source_node, instance,
6195                                              self.op.shutdown_timeout)
6196     msg = result.fail_msg
6197     if msg:
6198       if self.op.ignore_consistency:
6199         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6200                              " Proceeding anyway. Please make sure node"
6201                              " %s is down. Error details: %s",
6202                              instance.name, source_node, source_node, msg)
6203       else:
6204         raise errors.OpExecError("Could not shutdown instance %s on"
6205                                  " node %s: %s" %
6206                                  (instance.name, source_node, msg))
6207
6208     # create the target disks
6209     try:
6210       _CreateDisks(self, instance, target_node=target_node)
6211     except errors.OpExecError:
6212       self.LogWarning("Device creation failed, reverting...")
6213       try:
6214         _RemoveDisks(self, instance, target_node=target_node)
6215       finally:
6216         self.cfg.ReleaseDRBDMinors(instance.name)
6217         raise
6218
6219     cluster_name = self.cfg.GetClusterInfo().cluster_name
6220
6221     errs = []
6222     # activate, get path, copy the data over
6223     for idx, disk in enumerate(instance.disks):
6224       self.LogInfo("Copying data for disk %d", idx)
6225       result = self.rpc.call_blockdev_assemble(target_node, disk,
6226                                                instance.name, True, idx)
6227       if result.fail_msg:
6228         self.LogWarning("Can't assemble newly created disk %d: %s",
6229                         idx, result.fail_msg)
6230         errs.append(result.fail_msg)
6231         break
6232       dev_path = result.payload
6233       result = self.rpc.call_blockdev_export(source_node, disk,
6234                                              target_node, dev_path,
6235                                              cluster_name)
6236       if result.fail_msg:
6237         self.LogWarning("Can't copy data over for disk %d: %s",
6238                         idx, result.fail_msg)
6239         errs.append(result.fail_msg)
6240         break
6241
6242     if errs:
6243       self.LogWarning("Some disks failed to copy, aborting")
6244       try:
6245         _RemoveDisks(self, instance, target_node=target_node)
6246       finally:
6247         self.cfg.ReleaseDRBDMinors(instance.name)
6248         raise errors.OpExecError("Errors during disk copy: %s" %
6249                                  (",".join(errs),))
6250
6251     instance.primary_node = target_node
6252     self.cfg.Update(instance, feedback_fn)
6253
6254     self.LogInfo("Removing the disks on the original node")
6255     _RemoveDisks(self, instance, target_node=source_node)
6256
6257     # Only start the instance if it's marked as up
6258     if instance.admin_up:
6259       self.LogInfo("Starting instance %s on node %s",
6260                    instance.name, target_node)
6261
6262       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6263                                            ignore_secondaries=True)
6264       if not disks_ok:
6265         _ShutdownInstanceDisks(self, instance)
6266         raise errors.OpExecError("Can't activate the instance's disks")
6267
6268       result = self.rpc.call_instance_start(target_node, instance, None, None)
6269       msg = result.fail_msg
6270       if msg:
6271         _ShutdownInstanceDisks(self, instance)
6272         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6273                                  (instance.name, target_node, msg))
6274
6275
6276 class LUNodeMigrate(LogicalUnit):
6277   """Migrate all instances from a node.
6278
6279   """
6280   HPATH = "node-migrate"
6281   HTYPE = constants.HTYPE_NODE
6282   REQ_BGL = False
6283
6284   def CheckArguments(self):
6285     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6286
6287   def ExpandNames(self):
6288     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6289
6290     self.needed_locks = {}
6291
6292     # Create tasklets for migrating instances for all instances on this node
6293     names = []
6294     tasklets = []
6295
6296     self.lock_all_nodes = False
6297
6298     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6299       logging.debug("Migrating instance %s", inst.name)
6300       names.append(inst.name)
6301
6302       tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6303                                         iallocator=self.op.iallocator,
6304                                         taget_node=None))
6305
6306       if inst.disk_template in constants.DTS_EXT_MIRROR:
6307         # We need to lock all nodes, as the iallocator will choose the
6308         # destination nodes afterwards
6309         self.lock_all_nodes = True
6310
6311     self.tasklets = tasklets
6312
6313     # Declare node locks
6314     if self.lock_all_nodes:
6315       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6316     else:
6317       self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6318       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6319
6320     # Declare instance locks
6321     self.needed_locks[locking.LEVEL_INSTANCE] = names
6322
6323   def DeclareLocks(self, level):
6324     if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6325       self._LockInstancesNodes()
6326
6327   def BuildHooksEnv(self):
6328     """Build hooks env.
6329
6330     This runs on the master, the primary and all the secondaries.
6331
6332     """
6333     return {
6334       "NODE_NAME": self.op.node_name,
6335       }
6336
6337   def BuildHooksNodes(self):
6338     """Build hooks nodes.
6339
6340     """
6341     nl = [self.cfg.GetMasterNode()]
6342     return (nl, nl)
6343
6344
6345 class TLMigrateInstance(Tasklet):
6346   """Tasklet class for instance migration.
6347
6348   @type live: boolean
6349   @ivar live: whether the migration will be done live or non-live;
6350       this variable is initalized only after CheckPrereq has run
6351   @type cleanup: boolean
6352   @ivar cleanup: Wheater we cleanup from a failed migration
6353   @type iallocator: string
6354   @ivar iallocator: The iallocator used to determine target_node
6355   @type target_node: string
6356   @ivar target_node: If given, the target_node to reallocate the instance to
6357   @type failover: boolean
6358   @ivar failover: Whether operation results in failover or migration
6359   @type fallback: boolean
6360   @ivar fallback: Whether fallback to failover is allowed if migration not
6361                   possible
6362   @type ignore_consistency: boolean
6363   @ivar ignore_consistency: Wheter we should ignore consistency between source
6364                             and target node
6365   @type shutdown_timeout: int
6366   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6367
6368   """
6369   def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6370                target_node=None, failover=False, fallback=False,
6371                ignore_consistency=False,
6372                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6373     """Initializes this class.
6374
6375     """
6376     Tasklet.__init__(self, lu)
6377
6378     # Parameters
6379     self.instance_name = instance_name
6380     self.cleanup = cleanup
6381     self.live = False # will be overridden later
6382     self.iallocator = iallocator
6383     self.target_node = target_node
6384     self.failover = failover
6385     self.fallback = fallback
6386     self.ignore_consistency = ignore_consistency
6387     self.shutdown_timeout = shutdown_timeout
6388
6389   def CheckPrereq(self):
6390     """Check prerequisites.
6391
6392     This checks that the instance is in the cluster.
6393
6394     """
6395     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6396     instance = self.cfg.GetInstanceInfo(instance_name)
6397     assert instance is not None
6398     self.instance = instance
6399
6400     if (not self.cleanup and not instance.admin_up and not self.failover and
6401         self.fallback):
6402       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6403                       " to failover")
6404       self.failover = True
6405
6406     if instance.disk_template not in constants.DTS_MIRRORED:
6407       if self.failover:
6408         text = "failovers"
6409       else:
6410         text = "migrations"
6411       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6412                                  " %s" % (instance.disk_template, text),
6413                                  errors.ECODE_STATE)
6414
6415     if instance.disk_template in constants.DTS_EXT_MIRROR:
6416       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6417
6418       if self.iallocator:
6419         self._RunAllocator()
6420
6421       # self.target_node is already populated, either directly or by the
6422       # iallocator run
6423       target_node = self.target_node
6424
6425       if len(self.lu.tasklets) == 1:
6426         # It is safe to remove locks only when we're the only tasklet in the LU
6427         nodes_keep = [instance.primary_node, self.target_node]
6428         nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6429                      if node not in nodes_keep]
6430         self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6431         self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6432
6433     else:
6434       secondary_nodes = instance.secondary_nodes
6435       if not secondary_nodes:
6436         raise errors.ConfigurationError("No secondary node but using"
6437                                         " %s disk template" %
6438                                         instance.disk_template)
6439       target_node = secondary_nodes[0]
6440       if self.iallocator or (self.target_node and
6441                              self.target_node != target_node):
6442         if self.failover:
6443           text = "failed over"
6444         else:
6445           text = "migrated"
6446         raise errors.OpPrereqError("Instances with disk template %s cannot"
6447                                    " be %s to arbitrary nodes"
6448                                    " (neither an iallocator nor a target"
6449                                    " node can be passed)" %
6450                                    (instance.disk_template, text),
6451                                    errors.ECODE_INVAL)
6452
6453     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6454
6455     # check memory requirements on the secondary node
6456     if not self.failover or instance.admin_up:
6457       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6458                            instance.name, i_be[constants.BE_MEMORY],
6459                            instance.hypervisor)
6460     else:
6461       self.lu.LogInfo("Not checking memory on the secondary node as"
6462                       " instance will not be started")
6463
6464     # check bridge existance
6465     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6466
6467     if not self.cleanup:
6468       _CheckNodeNotDrained(self.lu, target_node)
6469       if not self.failover:
6470         result = self.rpc.call_instance_migratable(instance.primary_node,
6471                                                    instance)
6472         if result.fail_msg and self.fallback:
6473           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6474                           " failover")
6475           self.failover = True
6476         else:
6477           result.Raise("Can't migrate, please use failover",
6478                        prereq=True, ecode=errors.ECODE_STATE)
6479
6480     assert not (self.failover and self.cleanup)
6481
6482   def _RunAllocator(self):
6483     """Run the allocator based on input opcode.
6484
6485     """
6486     ial = IAllocator(self.cfg, self.rpc,
6487                      mode=constants.IALLOCATOR_MODE_RELOC,
6488                      name=self.instance_name,
6489                      # TODO See why hail breaks with a single node below
6490                      relocate_from=[self.instance.primary_node,
6491                                     self.instance.primary_node],
6492                      )
6493
6494     ial.Run(self.iallocator)
6495
6496     if not ial.success:
6497       raise errors.OpPrereqError("Can't compute nodes using"
6498                                  " iallocator '%s': %s" %
6499                                  (self.iallocator, ial.info),
6500                                  errors.ECODE_NORES)
6501     if len(ial.result) != ial.required_nodes:
6502       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6503                                  " of nodes (%s), required %s" %
6504                                  (self.iallocator, len(ial.result),
6505                                   ial.required_nodes), errors.ECODE_FAULT)
6506     self.target_node = ial.result[0]
6507     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6508                  self.instance_name, self.iallocator,
6509                  utils.CommaJoin(ial.result))
6510
6511     if not self.failover:
6512       if self.lu.op.live is not None and self.lu.op.mode is not None:
6513         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6514                                    " parameters are accepted",
6515                                    errors.ECODE_INVAL)
6516       if self.lu.op.live is not None:
6517         if self.lu.op.live:
6518           self.lu.op.mode = constants.HT_MIGRATION_LIVE
6519         else:
6520           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6521         # reset the 'live' parameter to None so that repeated
6522         # invocations of CheckPrereq do not raise an exception
6523         self.lu.op.live = None
6524       elif self.lu.op.mode is None:
6525         # read the default value from the hypervisor
6526         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6527                                                 skip_globals=False)
6528         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6529
6530       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6531     else:
6532       # Failover is never live
6533       self.live = False
6534
6535   def _WaitUntilSync(self):
6536     """Poll with custom rpc for disk sync.
6537
6538     This uses our own step-based rpc call.
6539
6540     """
6541     self.feedback_fn("* wait until resync is done")
6542     all_done = False
6543     while not all_done:
6544       all_done = True
6545       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6546                                             self.nodes_ip,
6547                                             self.instance.disks)
6548       min_percent = 100
6549       for node, nres in result.items():
6550         nres.Raise("Cannot resync disks on node %s" % node)
6551         node_done, node_percent = nres.payload
6552         all_done = all_done and node_done
6553         if node_percent is not None:
6554           min_percent = min(min_percent, node_percent)
6555       if not all_done:
6556         if min_percent < 100:
6557           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6558         time.sleep(2)
6559
6560   def _EnsureSecondary(self, node):
6561     """Demote a node to secondary.
6562
6563     """
6564     self.feedback_fn("* switching node %s to secondary mode" % node)
6565
6566     for dev in self.instance.disks:
6567       self.cfg.SetDiskID(dev, node)
6568
6569     result = self.rpc.call_blockdev_close(node, self.instance.name,
6570                                           self.instance.disks)
6571     result.Raise("Cannot change disk to secondary on node %s" % node)
6572
6573   def _GoStandalone(self):
6574     """Disconnect from the network.
6575
6576     """
6577     self.feedback_fn("* changing into standalone mode")
6578     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6579                                                self.instance.disks)
6580     for node, nres in result.items():
6581       nres.Raise("Cannot disconnect disks node %s" % node)
6582
6583   def _GoReconnect(self, multimaster):
6584     """Reconnect to the network.
6585
6586     """
6587     if multimaster:
6588       msg = "dual-master"
6589     else:
6590       msg = "single-master"
6591     self.feedback_fn("* changing disks into %s mode" % msg)
6592     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6593                                            self.instance.disks,
6594                                            self.instance.name, multimaster)
6595     for node, nres in result.items():
6596       nres.Raise("Cannot change disks config on node %s" % node)
6597
6598   def _ExecCleanup(self):
6599     """Try to cleanup after a failed migration.
6600
6601     The cleanup is done by:
6602       - check that the instance is running only on one node
6603         (and update the config if needed)
6604       - change disks on its secondary node to secondary
6605       - wait until disks are fully synchronized
6606       - disconnect from the network
6607       - change disks into single-master mode
6608       - wait again until disks are fully synchronized
6609
6610     """
6611     instance = self.instance
6612     target_node = self.target_node
6613     source_node = self.source_node
6614
6615     # check running on only one node
6616     self.feedback_fn("* checking where the instance actually runs"
6617                      " (if this hangs, the hypervisor might be in"
6618                      " a bad state)")
6619     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6620     for node, result in ins_l.items():
6621       result.Raise("Can't contact node %s" % node)
6622
6623     runningon_source = instance.name in ins_l[source_node].payload
6624     runningon_target = instance.name in ins_l[target_node].payload
6625
6626     if runningon_source and runningon_target:
6627       raise errors.OpExecError("Instance seems to be running on two nodes,"
6628                                " or the hypervisor is confused. You will have"
6629                                " to ensure manually that it runs only on one"
6630                                " and restart this operation.")
6631
6632     if not (runningon_source or runningon_target):
6633       raise errors.OpExecError("Instance does not seem to be running at all."
6634                                " In this case, it's safer to repair by"
6635                                " running 'gnt-instance stop' to ensure disk"
6636                                " shutdown, and then restarting it.")
6637
6638     if runningon_target:
6639       # the migration has actually succeeded, we need to update the config
6640       self.feedback_fn("* instance running on secondary node (%s),"
6641                        " updating config" % target_node)
6642       instance.primary_node = target_node
6643       self.cfg.Update(instance, self.feedback_fn)
6644       demoted_node = source_node
6645     else:
6646       self.feedback_fn("* instance confirmed to be running on its"
6647                        " primary node (%s)" % source_node)
6648       demoted_node = target_node
6649
6650     if instance.disk_template in constants.DTS_INT_MIRROR:
6651       self._EnsureSecondary(demoted_node)
6652       try:
6653         self._WaitUntilSync()
6654       except errors.OpExecError:
6655         # we ignore here errors, since if the device is standalone, it
6656         # won't be able to sync
6657         pass
6658       self._GoStandalone()
6659       self._GoReconnect(False)
6660       self._WaitUntilSync()
6661
6662     self.feedback_fn("* done")
6663
6664   def _RevertDiskStatus(self):
6665     """Try to revert the disk status after a failed migration.
6666
6667     """
6668     target_node = self.target_node
6669     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6670       return
6671
6672     try:
6673       self._EnsureSecondary(target_node)
6674       self._GoStandalone()
6675       self._GoReconnect(False)
6676       self._WaitUntilSync()
6677     except errors.OpExecError, err:
6678       self.lu.LogWarning("Migration failed and I can't reconnect the"
6679                          " drives: error '%s'\n"
6680                          "Please look and recover the instance status" %
6681                          str(err))
6682
6683   def _AbortMigration(self):
6684     """Call the hypervisor code to abort a started migration.
6685
6686     """
6687     instance = self.instance
6688     target_node = self.target_node
6689     migration_info = self.migration_info
6690
6691     abort_result = self.rpc.call_finalize_migration(target_node,
6692                                                     instance,
6693                                                     migration_info,
6694                                                     False)
6695     abort_msg = abort_result.fail_msg
6696     if abort_msg:
6697       logging.error("Aborting migration failed on target node %s: %s",
6698                     target_node, abort_msg)
6699       # Don't raise an exception here, as we stil have to try to revert the
6700       # disk status, even if this step failed.
6701
6702   def _ExecMigration(self):
6703     """Migrate an instance.
6704
6705     The migrate is done by:
6706       - change the disks into dual-master mode
6707       - wait until disks are fully synchronized again
6708       - migrate the instance
6709       - change disks on the new secondary node (the old primary) to secondary
6710       - wait until disks are fully synchronized
6711       - change disks into single-master mode
6712
6713     """
6714     instance = self.instance
6715     target_node = self.target_node
6716     source_node = self.source_node
6717
6718     self.feedback_fn("* checking disk consistency between source and target")
6719     for dev in instance.disks:
6720       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6721         raise errors.OpExecError("Disk %s is degraded or not fully"
6722                                  " synchronized on target node,"
6723                                  " aborting migrate." % dev.iv_name)
6724
6725     # First get the migration information from the remote node
6726     result = self.rpc.call_migration_info(source_node, instance)
6727     msg = result.fail_msg
6728     if msg:
6729       log_err = ("Failed fetching source migration information from %s: %s" %
6730                  (source_node, msg))
6731       logging.error(log_err)
6732       raise errors.OpExecError(log_err)
6733
6734     self.migration_info = migration_info = result.payload
6735
6736     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6737       # Then switch the disks to master/master mode
6738       self._EnsureSecondary(target_node)
6739       self._GoStandalone()
6740       self._GoReconnect(True)
6741       self._WaitUntilSync()
6742
6743     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6744     result = self.rpc.call_accept_instance(target_node,
6745                                            instance,
6746                                            migration_info,
6747                                            self.nodes_ip[target_node])
6748
6749     msg = result.fail_msg
6750     if msg:
6751       logging.error("Instance pre-migration failed, trying to revert"
6752                     " disk status: %s", msg)
6753       self.feedback_fn("Pre-migration failed, aborting")
6754       self._AbortMigration()
6755       self._RevertDiskStatus()
6756       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6757                                (instance.name, msg))
6758
6759     self.feedback_fn("* migrating instance to %s" % target_node)
6760     result = self.rpc.call_instance_migrate(source_node, instance,
6761                                             self.nodes_ip[target_node],
6762                                             self.live)
6763     msg = result.fail_msg
6764     if msg:
6765       logging.error("Instance migration failed, trying to revert"
6766                     " disk status: %s", msg)
6767       self.feedback_fn("Migration failed, aborting")
6768       self._AbortMigration()
6769       self._RevertDiskStatus()
6770       raise errors.OpExecError("Could not migrate instance %s: %s" %
6771                                (instance.name, msg))
6772
6773     instance.primary_node = target_node
6774     # distribute new instance config to the other nodes
6775     self.cfg.Update(instance, self.feedback_fn)
6776
6777     result = self.rpc.call_finalize_migration(target_node,
6778                                               instance,
6779                                               migration_info,
6780                                               True)
6781     msg = result.fail_msg
6782     if msg:
6783       logging.error("Instance migration succeeded, but finalization failed:"
6784                     " %s", msg)
6785       raise errors.OpExecError("Could not finalize instance migration: %s" %
6786                                msg)
6787
6788     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6789       self._EnsureSecondary(source_node)
6790       self._WaitUntilSync()
6791       self._GoStandalone()
6792       self._GoReconnect(False)
6793       self._WaitUntilSync()
6794
6795     self.feedback_fn("* done")
6796
6797   def _ExecFailover(self):
6798     """Failover an instance.
6799
6800     The failover is done by shutting it down on its present node and
6801     starting it on the secondary.
6802
6803     """
6804     instance = self.instance
6805     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6806
6807     source_node = instance.primary_node
6808     target_node = self.target_node
6809
6810     if instance.admin_up:
6811       self.feedback_fn("* checking disk consistency between source and target")
6812       for dev in instance.disks:
6813         # for drbd, these are drbd over lvm
6814         if not _CheckDiskConsistency(self, dev, target_node, False):
6815           if not self.ignore_consistency:
6816             raise errors.OpExecError("Disk %s is degraded on target node,"
6817                                      " aborting failover." % dev.iv_name)
6818     else:
6819       self.feedback_fn("* not checking disk consistency as instance is not"
6820                        " running")
6821
6822     self.feedback_fn("* shutting down instance on source node")
6823     logging.info("Shutting down instance %s on node %s",
6824                  instance.name, source_node)
6825
6826     result = self.rpc.call_instance_shutdown(source_node, instance,
6827                                              self.shutdown_timeout)
6828     msg = result.fail_msg
6829     if msg:
6830       if self.ignore_consistency or primary_node.offline:
6831         self.lu.LogWarning("Could not shutdown instance %s on node %s."
6832                            " Proceeding anyway. Please make sure node"
6833                            " %s is down. Error details: %s",
6834                            instance.name, source_node, source_node, msg)
6835       else:
6836         raise errors.OpExecError("Could not shutdown instance %s on"
6837                                  " node %s: %s" %
6838                                  (instance.name, source_node, msg))
6839
6840     self.feedback_fn("* deactivating the instance's disks on source node")
6841     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6842       raise errors.OpExecError("Can't shut down the instance's disks.")
6843
6844     instance.primary_node = target_node
6845     # distribute new instance config to the other nodes
6846     self.cfg.Update(instance, self.feedback_fn)
6847
6848     # Only start the instance if it's marked as up
6849     if instance.admin_up:
6850       self.feedback_fn("* activating the instance's disks on target node")
6851       logging.info("Starting instance %s on node %s",
6852                    instance.name, target_node)
6853
6854       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6855                                            ignore_secondaries=True)
6856       if not disks_ok:
6857         _ShutdownInstanceDisks(self, instance)
6858         raise errors.OpExecError("Can't activate the instance's disks")
6859
6860       self.feedback_fn("* starting the instance on the target node")
6861       result = self.rpc.call_instance_start(target_node, instance, None, None)
6862       msg = result.fail_msg
6863       if msg:
6864         _ShutdownInstanceDisks(self, instance)
6865         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6866                                  (instance.name, target_node, msg))
6867
6868   def Exec(self, feedback_fn):
6869     """Perform the migration.
6870
6871     """
6872     self.feedback_fn = feedback_fn
6873     self.source_node = self.instance.primary_node
6874
6875     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6876     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6877       self.target_node = self.instance.secondary_nodes[0]
6878       # Otherwise self.target_node has been populated either
6879       # directly, or through an iallocator.
6880
6881     self.all_nodes = [self.source_node, self.target_node]
6882     self.nodes_ip = {
6883       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6884       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6885       }
6886
6887     if self.failover:
6888       feedback_fn("Failover instance %s" % self.instance.name)
6889       self._ExecFailover()
6890     else:
6891       feedback_fn("Migrating instance %s" % self.instance.name)
6892
6893       if self.cleanup:
6894         return self._ExecCleanup()
6895       else:
6896         return self._ExecMigration()
6897
6898
6899 def _CreateBlockDev(lu, node, instance, device, force_create,
6900                     info, force_open):
6901   """Create a tree of block devices on a given node.
6902
6903   If this device type has to be created on secondaries, create it and
6904   all its children.
6905
6906   If not, just recurse to children keeping the same 'force' value.
6907
6908   @param lu: the lu on whose behalf we execute
6909   @param node: the node on which to create the device
6910   @type instance: L{objects.Instance}
6911   @param instance: the instance which owns the device
6912   @type device: L{objects.Disk}
6913   @param device: the device to create
6914   @type force_create: boolean
6915   @param force_create: whether to force creation of this device; this
6916       will be change to True whenever we find a device which has
6917       CreateOnSecondary() attribute
6918   @param info: the extra 'metadata' we should attach to the device
6919       (this will be represented as a LVM tag)
6920   @type force_open: boolean
6921   @param force_open: this parameter will be passes to the
6922       L{backend.BlockdevCreate} function where it specifies
6923       whether we run on primary or not, and it affects both
6924       the child assembly and the device own Open() execution
6925
6926   """
6927   if device.CreateOnSecondary():
6928     force_create = True
6929
6930   if device.children:
6931     for child in device.children:
6932       _CreateBlockDev(lu, node, instance, child, force_create,
6933                       info, force_open)
6934
6935   if not force_create:
6936     return
6937
6938   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6939
6940
6941 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6942   """Create a single block device on a given node.
6943
6944   This will not recurse over children of the device, so they must be
6945   created in advance.
6946
6947   @param lu: the lu on whose behalf we execute
6948   @param node: the node on which to create the device
6949   @type instance: L{objects.Instance}
6950   @param instance: the instance which owns the device
6951   @type device: L{objects.Disk}
6952   @param device: the device to create
6953   @param info: the extra 'metadata' we should attach to the device
6954       (this will be represented as a LVM tag)
6955   @type force_open: boolean
6956   @param force_open: this parameter will be passes to the
6957       L{backend.BlockdevCreate} function where it specifies
6958       whether we run on primary or not, and it affects both
6959       the child assembly and the device own Open() execution
6960
6961   """
6962   lu.cfg.SetDiskID(device, node)
6963   result = lu.rpc.call_blockdev_create(node, device, device.size,
6964                                        instance.name, force_open, info)
6965   result.Raise("Can't create block device %s on"
6966                " node %s for instance %s" % (device, node, instance.name))
6967   if device.physical_id is None:
6968     device.physical_id = result.payload
6969
6970
6971 def _GenerateUniqueNames(lu, exts):
6972   """Generate a suitable LV name.
6973
6974   This will generate a logical volume name for the given instance.
6975
6976   """
6977   results = []
6978   for val in exts:
6979     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6980     results.append("%s%s" % (new_id, val))
6981   return results
6982
6983
6984 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6985                          p_minor, s_minor):
6986   """Generate a drbd8 device complete with its children.
6987
6988   """
6989   port = lu.cfg.AllocatePort()
6990   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6991   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6992                           logical_id=(vgname, names[0]))
6993   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6994                           logical_id=(vgname, names[1]))
6995   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6996                           logical_id=(primary, secondary, port,
6997                                       p_minor, s_minor,
6998                                       shared_secret),
6999                           children=[dev_data, dev_meta],
7000                           iv_name=iv_name)
7001   return drbd_dev
7002
7003
7004 def _GenerateDiskTemplate(lu, template_name,
7005                           instance_name, primary_node,
7006                           secondary_nodes, disk_info,
7007                           file_storage_dir, file_driver,
7008                           base_index, feedback_fn):
7009   """Generate the entire disk layout for a given template type.
7010
7011   """
7012   #TODO: compute space requirements
7013
7014   vgname = lu.cfg.GetVGName()
7015   disk_count = len(disk_info)
7016   disks = []
7017   if template_name == constants.DT_DISKLESS:
7018     pass
7019   elif template_name == constants.DT_PLAIN:
7020     if len(secondary_nodes) != 0:
7021       raise errors.ProgrammerError("Wrong template configuration")
7022
7023     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7024                                       for i in range(disk_count)])
7025     for idx, disk in enumerate(disk_info):
7026       disk_index = idx + base_index
7027       vg = disk.get(constants.IDISK_VG, vgname)
7028       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7029       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7030                               size=disk[constants.IDISK_SIZE],
7031                               logical_id=(vg, names[idx]),
7032                               iv_name="disk/%d" % disk_index,
7033                               mode=disk[constants.IDISK_MODE])
7034       disks.append(disk_dev)
7035   elif template_name == constants.DT_DRBD8:
7036     if len(secondary_nodes) != 1:
7037       raise errors.ProgrammerError("Wrong template configuration")
7038     remote_node = secondary_nodes[0]
7039     minors = lu.cfg.AllocateDRBDMinor(
7040       [primary_node, remote_node] * len(disk_info), instance_name)
7041
7042     names = []
7043     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7044                                                for i in range(disk_count)]):
7045       names.append(lv_prefix + "_data")
7046       names.append(lv_prefix + "_meta")
7047     for idx, disk in enumerate(disk_info):
7048       disk_index = idx + base_index
7049       vg = disk.get(constants.IDISK_VG, vgname)
7050       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7051                                       disk[constants.IDISK_SIZE], vg,
7052                                       names[idx * 2:idx * 2 + 2],
7053                                       "disk/%d" % disk_index,
7054                                       minors[idx * 2], minors[idx * 2 + 1])
7055       disk_dev.mode = disk[constants.IDISK_MODE]
7056       disks.append(disk_dev)
7057   elif template_name == constants.DT_FILE:
7058     if len(secondary_nodes) != 0:
7059       raise errors.ProgrammerError("Wrong template configuration")
7060
7061     opcodes.RequireFileStorage()
7062
7063     for idx, disk in enumerate(disk_info):
7064       disk_index = idx + base_index
7065       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7066                               size=disk[constants.IDISK_SIZE],
7067                               iv_name="disk/%d" % disk_index,
7068                               logical_id=(file_driver,
7069                                           "%s/disk%d" % (file_storage_dir,
7070                                                          disk_index)),
7071                               mode=disk[constants.IDISK_MODE])
7072       disks.append(disk_dev)
7073   elif template_name == constants.DT_SHARED_FILE:
7074     if len(secondary_nodes) != 0:
7075       raise errors.ProgrammerError("Wrong template configuration")
7076
7077     opcodes.RequireSharedFileStorage()
7078
7079     for idx, disk in enumerate(disk_info):
7080       disk_index = idx + base_index
7081       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7082                               size=disk[constants.IDISK_SIZE],
7083                               iv_name="disk/%d" % disk_index,
7084                               logical_id=(file_driver,
7085                                           "%s/disk%d" % (file_storage_dir,
7086                                                          disk_index)),
7087                               mode=disk[constants.IDISK_MODE])
7088       disks.append(disk_dev)
7089   elif template_name == constants.DT_BLOCK:
7090     if len(secondary_nodes) != 0:
7091       raise errors.ProgrammerError("Wrong template configuration")
7092
7093     for idx, disk in enumerate(disk_info):
7094       disk_index = idx + base_index
7095       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7096                               size=disk[constants.IDISK_SIZE],
7097                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7098                                           disk[constants.IDISK_ADOPT]),
7099                               iv_name="disk/%d" % disk_index,
7100                               mode=disk[constants.IDISK_MODE])
7101       disks.append(disk_dev)
7102
7103   else:
7104     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7105   return disks
7106
7107
7108 def _GetInstanceInfoText(instance):
7109   """Compute that text that should be added to the disk's metadata.
7110
7111   """
7112   return "originstname+%s" % instance.name
7113
7114
7115 def _CalcEta(time_taken, written, total_size):
7116   """Calculates the ETA based on size written and total size.
7117
7118   @param time_taken: The time taken so far
7119   @param written: amount written so far
7120   @param total_size: The total size of data to be written
7121   @return: The remaining time in seconds
7122
7123   """
7124   avg_time = time_taken / float(written)
7125   return (total_size - written) * avg_time
7126
7127
7128 def _WipeDisks(lu, instance):
7129   """Wipes instance disks.
7130
7131   @type lu: L{LogicalUnit}
7132   @param lu: the logical unit on whose behalf we execute
7133   @type instance: L{objects.Instance}
7134   @param instance: the instance whose disks we should create
7135   @return: the success of the wipe
7136
7137   """
7138   node = instance.primary_node
7139
7140   for device in instance.disks:
7141     lu.cfg.SetDiskID(device, node)
7142
7143   logging.info("Pause sync of instance %s disks", instance.name)
7144   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7145
7146   for idx, success in enumerate(result.payload):
7147     if not success:
7148       logging.warn("pause-sync of instance %s for disks %d failed",
7149                    instance.name, idx)
7150
7151   try:
7152     for idx, device in enumerate(instance.disks):
7153       lu.LogInfo("* Wiping disk %d", idx)
7154       logging.info("Wiping disk %d for instance %s, node %s",
7155                    idx, instance.name, node)
7156
7157       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7158       # MAX_WIPE_CHUNK at max
7159       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7160                             constants.MIN_WIPE_CHUNK_PERCENT)
7161
7162       offset = 0
7163       size = device.size
7164       last_output = 0
7165       start_time = time.time()
7166
7167       while offset < size:
7168         wipe_size = min(wipe_chunk_size, size - offset)
7169         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7170         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7171                      (idx, offset, wipe_size))
7172         now = time.time()
7173         offset += wipe_size
7174         if now - last_output >= 60:
7175           eta = _CalcEta(now - start_time, offset, size)
7176           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7177                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7178           last_output = now
7179   finally:
7180     logging.info("Resume sync of instance %s disks", instance.name)
7181
7182     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7183
7184     for idx, success in enumerate(result.payload):
7185       if not success:
7186         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7187                       " look at the status and troubleshoot the issue.", idx)
7188         logging.warn("resume-sync of instance %s for disks %d failed",
7189                      instance.name, idx)
7190
7191
7192 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7193   """Create all disks for an instance.
7194
7195   This abstracts away some work from AddInstance.
7196
7197   @type lu: L{LogicalUnit}
7198   @param lu: the logical unit on whose behalf we execute
7199   @type instance: L{objects.Instance}
7200   @param instance: the instance whose disks we should create
7201   @type to_skip: list
7202   @param to_skip: list of indices to skip
7203   @type target_node: string
7204   @param target_node: if passed, overrides the target node for creation
7205   @rtype: boolean
7206   @return: the success of the creation
7207
7208   """
7209   info = _GetInstanceInfoText(instance)
7210   if target_node is None:
7211     pnode = instance.primary_node
7212     all_nodes = instance.all_nodes
7213   else:
7214     pnode = target_node
7215     all_nodes = [pnode]
7216
7217   if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7218     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7219     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7220
7221     result.Raise("Failed to create directory '%s' on"
7222                  " node %s" % (file_storage_dir, pnode))
7223
7224   # Note: this needs to be kept in sync with adding of disks in
7225   # LUInstanceSetParams
7226   for idx, device in enumerate(instance.disks):
7227     if to_skip and idx in to_skip:
7228       continue
7229     logging.info("Creating volume %s for instance %s",
7230                  device.iv_name, instance.name)
7231     #HARDCODE
7232     for node in all_nodes:
7233       f_create = node == pnode
7234       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7235
7236
7237 def _RemoveDisks(lu, instance, target_node=None):
7238   """Remove all disks for an instance.
7239
7240   This abstracts away some work from `AddInstance()` and
7241   `RemoveInstance()`. Note that in case some of the devices couldn't
7242   be removed, the removal will continue with the other ones (compare
7243   with `_CreateDisks()`).
7244
7245   @type lu: L{LogicalUnit}
7246   @param lu: the logical unit on whose behalf we execute
7247   @type instance: L{objects.Instance}
7248   @param instance: the instance whose disks we should remove
7249   @type target_node: string
7250   @param target_node: used to override the node on which to remove the disks
7251   @rtype: boolean
7252   @return: the success of the removal
7253
7254   """
7255   logging.info("Removing block devices for instance %s", instance.name)
7256
7257   all_result = True
7258   for device in instance.disks:
7259     if target_node:
7260       edata = [(target_node, device)]
7261     else:
7262       edata = device.ComputeNodeTree(instance.primary_node)
7263     for node, disk in edata:
7264       lu.cfg.SetDiskID(disk, node)
7265       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7266       if msg:
7267         lu.LogWarning("Could not remove block device %s on node %s,"
7268                       " continuing anyway: %s", device.iv_name, node, msg)
7269         all_result = False
7270
7271   if instance.disk_template == constants.DT_FILE:
7272     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7273     if target_node:
7274       tgt = target_node
7275     else:
7276       tgt = instance.primary_node
7277     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7278     if result.fail_msg:
7279       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7280                     file_storage_dir, instance.primary_node, result.fail_msg)
7281       all_result = False
7282
7283   return all_result
7284
7285
7286 def _ComputeDiskSizePerVG(disk_template, disks):
7287   """Compute disk size requirements in the volume group
7288
7289   """
7290   def _compute(disks, payload):
7291     """Universal algorithm.
7292
7293     """
7294     vgs = {}
7295     for disk in disks:
7296       vgs[disk[constants.IDISK_VG]] = \
7297         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7298
7299     return vgs
7300
7301   # Required free disk space as a function of disk and swap space
7302   req_size_dict = {
7303     constants.DT_DISKLESS: {},
7304     constants.DT_PLAIN: _compute(disks, 0),
7305     # 128 MB are added for drbd metadata for each disk
7306     constants.DT_DRBD8: _compute(disks, 128),
7307     constants.DT_FILE: {},
7308     constants.DT_SHARED_FILE: {},
7309   }
7310
7311   if disk_template not in req_size_dict:
7312     raise errors.ProgrammerError("Disk template '%s' size requirement"
7313                                  " is unknown" %  disk_template)
7314
7315   return req_size_dict[disk_template]
7316
7317
7318 def _ComputeDiskSize(disk_template, disks):
7319   """Compute disk size requirements in the volume group
7320
7321   """
7322   # Required free disk space as a function of disk and swap space
7323   req_size_dict = {
7324     constants.DT_DISKLESS: None,
7325     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7326     # 128 MB are added for drbd metadata for each disk
7327     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7328     constants.DT_FILE: None,
7329     constants.DT_SHARED_FILE: 0,
7330     constants.DT_BLOCK: 0,
7331   }
7332
7333   if disk_template not in req_size_dict:
7334     raise errors.ProgrammerError("Disk template '%s' size requirement"
7335                                  " is unknown" %  disk_template)
7336
7337   return req_size_dict[disk_template]
7338
7339
7340 def _FilterVmNodes(lu, nodenames):
7341   """Filters out non-vm_capable nodes from a list.
7342
7343   @type lu: L{LogicalUnit}
7344   @param lu: the logical unit for which we check
7345   @type nodenames: list
7346   @param nodenames: the list of nodes on which we should check
7347   @rtype: list
7348   @return: the list of vm-capable nodes
7349
7350   """
7351   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7352   return [name for name in nodenames if name not in vm_nodes]
7353
7354
7355 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7356   """Hypervisor parameter validation.
7357
7358   This function abstract the hypervisor parameter validation to be
7359   used in both instance create and instance modify.
7360
7361   @type lu: L{LogicalUnit}
7362   @param lu: the logical unit for which we check
7363   @type nodenames: list
7364   @param nodenames: the list of nodes on which we should check
7365   @type hvname: string
7366   @param hvname: the name of the hypervisor we should use
7367   @type hvparams: dict
7368   @param hvparams: the parameters which we need to check
7369   @raise errors.OpPrereqError: if the parameters are not valid
7370
7371   """
7372   nodenames = _FilterVmNodes(lu, nodenames)
7373   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7374                                                   hvname,
7375                                                   hvparams)
7376   for node in nodenames:
7377     info = hvinfo[node]
7378     if info.offline:
7379       continue
7380     info.Raise("Hypervisor parameter validation failed on node %s" % node)
7381
7382
7383 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7384   """OS parameters validation.
7385
7386   @type lu: L{LogicalUnit}
7387   @param lu: the logical unit for which we check
7388   @type required: boolean
7389   @param required: whether the validation should fail if the OS is not
7390       found
7391   @type nodenames: list
7392   @param nodenames: the list of nodes on which we should check
7393   @type osname: string
7394   @param osname: the name of the hypervisor we should use
7395   @type osparams: dict
7396   @param osparams: the parameters which we need to check
7397   @raise errors.OpPrereqError: if the parameters are not valid
7398
7399   """
7400   nodenames = _FilterVmNodes(lu, nodenames)
7401   result = lu.rpc.call_os_validate(required, nodenames, osname,
7402                                    [constants.OS_VALIDATE_PARAMETERS],
7403                                    osparams)
7404   for node, nres in result.items():
7405     # we don't check for offline cases since this should be run only
7406     # against the master node and/or an instance's nodes
7407     nres.Raise("OS Parameters validation failed on node %s" % node)
7408     if not nres.payload:
7409       lu.LogInfo("OS %s not found on node %s, validation skipped",
7410                  osname, node)
7411
7412
7413 class LUInstanceCreate(LogicalUnit):
7414   """Create an instance.
7415
7416   """
7417   HPATH = "instance-add"
7418   HTYPE = constants.HTYPE_INSTANCE
7419   REQ_BGL = False
7420
7421   def CheckArguments(self):
7422     """Check arguments.
7423
7424     """
7425     # do not require name_check to ease forward/backward compatibility
7426     # for tools
7427     if self.op.no_install and self.op.start:
7428       self.LogInfo("No-installation mode selected, disabling startup")
7429       self.op.start = False
7430     # validate/normalize the instance name
7431     self.op.instance_name = \
7432       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7433
7434     if self.op.ip_check and not self.op.name_check:
7435       # TODO: make the ip check more flexible and not depend on the name check
7436       raise errors.OpPrereqError("Cannot do ip check without a name check",
7437                                  errors.ECODE_INVAL)
7438
7439     # check nics' parameter names
7440     for nic in self.op.nics:
7441       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7442
7443     # check disks. parameter names and consistent adopt/no-adopt strategy
7444     has_adopt = has_no_adopt = False
7445     for disk in self.op.disks:
7446       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7447       if constants.IDISK_ADOPT in disk:
7448         has_adopt = True
7449       else:
7450         has_no_adopt = True
7451     if has_adopt and has_no_adopt:
7452       raise errors.OpPrereqError("Either all disks are adopted or none is",
7453                                  errors.ECODE_INVAL)
7454     if has_adopt:
7455       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7456         raise errors.OpPrereqError("Disk adoption is not supported for the"
7457                                    " '%s' disk template" %
7458                                    self.op.disk_template,
7459                                    errors.ECODE_INVAL)
7460       if self.op.iallocator is not None:
7461         raise errors.OpPrereqError("Disk adoption not allowed with an"
7462                                    " iallocator script", errors.ECODE_INVAL)
7463       if self.op.mode == constants.INSTANCE_IMPORT:
7464         raise errors.OpPrereqError("Disk adoption not allowed for"
7465                                    " instance import", errors.ECODE_INVAL)
7466     else:
7467       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7468         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7469                                    " but no 'adopt' parameter given" %
7470                                    self.op.disk_template,
7471                                    errors.ECODE_INVAL)
7472
7473     self.adopt_disks = has_adopt
7474
7475     # instance name verification
7476     if self.op.name_check:
7477       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7478       self.op.instance_name = self.hostname1.name
7479       # used in CheckPrereq for ip ping check
7480       self.check_ip = self.hostname1.ip
7481     else:
7482       self.check_ip = None
7483
7484     # file storage checks
7485     if (self.op.file_driver and
7486         not self.op.file_driver in constants.FILE_DRIVER):
7487       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7488                                  self.op.file_driver, errors.ECODE_INVAL)
7489
7490     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7491       raise errors.OpPrereqError("File storage directory path not absolute",
7492                                  errors.ECODE_INVAL)
7493
7494     ### Node/iallocator related checks
7495     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7496
7497     if self.op.pnode is not None:
7498       if self.op.disk_template in constants.DTS_INT_MIRROR:
7499         if self.op.snode is None:
7500           raise errors.OpPrereqError("The networked disk templates need"
7501                                      " a mirror node", errors.ECODE_INVAL)
7502       elif self.op.snode:
7503         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7504                         " template")
7505         self.op.snode = None
7506
7507     self._cds = _GetClusterDomainSecret()
7508
7509     if self.op.mode == constants.INSTANCE_IMPORT:
7510       # On import force_variant must be True, because if we forced it at
7511       # initial install, our only chance when importing it back is that it
7512       # works again!
7513       self.op.force_variant = True
7514
7515       if self.op.no_install:
7516         self.LogInfo("No-installation mode has no effect during import")
7517
7518     elif self.op.mode == constants.INSTANCE_CREATE:
7519       if self.op.os_type is None:
7520         raise errors.OpPrereqError("No guest OS specified",
7521                                    errors.ECODE_INVAL)
7522       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7523         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7524                                    " installation" % self.op.os_type,
7525                                    errors.ECODE_STATE)
7526       if self.op.disk_template is None:
7527         raise errors.OpPrereqError("No disk template specified",
7528                                    errors.ECODE_INVAL)
7529
7530     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7531       # Check handshake to ensure both clusters have the same domain secret
7532       src_handshake = self.op.source_handshake
7533       if not src_handshake:
7534         raise errors.OpPrereqError("Missing source handshake",
7535                                    errors.ECODE_INVAL)
7536
7537       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7538                                                            src_handshake)
7539       if errmsg:
7540         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7541                                    errors.ECODE_INVAL)
7542
7543       # Load and check source CA
7544       self.source_x509_ca_pem = self.op.source_x509_ca
7545       if not self.source_x509_ca_pem:
7546         raise errors.OpPrereqError("Missing source X509 CA",
7547                                    errors.ECODE_INVAL)
7548
7549       try:
7550         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7551                                                     self._cds)
7552       except OpenSSL.crypto.Error, err:
7553         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7554                                    (err, ), errors.ECODE_INVAL)
7555
7556       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7557       if errcode is not None:
7558         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7559                                    errors.ECODE_INVAL)
7560
7561       self.source_x509_ca = cert
7562
7563       src_instance_name = self.op.source_instance_name
7564       if not src_instance_name:
7565         raise errors.OpPrereqError("Missing source instance name",
7566                                    errors.ECODE_INVAL)
7567
7568       self.source_instance_name = \
7569           netutils.GetHostname(name=src_instance_name).name
7570
7571     else:
7572       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7573                                  self.op.mode, errors.ECODE_INVAL)
7574
7575   def ExpandNames(self):
7576     """ExpandNames for CreateInstance.
7577
7578     Figure out the right locks for instance creation.
7579
7580     """
7581     self.needed_locks = {}
7582
7583     instance_name = self.op.instance_name
7584     # this is just a preventive check, but someone might still add this
7585     # instance in the meantime, and creation will fail at lock-add time
7586     if instance_name in self.cfg.GetInstanceList():
7587       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7588                                  instance_name, errors.ECODE_EXISTS)
7589
7590     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7591
7592     if self.op.iallocator:
7593       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7594     else:
7595       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7596       nodelist = [self.op.pnode]
7597       if self.op.snode is not None:
7598         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7599         nodelist.append(self.op.snode)
7600       self.needed_locks[locking.LEVEL_NODE] = nodelist
7601
7602     # in case of import lock the source node too
7603     if self.op.mode == constants.INSTANCE_IMPORT:
7604       src_node = self.op.src_node
7605       src_path = self.op.src_path
7606
7607       if src_path is None:
7608         self.op.src_path = src_path = self.op.instance_name
7609
7610       if src_node is None:
7611         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7612         self.op.src_node = None
7613         if os.path.isabs(src_path):
7614           raise errors.OpPrereqError("Importing an instance from an absolute"
7615                                      " path requires a source node option.",
7616                                      errors.ECODE_INVAL)
7617       else:
7618         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7619         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7620           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7621         if not os.path.isabs(src_path):
7622           self.op.src_path = src_path = \
7623             utils.PathJoin(constants.EXPORT_DIR, src_path)
7624
7625   def _RunAllocator(self):
7626     """Run the allocator based on input opcode.
7627
7628     """
7629     nics = [n.ToDict() for n in self.nics]
7630     ial = IAllocator(self.cfg, self.rpc,
7631                      mode=constants.IALLOCATOR_MODE_ALLOC,
7632                      name=self.op.instance_name,
7633                      disk_template=self.op.disk_template,
7634                      tags=[],
7635                      os=self.op.os_type,
7636                      vcpus=self.be_full[constants.BE_VCPUS],
7637                      mem_size=self.be_full[constants.BE_MEMORY],
7638                      disks=self.disks,
7639                      nics=nics,
7640                      hypervisor=self.op.hypervisor,
7641                      )
7642
7643     ial.Run(self.op.iallocator)
7644
7645     if not ial.success:
7646       raise errors.OpPrereqError("Can't compute nodes using"
7647                                  " iallocator '%s': %s" %
7648                                  (self.op.iallocator, ial.info),
7649                                  errors.ECODE_NORES)
7650     if len(ial.result) != ial.required_nodes:
7651       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7652                                  " of nodes (%s), required %s" %
7653                                  (self.op.iallocator, len(ial.result),
7654                                   ial.required_nodes), errors.ECODE_FAULT)
7655     self.op.pnode = ial.result[0]
7656     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7657                  self.op.instance_name, self.op.iallocator,
7658                  utils.CommaJoin(ial.result))
7659     if ial.required_nodes == 2:
7660       self.op.snode = ial.result[1]
7661
7662   def BuildHooksEnv(self):
7663     """Build hooks env.
7664
7665     This runs on master, primary and secondary nodes of the instance.
7666
7667     """
7668     env = {
7669       "ADD_MODE": self.op.mode,
7670       }
7671     if self.op.mode == constants.INSTANCE_IMPORT:
7672       env["SRC_NODE"] = self.op.src_node
7673       env["SRC_PATH"] = self.op.src_path
7674       env["SRC_IMAGES"] = self.src_images
7675
7676     env.update(_BuildInstanceHookEnv(
7677       name=self.op.instance_name,
7678       primary_node=self.op.pnode,
7679       secondary_nodes=self.secondaries,
7680       status=self.op.start,
7681       os_type=self.op.os_type,
7682       memory=self.be_full[constants.BE_MEMORY],
7683       vcpus=self.be_full[constants.BE_VCPUS],
7684       nics=_NICListToTuple(self, self.nics),
7685       disk_template=self.op.disk_template,
7686       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7687              for d in self.disks],
7688       bep=self.be_full,
7689       hvp=self.hv_full,
7690       hypervisor_name=self.op.hypervisor,
7691     ))
7692
7693     return env
7694
7695   def BuildHooksNodes(self):
7696     """Build hooks nodes.
7697
7698     """
7699     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7700     return nl, nl
7701
7702   def _ReadExportInfo(self):
7703     """Reads the export information from disk.
7704
7705     It will override the opcode source node and path with the actual
7706     information, if these two were not specified before.
7707
7708     @return: the export information
7709
7710     """
7711     assert self.op.mode == constants.INSTANCE_IMPORT
7712
7713     src_node = self.op.src_node
7714     src_path = self.op.src_path
7715
7716     if src_node is None:
7717       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7718       exp_list = self.rpc.call_export_list(locked_nodes)
7719       found = False
7720       for node in exp_list:
7721         if exp_list[node].fail_msg:
7722           continue
7723         if src_path in exp_list[node].payload:
7724           found = True
7725           self.op.src_node = src_node = node
7726           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7727                                                        src_path)
7728           break
7729       if not found:
7730         raise errors.OpPrereqError("No export found for relative path %s" %
7731                                     src_path, errors.ECODE_INVAL)
7732
7733     _CheckNodeOnline(self, src_node)
7734     result = self.rpc.call_export_info(src_node, src_path)
7735     result.Raise("No export or invalid export found in dir %s" % src_path)
7736
7737     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7738     if not export_info.has_section(constants.INISECT_EXP):
7739       raise errors.ProgrammerError("Corrupted export config",
7740                                    errors.ECODE_ENVIRON)
7741
7742     ei_version = export_info.get(constants.INISECT_EXP, "version")
7743     if (int(ei_version) != constants.EXPORT_VERSION):
7744       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7745                                  (ei_version, constants.EXPORT_VERSION),
7746                                  errors.ECODE_ENVIRON)
7747     return export_info
7748
7749   def _ReadExportParams(self, einfo):
7750     """Use export parameters as defaults.
7751
7752     In case the opcode doesn't specify (as in override) some instance
7753     parameters, then try to use them from the export information, if
7754     that declares them.
7755
7756     """
7757     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7758
7759     if self.op.disk_template is None:
7760       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7761         self.op.disk_template = einfo.get(constants.INISECT_INS,
7762                                           "disk_template")
7763       else:
7764         raise errors.OpPrereqError("No disk template specified and the export"
7765                                    " is missing the disk_template information",
7766                                    errors.ECODE_INVAL)
7767
7768     if not self.op.disks:
7769       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7770         disks = []
7771         # TODO: import the disk iv_name too
7772         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7773           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7774           disks.append({constants.IDISK_SIZE: disk_sz})
7775         self.op.disks = disks
7776       else:
7777         raise errors.OpPrereqError("No disk info specified and the export"
7778                                    " is missing the disk information",
7779                                    errors.ECODE_INVAL)
7780
7781     if (not self.op.nics and
7782         einfo.has_option(constants.INISECT_INS, "nic_count")):
7783       nics = []
7784       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7785         ndict = {}
7786         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7787           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7788           ndict[name] = v
7789         nics.append(ndict)
7790       self.op.nics = nics
7791
7792     if (self.op.hypervisor is None and
7793         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7794       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7795     if einfo.has_section(constants.INISECT_HYP):
7796       # use the export parameters but do not override the ones
7797       # specified by the user
7798       for name, value in einfo.items(constants.INISECT_HYP):
7799         if name not in self.op.hvparams:
7800           self.op.hvparams[name] = value
7801
7802     if einfo.has_section(constants.INISECT_BEP):
7803       # use the parameters, without overriding
7804       for name, value in einfo.items(constants.INISECT_BEP):
7805         if name not in self.op.beparams:
7806           self.op.beparams[name] = value
7807     else:
7808       # try to read the parameters old style, from the main section
7809       for name in constants.BES_PARAMETERS:
7810         if (name not in self.op.beparams and
7811             einfo.has_option(constants.INISECT_INS, name)):
7812           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7813
7814     if einfo.has_section(constants.INISECT_OSP):
7815       # use the parameters, without overriding
7816       for name, value in einfo.items(constants.INISECT_OSP):
7817         if name not in self.op.osparams:
7818           self.op.osparams[name] = value
7819
7820   def _RevertToDefaults(self, cluster):
7821     """Revert the instance parameters to the default values.
7822
7823     """
7824     # hvparams
7825     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7826     for name in self.op.hvparams.keys():
7827       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7828         del self.op.hvparams[name]
7829     # beparams
7830     be_defs = cluster.SimpleFillBE({})
7831     for name in self.op.beparams.keys():
7832       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7833         del self.op.beparams[name]
7834     # nic params
7835     nic_defs = cluster.SimpleFillNIC({})
7836     for nic in self.op.nics:
7837       for name in constants.NICS_PARAMETERS:
7838         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7839           del nic[name]
7840     # osparams
7841     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7842     for name in self.op.osparams.keys():
7843       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7844         del self.op.osparams[name]
7845
7846   def CheckPrereq(self):
7847     """Check prerequisites.
7848
7849     """
7850     if self.op.mode == constants.INSTANCE_IMPORT:
7851       export_info = self._ReadExportInfo()
7852       self._ReadExportParams(export_info)
7853
7854     if (not self.cfg.GetVGName() and
7855         self.op.disk_template not in constants.DTS_NOT_LVM):
7856       raise errors.OpPrereqError("Cluster does not support lvm-based"
7857                                  " instances", errors.ECODE_STATE)
7858
7859     if self.op.hypervisor is None:
7860       self.op.hypervisor = self.cfg.GetHypervisorType()
7861
7862     cluster = self.cfg.GetClusterInfo()
7863     enabled_hvs = cluster.enabled_hypervisors
7864     if self.op.hypervisor not in enabled_hvs:
7865       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7866                                  " cluster (%s)" % (self.op.hypervisor,
7867                                   ",".join(enabled_hvs)),
7868                                  errors.ECODE_STATE)
7869
7870     # check hypervisor parameter syntax (locally)
7871     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7872     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7873                                       self.op.hvparams)
7874     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7875     hv_type.CheckParameterSyntax(filled_hvp)
7876     self.hv_full = filled_hvp
7877     # check that we don't specify global parameters on an instance
7878     _CheckGlobalHvParams(self.op.hvparams)
7879
7880     # fill and remember the beparams dict
7881     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7882     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7883
7884     # build os parameters
7885     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7886
7887     # now that hvp/bep are in final format, let's reset to defaults,
7888     # if told to do so
7889     if self.op.identify_defaults:
7890       self._RevertToDefaults(cluster)
7891
7892     # NIC buildup
7893     self.nics = []
7894     for idx, nic in enumerate(self.op.nics):
7895       nic_mode_req = nic.get(constants.INIC_MODE, None)
7896       nic_mode = nic_mode_req
7897       if nic_mode is None:
7898         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7899
7900       # in routed mode, for the first nic, the default ip is 'auto'
7901       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7902         default_ip_mode = constants.VALUE_AUTO
7903       else:
7904         default_ip_mode = constants.VALUE_NONE
7905
7906       # ip validity checks
7907       ip = nic.get(constants.INIC_IP, default_ip_mode)
7908       if ip is None or ip.lower() == constants.VALUE_NONE:
7909         nic_ip = None
7910       elif ip.lower() == constants.VALUE_AUTO:
7911         if not self.op.name_check:
7912           raise errors.OpPrereqError("IP address set to auto but name checks"
7913                                      " have been skipped",
7914                                      errors.ECODE_INVAL)
7915         nic_ip = self.hostname1.ip
7916       else:
7917         if not netutils.IPAddress.IsValid(ip):
7918           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7919                                      errors.ECODE_INVAL)
7920         nic_ip = ip
7921
7922       # TODO: check the ip address for uniqueness
7923       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7924         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7925                                    errors.ECODE_INVAL)
7926
7927       # MAC address verification
7928       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7929       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7930         mac = utils.NormalizeAndValidateMac(mac)
7931
7932         try:
7933           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7934         except errors.ReservationError:
7935           raise errors.OpPrereqError("MAC address %s already in use"
7936                                      " in cluster" % mac,
7937                                      errors.ECODE_NOTUNIQUE)
7938
7939       #  Build nic parameters
7940       link = nic.get(constants.INIC_LINK, None)
7941       nicparams = {}
7942       if nic_mode_req:
7943         nicparams[constants.NIC_MODE] = nic_mode_req
7944       if link:
7945         nicparams[constants.NIC_LINK] = link
7946
7947       check_params = cluster.SimpleFillNIC(nicparams)
7948       objects.NIC.CheckParameterSyntax(check_params)
7949       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7950
7951     # disk checks/pre-build
7952     default_vg = self.cfg.GetVGName()
7953     self.disks = []
7954     for disk in self.op.disks:
7955       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7956       if mode not in constants.DISK_ACCESS_SET:
7957         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7958                                    mode, errors.ECODE_INVAL)
7959       size = disk.get(constants.IDISK_SIZE, None)
7960       if size is None:
7961         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7962       try:
7963         size = int(size)
7964       except (TypeError, ValueError):
7965         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7966                                    errors.ECODE_INVAL)
7967       new_disk = {
7968         constants.IDISK_SIZE: size,
7969         constants.IDISK_MODE: mode,
7970         constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7971         }
7972       if constants.IDISK_ADOPT in disk:
7973         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7974       self.disks.append(new_disk)
7975
7976     if self.op.mode == constants.INSTANCE_IMPORT:
7977
7978       # Check that the new instance doesn't have less disks than the export
7979       instance_disks = len(self.disks)
7980       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7981       if instance_disks < export_disks:
7982         raise errors.OpPrereqError("Not enough disks to import."
7983                                    " (instance: %d, export: %d)" %
7984                                    (instance_disks, export_disks),
7985                                    errors.ECODE_INVAL)
7986
7987       disk_images = []
7988       for idx in range(export_disks):
7989         option = 'disk%d_dump' % idx
7990         if export_info.has_option(constants.INISECT_INS, option):
7991           # FIXME: are the old os-es, disk sizes, etc. useful?
7992           export_name = export_info.get(constants.INISECT_INS, option)
7993           image = utils.PathJoin(self.op.src_path, export_name)
7994           disk_images.append(image)
7995         else:
7996           disk_images.append(False)
7997
7998       self.src_images = disk_images
7999
8000       old_name = export_info.get(constants.INISECT_INS, 'name')
8001       try:
8002         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8003       except (TypeError, ValueError), err:
8004         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8005                                    " an integer: %s" % str(err),
8006                                    errors.ECODE_STATE)
8007       if self.op.instance_name == old_name:
8008         for idx, nic in enumerate(self.nics):
8009           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8010             nic_mac_ini = 'nic%d_mac' % idx
8011             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8012
8013     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8014
8015     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8016     if self.op.ip_check:
8017       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8018         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8019                                    (self.check_ip, self.op.instance_name),
8020                                    errors.ECODE_NOTUNIQUE)
8021
8022     #### mac address generation
8023     # By generating here the mac address both the allocator and the hooks get
8024     # the real final mac address rather than the 'auto' or 'generate' value.
8025     # There is a race condition between the generation and the instance object
8026     # creation, which means that we know the mac is valid now, but we're not
8027     # sure it will be when we actually add the instance. If things go bad
8028     # adding the instance will abort because of a duplicate mac, and the
8029     # creation job will fail.
8030     for nic in self.nics:
8031       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8032         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8033
8034     #### allocator run
8035
8036     if self.op.iallocator is not None:
8037       self._RunAllocator()
8038
8039     #### node related checks
8040
8041     # check primary node
8042     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8043     assert self.pnode is not None, \
8044       "Cannot retrieve locked node %s" % self.op.pnode
8045     if pnode.offline:
8046       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8047                                  pnode.name, errors.ECODE_STATE)
8048     if pnode.drained:
8049       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8050                                  pnode.name, errors.ECODE_STATE)
8051     if not pnode.vm_capable:
8052       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8053                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8054
8055     self.secondaries = []
8056
8057     # mirror node verification
8058     if self.op.disk_template in constants.DTS_INT_MIRROR:
8059       if self.op.snode == pnode.name:
8060         raise errors.OpPrereqError("The secondary node cannot be the"
8061                                    " primary node.", errors.ECODE_INVAL)
8062       _CheckNodeOnline(self, self.op.snode)
8063       _CheckNodeNotDrained(self, self.op.snode)
8064       _CheckNodeVmCapable(self, self.op.snode)
8065       self.secondaries.append(self.op.snode)
8066
8067     nodenames = [pnode.name] + self.secondaries
8068
8069     if not self.adopt_disks:
8070       # Check lv size requirements, if not adopting
8071       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8072       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8073
8074     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8075       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8076                                 disk[constants.IDISK_ADOPT])
8077                      for disk in self.disks])
8078       if len(all_lvs) != len(self.disks):
8079         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8080                                    errors.ECODE_INVAL)
8081       for lv_name in all_lvs:
8082         try:
8083           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8084           # to ReserveLV uses the same syntax
8085           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8086         except errors.ReservationError:
8087           raise errors.OpPrereqError("LV named %s used by another instance" %
8088                                      lv_name, errors.ECODE_NOTUNIQUE)
8089
8090       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8091       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8092
8093       node_lvs = self.rpc.call_lv_list([pnode.name],
8094                                        vg_names.payload.keys())[pnode.name]
8095       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8096       node_lvs = node_lvs.payload
8097
8098       delta = all_lvs.difference(node_lvs.keys())
8099       if delta:
8100         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8101                                    utils.CommaJoin(delta),
8102                                    errors.ECODE_INVAL)
8103       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8104       if online_lvs:
8105         raise errors.OpPrereqError("Online logical volumes found, cannot"
8106                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8107                                    errors.ECODE_STATE)
8108       # update the size of disk based on what is found
8109       for dsk in self.disks:
8110         dsk[constants.IDISK_SIZE] = \
8111           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8112                                         dsk[constants.IDISK_ADOPT])][0]))
8113
8114     elif self.op.disk_template == constants.DT_BLOCK:
8115       # Normalize and de-duplicate device paths
8116       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8117                        for disk in self.disks])
8118       if len(all_disks) != len(self.disks):
8119         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8120                                    errors.ECODE_INVAL)
8121       baddisks = [d for d in all_disks
8122                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8123       if baddisks:
8124         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8125                                    " cannot be adopted" %
8126                                    (", ".join(baddisks),
8127                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8128                                    errors.ECODE_INVAL)
8129
8130       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8131                                             list(all_disks))[pnode.name]
8132       node_disks.Raise("Cannot get block device information from node %s" %
8133                        pnode.name)
8134       node_disks = node_disks.payload
8135       delta = all_disks.difference(node_disks.keys())
8136       if delta:
8137         raise errors.OpPrereqError("Missing block device(s): %s" %
8138                                    utils.CommaJoin(delta),
8139                                    errors.ECODE_INVAL)
8140       for dsk in self.disks:
8141         dsk[constants.IDISK_SIZE] = \
8142           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8143
8144     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8145
8146     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8147     # check OS parameters (remotely)
8148     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8149
8150     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8151
8152     # memory check on primary node
8153     if self.op.start:
8154       _CheckNodeFreeMemory(self, self.pnode.name,
8155                            "creating instance %s" % self.op.instance_name,
8156                            self.be_full[constants.BE_MEMORY],
8157                            self.op.hypervisor)
8158
8159     self.dry_run_result = list(nodenames)
8160
8161   def Exec(self, feedback_fn):
8162     """Create and add the instance to the cluster.
8163
8164     """
8165     instance = self.op.instance_name
8166     pnode_name = self.pnode.name
8167
8168     ht_kind = self.op.hypervisor
8169     if ht_kind in constants.HTS_REQ_PORT:
8170       network_port = self.cfg.AllocatePort()
8171     else:
8172       network_port = None
8173
8174     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8175       # this is needed because os.path.join does not accept None arguments
8176       if self.op.file_storage_dir is None:
8177         string_file_storage_dir = ""
8178       else:
8179         string_file_storage_dir = self.op.file_storage_dir
8180
8181       # build the full file storage dir path
8182       if self.op.disk_template == constants.DT_SHARED_FILE:
8183         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8184       else:
8185         get_fsd_fn = self.cfg.GetFileStorageDir
8186
8187       file_storage_dir = utils.PathJoin(get_fsd_fn(),
8188                                         string_file_storage_dir, instance)
8189     else:
8190       file_storage_dir = ""
8191
8192     disks = _GenerateDiskTemplate(self,
8193                                   self.op.disk_template,
8194                                   instance, pnode_name,
8195                                   self.secondaries,
8196                                   self.disks,
8197                                   file_storage_dir,
8198                                   self.op.file_driver,
8199                                   0,
8200                                   feedback_fn)
8201
8202     iobj = objects.Instance(name=instance, os=self.op.os_type,
8203                             primary_node=pnode_name,
8204                             nics=self.nics, disks=disks,
8205                             disk_template=self.op.disk_template,
8206                             admin_up=False,
8207                             network_port=network_port,
8208                             beparams=self.op.beparams,
8209                             hvparams=self.op.hvparams,
8210                             hypervisor=self.op.hypervisor,
8211                             osparams=self.op.osparams,
8212                             )
8213
8214     if self.adopt_disks:
8215       if self.op.disk_template == constants.DT_PLAIN:
8216         # rename LVs to the newly-generated names; we need to construct
8217         # 'fake' LV disks with the old data, plus the new unique_id
8218         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8219         rename_to = []
8220         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8221           rename_to.append(t_dsk.logical_id)
8222           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8223           self.cfg.SetDiskID(t_dsk, pnode_name)
8224         result = self.rpc.call_blockdev_rename(pnode_name,
8225                                                zip(tmp_disks, rename_to))
8226         result.Raise("Failed to rename adoped LVs")
8227     else:
8228       feedback_fn("* creating instance disks...")
8229       try:
8230         _CreateDisks(self, iobj)
8231       except errors.OpExecError:
8232         self.LogWarning("Device creation failed, reverting...")
8233         try:
8234           _RemoveDisks(self, iobj)
8235         finally:
8236           self.cfg.ReleaseDRBDMinors(instance)
8237           raise
8238
8239       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8240         feedback_fn("* wiping instance disks...")
8241         try:
8242           _WipeDisks(self, iobj)
8243         except errors.OpExecError:
8244           self.LogWarning("Device wiping failed, reverting...")
8245           try:
8246             _RemoveDisks(self, iobj)
8247           finally:
8248             self.cfg.ReleaseDRBDMinors(instance)
8249             raise
8250
8251     feedback_fn("adding instance %s to cluster config" % instance)
8252
8253     self.cfg.AddInstance(iobj, self.proc.GetECId())
8254
8255     # Declare that we don't want to remove the instance lock anymore, as we've
8256     # added the instance to the config
8257     del self.remove_locks[locking.LEVEL_INSTANCE]
8258     # Unlock all the nodes
8259     if self.op.mode == constants.INSTANCE_IMPORT:
8260       nodes_keep = [self.op.src_node]
8261       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8262                        if node != self.op.src_node]
8263       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8264       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8265     else:
8266       self.context.glm.release(locking.LEVEL_NODE)
8267       del self.acquired_locks[locking.LEVEL_NODE]
8268
8269     if self.op.wait_for_sync:
8270       disk_abort = not _WaitForSync(self, iobj)
8271     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8272       # make sure the disks are not degraded (still sync-ing is ok)
8273       time.sleep(15)
8274       feedback_fn("* checking mirrors status")
8275       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8276     else:
8277       disk_abort = False
8278
8279     if disk_abort:
8280       _RemoveDisks(self, iobj)
8281       self.cfg.RemoveInstance(iobj.name)
8282       # Make sure the instance lock gets removed
8283       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8284       raise errors.OpExecError("There are some degraded disks for"
8285                                " this instance")
8286
8287     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8288       if self.op.mode == constants.INSTANCE_CREATE:
8289         if not self.op.no_install:
8290           feedback_fn("* running the instance OS create scripts...")
8291           # FIXME: pass debug option from opcode to backend
8292           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8293                                                  self.op.debug_level)
8294           result.Raise("Could not add os for instance %s"
8295                        " on node %s" % (instance, pnode_name))
8296
8297       elif self.op.mode == constants.INSTANCE_IMPORT:
8298         feedback_fn("* running the instance OS import scripts...")
8299
8300         transfers = []
8301
8302         for idx, image in enumerate(self.src_images):
8303           if not image:
8304             continue
8305
8306           # FIXME: pass debug option from opcode to backend
8307           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8308                                              constants.IEIO_FILE, (image, ),
8309                                              constants.IEIO_SCRIPT,
8310                                              (iobj.disks[idx], idx),
8311                                              None)
8312           transfers.append(dt)
8313
8314         import_result = \
8315           masterd.instance.TransferInstanceData(self, feedback_fn,
8316                                                 self.op.src_node, pnode_name,
8317                                                 self.pnode.secondary_ip,
8318                                                 iobj, transfers)
8319         if not compat.all(import_result):
8320           self.LogWarning("Some disks for instance %s on node %s were not"
8321                           " imported successfully" % (instance, pnode_name))
8322
8323       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8324         feedback_fn("* preparing remote import...")
8325         # The source cluster will stop the instance before attempting to make a
8326         # connection. In some cases stopping an instance can take a long time,
8327         # hence the shutdown timeout is added to the connection timeout.
8328         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8329                            self.op.source_shutdown_timeout)
8330         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8331
8332         assert iobj.primary_node == self.pnode.name
8333         disk_results = \
8334           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8335                                         self.source_x509_ca,
8336                                         self._cds, timeouts)
8337         if not compat.all(disk_results):
8338           # TODO: Should the instance still be started, even if some disks
8339           # failed to import (valid for local imports, too)?
8340           self.LogWarning("Some disks for instance %s on node %s were not"
8341                           " imported successfully" % (instance, pnode_name))
8342
8343         # Run rename script on newly imported instance
8344         assert iobj.name == instance
8345         feedback_fn("Running rename script for %s" % instance)
8346         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8347                                                    self.source_instance_name,
8348                                                    self.op.debug_level)
8349         if result.fail_msg:
8350           self.LogWarning("Failed to run rename script for %s on node"
8351                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8352
8353       else:
8354         # also checked in the prereq part
8355         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8356                                      % self.op.mode)
8357
8358     if self.op.start:
8359       iobj.admin_up = True
8360       self.cfg.Update(iobj, feedback_fn)
8361       logging.info("Starting instance %s on node %s", instance, pnode_name)
8362       feedback_fn("* starting instance...")
8363       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8364       result.Raise("Could not start instance")
8365
8366     return list(iobj.all_nodes)
8367
8368
8369 class LUInstanceConsole(NoHooksLU):
8370   """Connect to an instance's console.
8371
8372   This is somewhat special in that it returns the command line that
8373   you need to run on the master node in order to connect to the
8374   console.
8375
8376   """
8377   REQ_BGL = False
8378
8379   def ExpandNames(self):
8380     self._ExpandAndLockInstance()
8381
8382   def CheckPrereq(self):
8383     """Check prerequisites.
8384
8385     This checks that the instance is in the cluster.
8386
8387     """
8388     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8389     assert self.instance is not None, \
8390       "Cannot retrieve locked instance %s" % self.op.instance_name
8391     _CheckNodeOnline(self, self.instance.primary_node)
8392
8393   def Exec(self, feedback_fn):
8394     """Connect to the console of an instance
8395
8396     """
8397     instance = self.instance
8398     node = instance.primary_node
8399
8400     node_insts = self.rpc.call_instance_list([node],
8401                                              [instance.hypervisor])[node]
8402     node_insts.Raise("Can't get node information from %s" % node)
8403
8404     if instance.name not in node_insts.payload:
8405       if instance.admin_up:
8406         state = constants.INSTST_ERRORDOWN
8407       else:
8408         state = constants.INSTST_ADMINDOWN
8409       raise errors.OpExecError("Instance %s is not running (state %s)" %
8410                                (instance.name, state))
8411
8412     logging.debug("Connecting to console of %s on %s", instance.name, node)
8413
8414     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8415
8416
8417 def _GetInstanceConsole(cluster, instance):
8418   """Returns console information for an instance.
8419
8420   @type cluster: L{objects.Cluster}
8421   @type instance: L{objects.Instance}
8422   @rtype: dict
8423
8424   """
8425   hyper = hypervisor.GetHypervisor(instance.hypervisor)
8426   # beparams and hvparams are passed separately, to avoid editing the
8427   # instance and then saving the defaults in the instance itself.
8428   hvparams = cluster.FillHV(instance)
8429   beparams = cluster.FillBE(instance)
8430   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8431
8432   assert console.instance == instance.name
8433   assert console.Validate()
8434
8435   return console.ToDict()
8436
8437
8438 class LUInstanceReplaceDisks(LogicalUnit):
8439   """Replace the disks of an instance.
8440
8441   """
8442   HPATH = "mirrors-replace"
8443   HTYPE = constants.HTYPE_INSTANCE
8444   REQ_BGL = False
8445
8446   def CheckArguments(self):
8447     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8448                                   self.op.iallocator)
8449
8450   def ExpandNames(self):
8451     self._ExpandAndLockInstance()
8452
8453     if self.op.iallocator is not None:
8454       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8455
8456     elif self.op.remote_node is not None:
8457       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8458       self.op.remote_node = remote_node
8459
8460       # Warning: do not remove the locking of the new secondary here
8461       # unless DRBD8.AddChildren is changed to work in parallel;
8462       # currently it doesn't since parallel invocations of
8463       # FindUnusedMinor will conflict
8464       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8465       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8466
8467     else:
8468       self.needed_locks[locking.LEVEL_NODE] = []
8469       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8470
8471     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8472                                    self.op.iallocator, self.op.remote_node,
8473                                    self.op.disks, False, self.op.early_release)
8474
8475     self.tasklets = [self.replacer]
8476
8477   def DeclareLocks(self, level):
8478     # If we're not already locking all nodes in the set we have to declare the
8479     # instance's primary/secondary nodes.
8480     if (level == locking.LEVEL_NODE and
8481         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8482       self._LockInstancesNodes()
8483
8484   def BuildHooksEnv(self):
8485     """Build hooks env.
8486
8487     This runs on the master, the primary and all the secondaries.
8488
8489     """
8490     instance = self.replacer.instance
8491     env = {
8492       "MODE": self.op.mode,
8493       "NEW_SECONDARY": self.op.remote_node,
8494       "OLD_SECONDARY": instance.secondary_nodes[0],
8495       }
8496     env.update(_BuildInstanceHookEnvByObject(self, instance))
8497     return env
8498
8499   def BuildHooksNodes(self):
8500     """Build hooks nodes.
8501
8502     """
8503     instance = self.replacer.instance
8504     nl = [
8505       self.cfg.GetMasterNode(),
8506       instance.primary_node,
8507       ]
8508     if self.op.remote_node is not None:
8509       nl.append(self.op.remote_node)
8510     return nl, nl
8511
8512
8513 class TLReplaceDisks(Tasklet):
8514   """Replaces disks for an instance.
8515
8516   Note: Locking is not within the scope of this class.
8517
8518   """
8519   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8520                disks, delay_iallocator, early_release):
8521     """Initializes this class.
8522
8523     """
8524     Tasklet.__init__(self, lu)
8525
8526     # Parameters
8527     self.instance_name = instance_name
8528     self.mode = mode
8529     self.iallocator_name = iallocator_name
8530     self.remote_node = remote_node
8531     self.disks = disks
8532     self.delay_iallocator = delay_iallocator
8533     self.early_release = early_release
8534
8535     # Runtime data
8536     self.instance = None
8537     self.new_node = None
8538     self.target_node = None
8539     self.other_node = None
8540     self.remote_node_info = None
8541     self.node_secondary_ip = None
8542
8543   @staticmethod
8544   def CheckArguments(mode, remote_node, iallocator):
8545     """Helper function for users of this class.
8546
8547     """
8548     # check for valid parameter combination
8549     if mode == constants.REPLACE_DISK_CHG:
8550       if remote_node is None and iallocator is None:
8551         raise errors.OpPrereqError("When changing the secondary either an"
8552                                    " iallocator script must be used or the"
8553                                    " new node given", errors.ECODE_INVAL)
8554
8555       if remote_node is not None and iallocator is not None:
8556         raise errors.OpPrereqError("Give either the iallocator or the new"
8557                                    " secondary, not both", errors.ECODE_INVAL)
8558
8559     elif remote_node is not None or iallocator is not None:
8560       # Not replacing the secondary
8561       raise errors.OpPrereqError("The iallocator and new node options can"
8562                                  " only be used when changing the"
8563                                  " secondary node", errors.ECODE_INVAL)
8564
8565   @staticmethod
8566   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8567     """Compute a new secondary node using an IAllocator.
8568
8569     """
8570     ial = IAllocator(lu.cfg, lu.rpc,
8571                      mode=constants.IALLOCATOR_MODE_RELOC,
8572                      name=instance_name,
8573                      relocate_from=relocate_from)
8574
8575     ial.Run(iallocator_name)
8576
8577     if not ial.success:
8578       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8579                                  " %s" % (iallocator_name, ial.info),
8580                                  errors.ECODE_NORES)
8581
8582     if len(ial.result) != ial.required_nodes:
8583       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8584                                  " of nodes (%s), required %s" %
8585                                  (iallocator_name,
8586                                   len(ial.result), ial.required_nodes),
8587                                  errors.ECODE_FAULT)
8588
8589     remote_node_name = ial.result[0]
8590
8591     lu.LogInfo("Selected new secondary for instance '%s': %s",
8592                instance_name, remote_node_name)
8593
8594     return remote_node_name
8595
8596   def _FindFaultyDisks(self, node_name):
8597     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8598                                     node_name, True)
8599
8600   def _CheckDisksActivated(self, instance):
8601     """Checks if the instance disks are activated.
8602
8603     @param instance: The instance to check disks
8604     @return: True if they are activated, False otherwise
8605
8606     """
8607     nodes = instance.all_nodes
8608
8609     for idx, dev in enumerate(instance.disks):
8610       for node in nodes:
8611         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8612         self.cfg.SetDiskID(dev, node)
8613
8614         result = self.rpc.call_blockdev_find(node, dev)
8615
8616         if result.offline:
8617           continue
8618         elif result.fail_msg or not result.payload:
8619           return False
8620
8621     return True
8622
8623
8624   def CheckPrereq(self):
8625     """Check prerequisites.
8626
8627     This checks that the instance is in the cluster.
8628
8629     """
8630     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8631     assert instance is not None, \
8632       "Cannot retrieve locked instance %s" % self.instance_name
8633
8634     if instance.disk_template != constants.DT_DRBD8:
8635       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8636                                  " instances", errors.ECODE_INVAL)
8637
8638     if len(instance.secondary_nodes) != 1:
8639       raise errors.OpPrereqError("The instance has a strange layout,"
8640                                  " expected one secondary but found %d" %
8641                                  len(instance.secondary_nodes),
8642                                  errors.ECODE_FAULT)
8643
8644     if not self.delay_iallocator:
8645       self._CheckPrereq2()
8646
8647   def _CheckPrereq2(self):
8648     """Check prerequisites, second part.
8649
8650     This function should always be part of CheckPrereq. It was separated and is
8651     now called from Exec because during node evacuation iallocator was only
8652     called with an unmodified cluster model, not taking planned changes into
8653     account.
8654
8655     """
8656     instance = self.instance
8657     secondary_node = instance.secondary_nodes[0]
8658
8659     if self.iallocator_name is None:
8660       remote_node = self.remote_node
8661     else:
8662       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8663                                        instance.name, instance.secondary_nodes)
8664
8665     if remote_node is not None:
8666       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8667       assert self.remote_node_info is not None, \
8668         "Cannot retrieve locked node %s" % remote_node
8669     else:
8670       self.remote_node_info = None
8671
8672     if remote_node == self.instance.primary_node:
8673       raise errors.OpPrereqError("The specified node is the primary node of"
8674                                  " the instance.", errors.ECODE_INVAL)
8675
8676     if remote_node == secondary_node:
8677       raise errors.OpPrereqError("The specified node is already the"
8678                                  " secondary node of the instance.",
8679                                  errors.ECODE_INVAL)
8680
8681     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8682                                     constants.REPLACE_DISK_CHG):
8683       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8684                                  errors.ECODE_INVAL)
8685
8686     if self.mode == constants.REPLACE_DISK_AUTO:
8687       if not self._CheckDisksActivated(instance):
8688         raise errors.OpPrereqError("Please run activate-disks on instance %s"
8689                                    " first" % self.instance_name,
8690                                    errors.ECODE_STATE)
8691       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8692       faulty_secondary = self._FindFaultyDisks(secondary_node)
8693
8694       if faulty_primary and faulty_secondary:
8695         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8696                                    " one node and can not be repaired"
8697                                    " automatically" % self.instance_name,
8698                                    errors.ECODE_STATE)
8699
8700       if faulty_primary:
8701         self.disks = faulty_primary
8702         self.target_node = instance.primary_node
8703         self.other_node = secondary_node
8704         check_nodes = [self.target_node, self.other_node]
8705       elif faulty_secondary:
8706         self.disks = faulty_secondary
8707         self.target_node = secondary_node
8708         self.other_node = instance.primary_node
8709         check_nodes = [self.target_node, self.other_node]
8710       else:
8711         self.disks = []
8712         check_nodes = []
8713
8714     else:
8715       # Non-automatic modes
8716       if self.mode == constants.REPLACE_DISK_PRI:
8717         self.target_node = instance.primary_node
8718         self.other_node = secondary_node
8719         check_nodes = [self.target_node, self.other_node]
8720
8721       elif self.mode == constants.REPLACE_DISK_SEC:
8722         self.target_node = secondary_node
8723         self.other_node = instance.primary_node
8724         check_nodes = [self.target_node, self.other_node]
8725
8726       elif self.mode == constants.REPLACE_DISK_CHG:
8727         self.new_node = remote_node
8728         self.other_node = instance.primary_node
8729         self.target_node = secondary_node
8730         check_nodes = [self.new_node, self.other_node]
8731
8732         _CheckNodeNotDrained(self.lu, remote_node)
8733         _CheckNodeVmCapable(self.lu, remote_node)
8734
8735         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8736         assert old_node_info is not None
8737         if old_node_info.offline and not self.early_release:
8738           # doesn't make sense to delay the release
8739           self.early_release = True
8740           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8741                           " early-release mode", secondary_node)
8742
8743       else:
8744         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8745                                      self.mode)
8746
8747       # If not specified all disks should be replaced
8748       if not self.disks:
8749         self.disks = range(len(self.instance.disks))
8750
8751     for node in check_nodes:
8752       _CheckNodeOnline(self.lu, node)
8753
8754     # Check whether disks are valid
8755     for disk_idx in self.disks:
8756       instance.FindDisk(disk_idx)
8757
8758     # Get secondary node IP addresses
8759     node_2nd_ip = {}
8760
8761     for node_name in [self.target_node, self.other_node, self.new_node]:
8762       if node_name is not None:
8763         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8764
8765     self.node_secondary_ip = node_2nd_ip
8766
8767   def Exec(self, feedback_fn):
8768     """Execute disk replacement.
8769
8770     This dispatches the disk replacement to the appropriate handler.
8771
8772     """
8773     if self.delay_iallocator:
8774       self._CheckPrereq2()
8775
8776     if not self.disks:
8777       feedback_fn("No disks need replacement")
8778       return
8779
8780     feedback_fn("Replacing disk(s) %s for %s" %
8781                 (utils.CommaJoin(self.disks), self.instance.name))
8782
8783     activate_disks = (not self.instance.admin_up)
8784
8785     # Activate the instance disks if we're replacing them on a down instance
8786     if activate_disks:
8787       _StartInstanceDisks(self.lu, self.instance, True)
8788
8789     try:
8790       # Should we replace the secondary node?
8791       if self.new_node is not None:
8792         fn = self._ExecDrbd8Secondary
8793       else:
8794         fn = self._ExecDrbd8DiskOnly
8795
8796       return fn(feedback_fn)
8797
8798     finally:
8799       # Deactivate the instance disks if we're replacing them on a
8800       # down instance
8801       if activate_disks:
8802         _SafeShutdownInstanceDisks(self.lu, self.instance)
8803
8804   def _CheckVolumeGroup(self, nodes):
8805     self.lu.LogInfo("Checking volume groups")
8806
8807     vgname = self.cfg.GetVGName()
8808
8809     # Make sure volume group exists on all involved nodes
8810     results = self.rpc.call_vg_list(nodes)
8811     if not results:
8812       raise errors.OpExecError("Can't list volume groups on the nodes")
8813
8814     for node in nodes:
8815       res = results[node]
8816       res.Raise("Error checking node %s" % node)
8817       if vgname not in res.payload:
8818         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8819                                  (vgname, node))
8820
8821   def _CheckDisksExistence(self, nodes):
8822     # Check disk existence
8823     for idx, dev in enumerate(self.instance.disks):
8824       if idx not in self.disks:
8825         continue
8826
8827       for node in nodes:
8828         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8829         self.cfg.SetDiskID(dev, node)
8830
8831         result = self.rpc.call_blockdev_find(node, dev)
8832
8833         msg = result.fail_msg
8834         if msg or not result.payload:
8835           if not msg:
8836             msg = "disk not found"
8837           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8838                                    (idx, node, msg))
8839
8840   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8841     for idx, dev in enumerate(self.instance.disks):
8842       if idx not in self.disks:
8843         continue
8844
8845       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8846                       (idx, node_name))
8847
8848       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8849                                    ldisk=ldisk):
8850         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8851                                  " replace disks for instance %s" %
8852                                  (node_name, self.instance.name))
8853
8854   def _CreateNewStorage(self, node_name):
8855     vgname = self.cfg.GetVGName()
8856     iv_names = {}
8857
8858     for idx, dev in enumerate(self.instance.disks):
8859       if idx not in self.disks:
8860         continue
8861
8862       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8863
8864       self.cfg.SetDiskID(dev, node_name)
8865
8866       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8867       names = _GenerateUniqueNames(self.lu, lv_names)
8868
8869       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8870                              logical_id=(vgname, names[0]))
8871       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8872                              logical_id=(vgname, names[1]))
8873
8874       new_lvs = [lv_data, lv_meta]
8875       old_lvs = dev.children
8876       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8877
8878       # we pass force_create=True to force the LVM creation
8879       for new_lv in new_lvs:
8880         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8881                         _GetInstanceInfoText(self.instance), False)
8882
8883     return iv_names
8884
8885   def _CheckDevices(self, node_name, iv_names):
8886     for name, (dev, _, _) in iv_names.iteritems():
8887       self.cfg.SetDiskID(dev, node_name)
8888
8889       result = self.rpc.call_blockdev_find(node_name, dev)
8890
8891       msg = result.fail_msg
8892       if msg or not result.payload:
8893         if not msg:
8894           msg = "disk not found"
8895         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8896                                  (name, msg))
8897
8898       if result.payload.is_degraded:
8899         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8900
8901   def _RemoveOldStorage(self, node_name, iv_names):
8902     for name, (_, old_lvs, _) in iv_names.iteritems():
8903       self.lu.LogInfo("Remove logical volumes for %s" % name)
8904
8905       for lv in old_lvs:
8906         self.cfg.SetDiskID(lv, node_name)
8907
8908         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8909         if msg:
8910           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8911                              hint="remove unused LVs manually")
8912
8913   def _ReleaseNodeLock(self, node_name):
8914     """Releases the lock for a given node."""
8915     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8916
8917   def _ExecDrbd8DiskOnly(self, feedback_fn):
8918     """Replace a disk on the primary or secondary for DRBD 8.
8919
8920     The algorithm for replace is quite complicated:
8921
8922       1. for each disk to be replaced:
8923
8924         1. create new LVs on the target node with unique names
8925         1. detach old LVs from the drbd device
8926         1. rename old LVs to name_replaced.<time_t>
8927         1. rename new LVs to old LVs
8928         1. attach the new LVs (with the old names now) to the drbd device
8929
8930       1. wait for sync across all devices
8931
8932       1. for each modified disk:
8933
8934         1. remove old LVs (which have the name name_replaces.<time_t>)
8935
8936     Failures are not very well handled.
8937
8938     """
8939     steps_total = 6
8940
8941     # Step: check device activation
8942     self.lu.LogStep(1, steps_total, "Check device existence")
8943     self._CheckDisksExistence([self.other_node, self.target_node])
8944     self._CheckVolumeGroup([self.target_node, self.other_node])
8945
8946     # Step: check other node consistency
8947     self.lu.LogStep(2, steps_total, "Check peer consistency")
8948     self._CheckDisksConsistency(self.other_node,
8949                                 self.other_node == self.instance.primary_node,
8950                                 False)
8951
8952     # Step: create new storage
8953     self.lu.LogStep(3, steps_total, "Allocate new storage")
8954     iv_names = self._CreateNewStorage(self.target_node)
8955
8956     # Step: for each lv, detach+rename*2+attach
8957     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8958     for dev, old_lvs, new_lvs in iv_names.itervalues():
8959       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8960
8961       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8962                                                      old_lvs)
8963       result.Raise("Can't detach drbd from local storage on node"
8964                    " %s for device %s" % (self.target_node, dev.iv_name))
8965       #dev.children = []
8966       #cfg.Update(instance)
8967
8968       # ok, we created the new LVs, so now we know we have the needed
8969       # storage; as such, we proceed on the target node to rename
8970       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8971       # using the assumption that logical_id == physical_id (which in
8972       # turn is the unique_id on that node)
8973
8974       # FIXME(iustin): use a better name for the replaced LVs
8975       temp_suffix = int(time.time())
8976       ren_fn = lambda d, suff: (d.physical_id[0],
8977                                 d.physical_id[1] + "_replaced-%s" % suff)
8978
8979       # Build the rename list based on what LVs exist on the node
8980       rename_old_to_new = []
8981       for to_ren in old_lvs:
8982         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8983         if not result.fail_msg and result.payload:
8984           # device exists
8985           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8986
8987       self.lu.LogInfo("Renaming the old LVs on the target node")
8988       result = self.rpc.call_blockdev_rename(self.target_node,
8989                                              rename_old_to_new)
8990       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8991
8992       # Now we rename the new LVs to the old LVs
8993       self.lu.LogInfo("Renaming the new LVs on the target node")
8994       rename_new_to_old = [(new, old.physical_id)
8995                            for old, new in zip(old_lvs, new_lvs)]
8996       result = self.rpc.call_blockdev_rename(self.target_node,
8997                                              rename_new_to_old)
8998       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8999
9000       for old, new in zip(old_lvs, new_lvs):
9001         new.logical_id = old.logical_id
9002         self.cfg.SetDiskID(new, self.target_node)
9003
9004       for disk in old_lvs:
9005         disk.logical_id = ren_fn(disk, temp_suffix)
9006         self.cfg.SetDiskID(disk, self.target_node)
9007
9008       # Now that the new lvs have the old name, we can add them to the device
9009       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9010       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9011                                                   new_lvs)
9012       msg = result.fail_msg
9013       if msg:
9014         for new_lv in new_lvs:
9015           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9016                                                new_lv).fail_msg
9017           if msg2:
9018             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9019                                hint=("cleanup manually the unused logical"
9020                                      "volumes"))
9021         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9022
9023       dev.children = new_lvs
9024
9025       self.cfg.Update(self.instance, feedback_fn)
9026
9027     cstep = 5
9028     if self.early_release:
9029       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9030       cstep += 1
9031       self._RemoveOldStorage(self.target_node, iv_names)
9032       # WARNING: we release both node locks here, do not do other RPCs
9033       # than WaitForSync to the primary node
9034       self._ReleaseNodeLock([self.target_node, self.other_node])
9035
9036     # Wait for sync
9037     # This can fail as the old devices are degraded and _WaitForSync
9038     # does a combined result over all disks, so we don't check its return value
9039     self.lu.LogStep(cstep, steps_total, "Sync devices")
9040     cstep += 1
9041     _WaitForSync(self.lu, self.instance)
9042
9043     # Check all devices manually
9044     self._CheckDevices(self.instance.primary_node, iv_names)
9045
9046     # Step: remove old storage
9047     if not self.early_release:
9048       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9049       cstep += 1
9050       self._RemoveOldStorage(self.target_node, iv_names)
9051
9052   def _ExecDrbd8Secondary(self, feedback_fn):
9053     """Replace the secondary node for DRBD 8.
9054
9055     The algorithm for replace is quite complicated:
9056       - for all disks of the instance:
9057         - create new LVs on the new node with same names
9058         - shutdown the drbd device on the old secondary
9059         - disconnect the drbd network on the primary
9060         - create the drbd device on the new secondary
9061         - network attach the drbd on the primary, using an artifice:
9062           the drbd code for Attach() will connect to the network if it
9063           finds a device which is connected to the good local disks but
9064           not network enabled
9065       - wait for sync across all devices
9066       - remove all disks from the old secondary
9067
9068     Failures are not very well handled.
9069
9070     """
9071     steps_total = 6
9072
9073     # Step: check device activation
9074     self.lu.LogStep(1, steps_total, "Check device existence")
9075     self._CheckDisksExistence([self.instance.primary_node])
9076     self._CheckVolumeGroup([self.instance.primary_node])
9077
9078     # Step: check other node consistency
9079     self.lu.LogStep(2, steps_total, "Check peer consistency")
9080     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9081
9082     # Step: create new storage
9083     self.lu.LogStep(3, steps_total, "Allocate new storage")
9084     for idx, dev in enumerate(self.instance.disks):
9085       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9086                       (self.new_node, idx))
9087       # we pass force_create=True to force LVM creation
9088       for new_lv in dev.children:
9089         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9090                         _GetInstanceInfoText(self.instance), False)
9091
9092     # Step 4: dbrd minors and drbd setups changes
9093     # after this, we must manually remove the drbd minors on both the
9094     # error and the success paths
9095     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9096     minors = self.cfg.AllocateDRBDMinor([self.new_node
9097                                          for dev in self.instance.disks],
9098                                         self.instance.name)
9099     logging.debug("Allocated minors %r", minors)
9100
9101     iv_names = {}
9102     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9103       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9104                       (self.new_node, idx))
9105       # create new devices on new_node; note that we create two IDs:
9106       # one without port, so the drbd will be activated without
9107       # networking information on the new node at this stage, and one
9108       # with network, for the latter activation in step 4
9109       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9110       if self.instance.primary_node == o_node1:
9111         p_minor = o_minor1
9112       else:
9113         assert self.instance.primary_node == o_node2, "Three-node instance?"
9114         p_minor = o_minor2
9115
9116       new_alone_id = (self.instance.primary_node, self.new_node, None,
9117                       p_minor, new_minor, o_secret)
9118       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9119                     p_minor, new_minor, o_secret)
9120
9121       iv_names[idx] = (dev, dev.children, new_net_id)
9122       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9123                     new_net_id)
9124       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9125                               logical_id=new_alone_id,
9126                               children=dev.children,
9127                               size=dev.size)
9128       try:
9129         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9130                               _GetInstanceInfoText(self.instance), False)
9131       except errors.GenericError:
9132         self.cfg.ReleaseDRBDMinors(self.instance.name)
9133         raise
9134
9135     # We have new devices, shutdown the drbd on the old secondary
9136     for idx, dev in enumerate(self.instance.disks):
9137       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9138       self.cfg.SetDiskID(dev, self.target_node)
9139       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9140       if msg:
9141         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9142                            "node: %s" % (idx, msg),
9143                            hint=("Please cleanup this device manually as"
9144                                  " soon as possible"))
9145
9146     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9147     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9148                                                self.node_secondary_ip,
9149                                                self.instance.disks)\
9150                                               [self.instance.primary_node]
9151
9152     msg = result.fail_msg
9153     if msg:
9154       # detaches didn't succeed (unlikely)
9155       self.cfg.ReleaseDRBDMinors(self.instance.name)
9156       raise errors.OpExecError("Can't detach the disks from the network on"
9157                                " old node: %s" % (msg,))
9158
9159     # if we managed to detach at least one, we update all the disks of
9160     # the instance to point to the new secondary
9161     self.lu.LogInfo("Updating instance configuration")
9162     for dev, _, new_logical_id in iv_names.itervalues():
9163       dev.logical_id = new_logical_id
9164       self.cfg.SetDiskID(dev, self.instance.primary_node)
9165
9166     self.cfg.Update(self.instance, feedback_fn)
9167
9168     # and now perform the drbd attach
9169     self.lu.LogInfo("Attaching primary drbds to new secondary"
9170                     " (standalone => connected)")
9171     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9172                                             self.new_node],
9173                                            self.node_secondary_ip,
9174                                            self.instance.disks,
9175                                            self.instance.name,
9176                                            False)
9177     for to_node, to_result in result.items():
9178       msg = to_result.fail_msg
9179       if msg:
9180         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9181                            to_node, msg,
9182                            hint=("please do a gnt-instance info to see the"
9183                                  " status of disks"))
9184     cstep = 5
9185     if self.early_release:
9186       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9187       cstep += 1
9188       self._RemoveOldStorage(self.target_node, iv_names)
9189       # WARNING: we release all node locks here, do not do other RPCs
9190       # than WaitForSync to the primary node
9191       self._ReleaseNodeLock([self.instance.primary_node,
9192                              self.target_node,
9193                              self.new_node])
9194
9195     # Wait for sync
9196     # This can fail as the old devices are degraded and _WaitForSync
9197     # does a combined result over all disks, so we don't check its return value
9198     self.lu.LogStep(cstep, steps_total, "Sync devices")
9199     cstep += 1
9200     _WaitForSync(self.lu, self.instance)
9201
9202     # Check all devices manually
9203     self._CheckDevices(self.instance.primary_node, iv_names)
9204
9205     # Step: remove old storage
9206     if not self.early_release:
9207       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9208       self._RemoveOldStorage(self.target_node, iv_names)
9209
9210
9211 class LURepairNodeStorage(NoHooksLU):
9212   """Repairs the volume group on a node.
9213
9214   """
9215   REQ_BGL = False
9216
9217   def CheckArguments(self):
9218     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9219
9220     storage_type = self.op.storage_type
9221
9222     if (constants.SO_FIX_CONSISTENCY not in
9223         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9224       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9225                                  " repaired" % storage_type,
9226                                  errors.ECODE_INVAL)
9227
9228   def ExpandNames(self):
9229     self.needed_locks = {
9230       locking.LEVEL_NODE: [self.op.node_name],
9231       }
9232
9233   def _CheckFaultyDisks(self, instance, node_name):
9234     """Ensure faulty disks abort the opcode or at least warn."""
9235     try:
9236       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9237                                   node_name, True):
9238         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9239                                    " node '%s'" % (instance.name, node_name),
9240                                    errors.ECODE_STATE)
9241     except errors.OpPrereqError, err:
9242       if self.op.ignore_consistency:
9243         self.proc.LogWarning(str(err.args[0]))
9244       else:
9245         raise
9246
9247   def CheckPrereq(self):
9248     """Check prerequisites.
9249
9250     """
9251     # Check whether any instance on this node has faulty disks
9252     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9253       if not inst.admin_up:
9254         continue
9255       check_nodes = set(inst.all_nodes)
9256       check_nodes.discard(self.op.node_name)
9257       for inst_node_name in check_nodes:
9258         self._CheckFaultyDisks(inst, inst_node_name)
9259
9260   def Exec(self, feedback_fn):
9261     feedback_fn("Repairing storage unit '%s' on %s ..." %
9262                 (self.op.name, self.op.node_name))
9263
9264     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9265     result = self.rpc.call_storage_execute(self.op.node_name,
9266                                            self.op.storage_type, st_args,
9267                                            self.op.name,
9268                                            constants.SO_FIX_CONSISTENCY)
9269     result.Raise("Failed to repair storage unit '%s' on %s" %
9270                  (self.op.name, self.op.node_name))
9271
9272
9273 class LUNodeEvacStrategy(NoHooksLU):
9274   """Computes the node evacuation strategy.
9275
9276   """
9277   REQ_BGL = False
9278
9279   def CheckArguments(self):
9280     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9281
9282   def ExpandNames(self):
9283     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9284     self.needed_locks = locks = {}
9285     if self.op.remote_node is None:
9286       locks[locking.LEVEL_NODE] = locking.ALL_SET
9287     else:
9288       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9289       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9290
9291   def Exec(self, feedback_fn):
9292     if self.op.remote_node is not None:
9293       instances = []
9294       for node in self.op.nodes:
9295         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9296       result = []
9297       for i in instances:
9298         if i.primary_node == self.op.remote_node:
9299           raise errors.OpPrereqError("Node %s is the primary node of"
9300                                      " instance %s, cannot use it as"
9301                                      " secondary" %
9302                                      (self.op.remote_node, i.name),
9303                                      errors.ECODE_INVAL)
9304         result.append([i.name, self.op.remote_node])
9305     else:
9306       ial = IAllocator(self.cfg, self.rpc,
9307                        mode=constants.IALLOCATOR_MODE_MEVAC,
9308                        evac_nodes=self.op.nodes)
9309       ial.Run(self.op.iallocator, validate=True)
9310       if not ial.success:
9311         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9312                                  errors.ECODE_NORES)
9313       result = ial.result
9314     return result
9315
9316
9317 class LUInstanceGrowDisk(LogicalUnit):
9318   """Grow a disk of an instance.
9319
9320   """
9321   HPATH = "disk-grow"
9322   HTYPE = constants.HTYPE_INSTANCE
9323   REQ_BGL = False
9324
9325   def ExpandNames(self):
9326     self._ExpandAndLockInstance()
9327     self.needed_locks[locking.LEVEL_NODE] = []
9328     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9329
9330   def DeclareLocks(self, level):
9331     if level == locking.LEVEL_NODE:
9332       self._LockInstancesNodes()
9333
9334   def BuildHooksEnv(self):
9335     """Build hooks env.
9336
9337     This runs on the master, the primary and all the secondaries.
9338
9339     """
9340     env = {
9341       "DISK": self.op.disk,
9342       "AMOUNT": self.op.amount,
9343       }
9344     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9345     return env
9346
9347   def BuildHooksNodes(self):
9348     """Build hooks nodes.
9349
9350     """
9351     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9352     return (nl, nl)
9353
9354   def CheckPrereq(self):
9355     """Check prerequisites.
9356
9357     This checks that the instance is in the cluster.
9358
9359     """
9360     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9361     assert instance is not None, \
9362       "Cannot retrieve locked instance %s" % self.op.instance_name
9363     nodenames = list(instance.all_nodes)
9364     for node in nodenames:
9365       _CheckNodeOnline(self, node)
9366
9367     self.instance = instance
9368
9369     if instance.disk_template not in constants.DTS_GROWABLE:
9370       raise errors.OpPrereqError("Instance's disk layout does not support"
9371                                  " growing.", errors.ECODE_INVAL)
9372
9373     self.disk = instance.FindDisk(self.op.disk)
9374
9375     if instance.disk_template not in (constants.DT_FILE,
9376                                       constants.DT_SHARED_FILE):
9377       # TODO: check the free disk space for file, when that feature will be
9378       # supported
9379       _CheckNodesFreeDiskPerVG(self, nodenames,
9380                                self.disk.ComputeGrowth(self.op.amount))
9381
9382   def Exec(self, feedback_fn):
9383     """Execute disk grow.
9384
9385     """
9386     instance = self.instance
9387     disk = self.disk
9388
9389     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9390     if not disks_ok:
9391       raise errors.OpExecError("Cannot activate block device to grow")
9392
9393     for node in instance.all_nodes:
9394       self.cfg.SetDiskID(disk, node)
9395       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9396       result.Raise("Grow request failed to node %s" % node)
9397
9398       # TODO: Rewrite code to work properly
9399       # DRBD goes into sync mode for a short amount of time after executing the
9400       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9401       # calling "resize" in sync mode fails. Sleeping for a short amount of
9402       # time is a work-around.
9403       time.sleep(5)
9404
9405     disk.RecordGrow(self.op.amount)
9406     self.cfg.Update(instance, feedback_fn)
9407     if self.op.wait_for_sync:
9408       disk_abort = not _WaitForSync(self, instance, disks=[disk])
9409       if disk_abort:
9410         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9411                              " status.\nPlease check the instance.")
9412       if not instance.admin_up:
9413         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9414     elif not instance.admin_up:
9415       self.proc.LogWarning("Not shutting down the disk even if the instance is"
9416                            " not supposed to be running because no wait for"
9417                            " sync mode was requested.")
9418
9419
9420 class LUInstanceQueryData(NoHooksLU):
9421   """Query runtime instance data.
9422
9423   """
9424   REQ_BGL = False
9425
9426   def ExpandNames(self):
9427     self.needed_locks = {}
9428
9429     # Use locking if requested or when non-static information is wanted
9430     if not (self.op.static or self.op.use_locking):
9431       self.LogWarning("Non-static data requested, locks need to be acquired")
9432       self.op.use_locking = True
9433
9434     if self.op.instances or not self.op.use_locking:
9435       # Expand instance names right here
9436       self.wanted_names = _GetWantedInstances(self, self.op.instances)
9437     else:
9438       # Will use acquired locks
9439       self.wanted_names = None
9440
9441     if self.op.use_locking:
9442       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9443
9444       if self.wanted_names is None:
9445         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9446       else:
9447         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9448
9449       self.needed_locks[locking.LEVEL_NODE] = []
9450       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9451       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9452
9453   def DeclareLocks(self, level):
9454     if self.op.use_locking and level == locking.LEVEL_NODE:
9455       self._LockInstancesNodes()
9456
9457   def CheckPrereq(self):
9458     """Check prerequisites.
9459
9460     This only checks the optional instance list against the existing names.
9461
9462     """
9463     if self.wanted_names is None:
9464       assert self.op.use_locking, "Locking was not used"
9465       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9466
9467     self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9468                              for name in self.wanted_names]
9469
9470   def _ComputeBlockdevStatus(self, node, instance_name, dev):
9471     """Returns the status of a block device
9472
9473     """
9474     if self.op.static or not node:
9475       return None
9476
9477     self.cfg.SetDiskID(dev, node)
9478
9479     result = self.rpc.call_blockdev_find(node, dev)
9480     if result.offline:
9481       return None
9482
9483     result.Raise("Can't compute disk status for %s" % instance_name)
9484
9485     status = result.payload
9486     if status is None:
9487       return None
9488
9489     return (status.dev_path, status.major, status.minor,
9490             status.sync_percent, status.estimated_time,
9491             status.is_degraded, status.ldisk_status)
9492
9493   def _ComputeDiskStatus(self, instance, snode, dev):
9494     """Compute block device status.
9495
9496     """
9497     if dev.dev_type in constants.LDS_DRBD:
9498       # we change the snode then (otherwise we use the one passed in)
9499       if dev.logical_id[0] == instance.primary_node:
9500         snode = dev.logical_id[1]
9501       else:
9502         snode = dev.logical_id[0]
9503
9504     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9505                                               instance.name, dev)
9506     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9507
9508     if dev.children:
9509       dev_children = [self._ComputeDiskStatus(instance, snode, child)
9510                       for child in dev.children]
9511     else:
9512       dev_children = []
9513
9514     return {
9515       "iv_name": dev.iv_name,
9516       "dev_type": dev.dev_type,
9517       "logical_id": dev.logical_id,
9518       "physical_id": dev.physical_id,
9519       "pstatus": dev_pstatus,
9520       "sstatus": dev_sstatus,
9521       "children": dev_children,
9522       "mode": dev.mode,
9523       "size": dev.size,
9524       }
9525
9526   def Exec(self, feedback_fn):
9527     """Gather and return data"""
9528     result = {}
9529
9530     cluster = self.cfg.GetClusterInfo()
9531
9532     for instance in self.wanted_instances:
9533       if not self.op.static:
9534         remote_info = self.rpc.call_instance_info(instance.primary_node,
9535                                                   instance.name,
9536                                                   instance.hypervisor)
9537         remote_info.Raise("Error checking node %s" % instance.primary_node)
9538         remote_info = remote_info.payload
9539         if remote_info and "state" in remote_info:
9540           remote_state = "up"
9541         else:
9542           remote_state = "down"
9543       else:
9544         remote_state = None
9545       if instance.admin_up:
9546         config_state = "up"
9547       else:
9548         config_state = "down"
9549
9550       disks = [self._ComputeDiskStatus(instance, None, device)
9551                for device in instance.disks]
9552
9553       result[instance.name] = {
9554         "name": instance.name,
9555         "config_state": config_state,
9556         "run_state": remote_state,
9557         "pnode": instance.primary_node,
9558         "snodes": instance.secondary_nodes,
9559         "os": instance.os,
9560         # this happens to be the same format used for hooks
9561         "nics": _NICListToTuple(self, instance.nics),
9562         "disk_template": instance.disk_template,
9563         "disks": disks,
9564         "hypervisor": instance.hypervisor,
9565         "network_port": instance.network_port,
9566         "hv_instance": instance.hvparams,
9567         "hv_actual": cluster.FillHV(instance, skip_globals=True),
9568         "be_instance": instance.beparams,
9569         "be_actual": cluster.FillBE(instance),
9570         "os_instance": instance.osparams,
9571         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9572         "serial_no": instance.serial_no,
9573         "mtime": instance.mtime,
9574         "ctime": instance.ctime,
9575         "uuid": instance.uuid,
9576         }
9577
9578     return result
9579
9580
9581 class LUInstanceSetParams(LogicalUnit):
9582   """Modifies an instances's parameters.
9583
9584   """
9585   HPATH = "instance-modify"
9586   HTYPE = constants.HTYPE_INSTANCE
9587   REQ_BGL = False
9588
9589   def CheckArguments(self):
9590     if not (self.op.nics or self.op.disks or self.op.disk_template or
9591             self.op.hvparams or self.op.beparams or self.op.os_name):
9592       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9593
9594     if self.op.hvparams:
9595       _CheckGlobalHvParams(self.op.hvparams)
9596
9597     # Disk validation
9598     disk_addremove = 0
9599     for disk_op, disk_dict in self.op.disks:
9600       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9601       if disk_op == constants.DDM_REMOVE:
9602         disk_addremove += 1
9603         continue
9604       elif disk_op == constants.DDM_ADD:
9605         disk_addremove += 1
9606       else:
9607         if not isinstance(disk_op, int):
9608           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9609         if not isinstance(disk_dict, dict):
9610           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9611           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9612
9613       if disk_op == constants.DDM_ADD:
9614         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9615         if mode not in constants.DISK_ACCESS_SET:
9616           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9617                                      errors.ECODE_INVAL)
9618         size = disk_dict.get(constants.IDISK_SIZE, None)
9619         if size is None:
9620           raise errors.OpPrereqError("Required disk parameter size missing",
9621                                      errors.ECODE_INVAL)
9622         try:
9623           size = int(size)
9624         except (TypeError, ValueError), err:
9625           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9626                                      str(err), errors.ECODE_INVAL)
9627         disk_dict[constants.IDISK_SIZE] = size
9628       else:
9629         # modification of disk
9630         if constants.IDISK_SIZE in disk_dict:
9631           raise errors.OpPrereqError("Disk size change not possible, use"
9632                                      " grow-disk", errors.ECODE_INVAL)
9633
9634     if disk_addremove > 1:
9635       raise errors.OpPrereqError("Only one disk add or remove operation"
9636                                  " supported at a time", errors.ECODE_INVAL)
9637
9638     if self.op.disks and self.op.disk_template is not None:
9639       raise errors.OpPrereqError("Disk template conversion and other disk"
9640                                  " changes not supported at the same time",
9641                                  errors.ECODE_INVAL)
9642
9643     if (self.op.disk_template and
9644         self.op.disk_template in constants.DTS_INT_MIRROR and
9645         self.op.remote_node is None):
9646       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9647                                  " one requires specifying a secondary node",
9648                                  errors.ECODE_INVAL)
9649
9650     # NIC validation
9651     nic_addremove = 0
9652     for nic_op, nic_dict in self.op.nics:
9653       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9654       if nic_op == constants.DDM_REMOVE:
9655         nic_addremove += 1
9656         continue
9657       elif nic_op == constants.DDM_ADD:
9658         nic_addremove += 1
9659       else:
9660         if not isinstance(nic_op, int):
9661           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9662         if not isinstance(nic_dict, dict):
9663           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9664           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9665
9666       # nic_dict should be a dict
9667       nic_ip = nic_dict.get(constants.INIC_IP, None)
9668       if nic_ip is not None:
9669         if nic_ip.lower() == constants.VALUE_NONE:
9670           nic_dict[constants.INIC_IP] = None
9671         else:
9672           if not netutils.IPAddress.IsValid(nic_ip):
9673             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9674                                        errors.ECODE_INVAL)
9675
9676       nic_bridge = nic_dict.get('bridge', None)
9677       nic_link = nic_dict.get(constants.INIC_LINK, None)
9678       if nic_bridge and nic_link:
9679         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9680                                    " at the same time", errors.ECODE_INVAL)
9681       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9682         nic_dict['bridge'] = None
9683       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9684         nic_dict[constants.INIC_LINK] = None
9685
9686       if nic_op == constants.DDM_ADD:
9687         nic_mac = nic_dict.get(constants.INIC_MAC, None)
9688         if nic_mac is None:
9689           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9690
9691       if constants.INIC_MAC in nic_dict:
9692         nic_mac = nic_dict[constants.INIC_MAC]
9693         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9694           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9695
9696         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9697           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9698                                      " modifying an existing nic",
9699                                      errors.ECODE_INVAL)
9700
9701     if nic_addremove > 1:
9702       raise errors.OpPrereqError("Only one NIC add or remove operation"
9703                                  " supported at a time", errors.ECODE_INVAL)
9704
9705   def ExpandNames(self):
9706     self._ExpandAndLockInstance()
9707     self.needed_locks[locking.LEVEL_NODE] = []
9708     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9709
9710   def DeclareLocks(self, level):
9711     if level == locking.LEVEL_NODE:
9712       self._LockInstancesNodes()
9713       if self.op.disk_template and self.op.remote_node:
9714         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9715         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9716
9717   def BuildHooksEnv(self):
9718     """Build hooks env.
9719
9720     This runs on the master, primary and secondaries.
9721
9722     """
9723     args = dict()
9724     if constants.BE_MEMORY in self.be_new:
9725       args['memory'] = self.be_new[constants.BE_MEMORY]
9726     if constants.BE_VCPUS in self.be_new:
9727       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9728     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9729     # information at all.
9730     if self.op.nics:
9731       args['nics'] = []
9732       nic_override = dict(self.op.nics)
9733       for idx, nic in enumerate(self.instance.nics):
9734         if idx in nic_override:
9735           this_nic_override = nic_override[idx]
9736         else:
9737           this_nic_override = {}
9738         if constants.INIC_IP in this_nic_override:
9739           ip = this_nic_override[constants.INIC_IP]
9740         else:
9741           ip = nic.ip
9742         if constants.INIC_MAC in this_nic_override:
9743           mac = this_nic_override[constants.INIC_MAC]
9744         else:
9745           mac = nic.mac
9746         if idx in self.nic_pnew:
9747           nicparams = self.nic_pnew[idx]
9748         else:
9749           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9750         mode = nicparams[constants.NIC_MODE]
9751         link = nicparams[constants.NIC_LINK]
9752         args['nics'].append((ip, mac, mode, link))
9753       if constants.DDM_ADD in nic_override:
9754         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9755         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9756         nicparams = self.nic_pnew[constants.DDM_ADD]
9757         mode = nicparams[constants.NIC_MODE]
9758         link = nicparams[constants.NIC_LINK]
9759         args['nics'].append((ip, mac, mode, link))
9760       elif constants.DDM_REMOVE in nic_override:
9761         del args['nics'][-1]
9762
9763     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9764     if self.op.disk_template:
9765       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9766
9767     return env
9768
9769   def BuildHooksNodes(self):
9770     """Build hooks nodes.
9771
9772     """
9773     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9774     return (nl, nl)
9775
9776   def CheckPrereq(self):
9777     """Check prerequisites.
9778
9779     This only checks the instance list against the existing names.
9780
9781     """
9782     # checking the new params on the primary/secondary nodes
9783
9784     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9785     cluster = self.cluster = self.cfg.GetClusterInfo()
9786     assert self.instance is not None, \
9787       "Cannot retrieve locked instance %s" % self.op.instance_name
9788     pnode = instance.primary_node
9789     nodelist = list(instance.all_nodes)
9790
9791     # OS change
9792     if self.op.os_name and not self.op.force:
9793       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9794                       self.op.force_variant)
9795       instance_os = self.op.os_name
9796     else:
9797       instance_os = instance.os
9798
9799     if self.op.disk_template:
9800       if instance.disk_template == self.op.disk_template:
9801         raise errors.OpPrereqError("Instance already has disk template %s" %
9802                                    instance.disk_template, errors.ECODE_INVAL)
9803
9804       if (instance.disk_template,
9805           self.op.disk_template) not in self._DISK_CONVERSIONS:
9806         raise errors.OpPrereqError("Unsupported disk template conversion from"
9807                                    " %s to %s" % (instance.disk_template,
9808                                                   self.op.disk_template),
9809                                    errors.ECODE_INVAL)
9810       _CheckInstanceDown(self, instance, "cannot change disk template")
9811       if self.op.disk_template in constants.DTS_INT_MIRROR:
9812         if self.op.remote_node == pnode:
9813           raise errors.OpPrereqError("Given new secondary node %s is the same"
9814                                      " as the primary node of the instance" %
9815                                      self.op.remote_node, errors.ECODE_STATE)
9816         _CheckNodeOnline(self, self.op.remote_node)
9817         _CheckNodeNotDrained(self, self.op.remote_node)
9818         # FIXME: here we assume that the old instance type is DT_PLAIN
9819         assert instance.disk_template == constants.DT_PLAIN
9820         disks = [{constants.IDISK_SIZE: d.size,
9821                   constants.IDISK_VG: d.logical_id[0]}
9822                  for d in instance.disks]
9823         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9824         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9825
9826     # hvparams processing
9827     if self.op.hvparams:
9828       hv_type = instance.hypervisor
9829       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9830       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9831       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9832
9833       # local check
9834       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9835       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9836       self.hv_new = hv_new # the new actual values
9837       self.hv_inst = i_hvdict # the new dict (without defaults)
9838     else:
9839       self.hv_new = self.hv_inst = {}
9840
9841     # beparams processing
9842     if self.op.beparams:
9843       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9844                                    use_none=True)
9845       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9846       be_new = cluster.SimpleFillBE(i_bedict)
9847       self.be_new = be_new # the new actual values
9848       self.be_inst = i_bedict # the new dict (without defaults)
9849     else:
9850       self.be_new = self.be_inst = {}
9851
9852     # osparams processing
9853     if self.op.osparams:
9854       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9855       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9856       self.os_inst = i_osdict # the new dict (without defaults)
9857     else:
9858       self.os_inst = {}
9859
9860     self.warn = []
9861
9862     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9863       mem_check_list = [pnode]
9864       if be_new[constants.BE_AUTO_BALANCE]:
9865         # either we changed auto_balance to yes or it was from before
9866         mem_check_list.extend(instance.secondary_nodes)
9867       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9868                                                   instance.hypervisor)
9869       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9870                                          instance.hypervisor)
9871       pninfo = nodeinfo[pnode]
9872       msg = pninfo.fail_msg
9873       if msg:
9874         # Assume the primary node is unreachable and go ahead
9875         self.warn.append("Can't get info from primary node %s: %s" %
9876                          (pnode,  msg))
9877       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9878         self.warn.append("Node data from primary node %s doesn't contain"
9879                          " free memory information" % pnode)
9880       elif instance_info.fail_msg:
9881         self.warn.append("Can't get instance runtime information: %s" %
9882                         instance_info.fail_msg)
9883       else:
9884         if instance_info.payload:
9885           current_mem = int(instance_info.payload['memory'])
9886         else:
9887           # Assume instance not running
9888           # (there is a slight race condition here, but it's not very probable,
9889           # and we have no other way to check)
9890           current_mem = 0
9891         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9892                     pninfo.payload['memory_free'])
9893         if miss_mem > 0:
9894           raise errors.OpPrereqError("This change will prevent the instance"
9895                                      " from starting, due to %d MB of memory"
9896                                      " missing on its primary node" % miss_mem,
9897                                      errors.ECODE_NORES)
9898
9899       if be_new[constants.BE_AUTO_BALANCE]:
9900         for node, nres in nodeinfo.items():
9901           if node not in instance.secondary_nodes:
9902             continue
9903           msg = nres.fail_msg
9904           if msg:
9905             self.warn.append("Can't get info from secondary node %s: %s" %
9906                              (node, msg))
9907           elif not isinstance(nres.payload.get('memory_free', None), int):
9908             self.warn.append("Secondary node %s didn't return free"
9909                              " memory information" % node)
9910           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9911             self.warn.append("Not enough memory to failover instance to"
9912                              " secondary node %s" % node)
9913
9914     # NIC processing
9915     self.nic_pnew = {}
9916     self.nic_pinst = {}
9917     for nic_op, nic_dict in self.op.nics:
9918       if nic_op == constants.DDM_REMOVE:
9919         if not instance.nics:
9920           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9921                                      errors.ECODE_INVAL)
9922         continue
9923       if nic_op != constants.DDM_ADD:
9924         # an existing nic
9925         if not instance.nics:
9926           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9927                                      " no NICs" % nic_op,
9928                                      errors.ECODE_INVAL)
9929         if nic_op < 0 or nic_op >= len(instance.nics):
9930           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9931                                      " are 0 to %d" %
9932                                      (nic_op, len(instance.nics) - 1),
9933                                      errors.ECODE_INVAL)
9934         old_nic_params = instance.nics[nic_op].nicparams
9935         old_nic_ip = instance.nics[nic_op].ip
9936       else:
9937         old_nic_params = {}
9938         old_nic_ip = None
9939
9940       update_params_dict = dict([(key, nic_dict[key])
9941                                  for key in constants.NICS_PARAMETERS
9942                                  if key in nic_dict])
9943
9944       if 'bridge' in nic_dict:
9945         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9946
9947       new_nic_params = _GetUpdatedParams(old_nic_params,
9948                                          update_params_dict)
9949       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9950       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9951       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9952       self.nic_pinst[nic_op] = new_nic_params
9953       self.nic_pnew[nic_op] = new_filled_nic_params
9954       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9955
9956       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9957         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9958         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9959         if msg:
9960           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9961           if self.op.force:
9962             self.warn.append(msg)
9963           else:
9964             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9965       if new_nic_mode == constants.NIC_MODE_ROUTED:
9966         if constants.INIC_IP in nic_dict:
9967           nic_ip = nic_dict[constants.INIC_IP]
9968         else:
9969           nic_ip = old_nic_ip
9970         if nic_ip is None:
9971           raise errors.OpPrereqError('Cannot set the nic ip to None'
9972                                      ' on a routed nic', errors.ECODE_INVAL)
9973       if constants.INIC_MAC in nic_dict:
9974         nic_mac = nic_dict[constants.INIC_MAC]
9975         if nic_mac is None:
9976           raise errors.OpPrereqError('Cannot set the nic mac to None',
9977                                      errors.ECODE_INVAL)
9978         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9979           # otherwise generate the mac
9980           nic_dict[constants.INIC_MAC] = \
9981             self.cfg.GenerateMAC(self.proc.GetECId())
9982         else:
9983           # or validate/reserve the current one
9984           try:
9985             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9986           except errors.ReservationError:
9987             raise errors.OpPrereqError("MAC address %s already in use"
9988                                        " in cluster" % nic_mac,
9989                                        errors.ECODE_NOTUNIQUE)
9990
9991     # DISK processing
9992     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9993       raise errors.OpPrereqError("Disk operations not supported for"
9994                                  " diskless instances",
9995                                  errors.ECODE_INVAL)
9996     for disk_op, _ in self.op.disks:
9997       if disk_op == constants.DDM_REMOVE:
9998         if len(instance.disks) == 1:
9999           raise errors.OpPrereqError("Cannot remove the last disk of"
10000                                      " an instance", errors.ECODE_INVAL)
10001         _CheckInstanceDown(self, instance, "cannot remove disks")
10002
10003       if (disk_op == constants.DDM_ADD and
10004           len(instance.disks) >= constants.MAX_DISKS):
10005         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10006                                    " add more" % constants.MAX_DISKS,
10007                                    errors.ECODE_STATE)
10008       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10009         # an existing disk
10010         if disk_op < 0 or disk_op >= len(instance.disks):
10011           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10012                                      " are 0 to %d" %
10013                                      (disk_op, len(instance.disks)),
10014                                      errors.ECODE_INVAL)
10015
10016     return
10017
10018   def _ConvertPlainToDrbd(self, feedback_fn):
10019     """Converts an instance from plain to drbd.
10020
10021     """
10022     feedback_fn("Converting template to drbd")
10023     instance = self.instance
10024     pnode = instance.primary_node
10025     snode = self.op.remote_node
10026
10027     # create a fake disk info for _GenerateDiskTemplate
10028     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10029                  for d in instance.disks]
10030     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10031                                       instance.name, pnode, [snode],
10032                                       disk_info, None, None, 0, feedback_fn)
10033     info = _GetInstanceInfoText(instance)
10034     feedback_fn("Creating aditional volumes...")
10035     # first, create the missing data and meta devices
10036     for disk in new_disks:
10037       # unfortunately this is... not too nice
10038       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10039                             info, True)
10040       for child in disk.children:
10041         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10042     # at this stage, all new LVs have been created, we can rename the
10043     # old ones
10044     feedback_fn("Renaming original volumes...")
10045     rename_list = [(o, n.children[0].logical_id)
10046                    for (o, n) in zip(instance.disks, new_disks)]
10047     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10048     result.Raise("Failed to rename original LVs")
10049
10050     feedback_fn("Initializing DRBD devices...")
10051     # all child devices are in place, we can now create the DRBD devices
10052     for disk in new_disks:
10053       for node in [pnode, snode]:
10054         f_create = node == pnode
10055         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10056
10057     # at this point, the instance has been modified
10058     instance.disk_template = constants.DT_DRBD8
10059     instance.disks = new_disks
10060     self.cfg.Update(instance, feedback_fn)
10061
10062     # disks are created, waiting for sync
10063     disk_abort = not _WaitForSync(self, instance)
10064     if disk_abort:
10065       raise errors.OpExecError("There are some degraded disks for"
10066                                " this instance, please cleanup manually")
10067
10068   def _ConvertDrbdToPlain(self, feedback_fn):
10069     """Converts an instance from drbd to plain.
10070
10071     """
10072     instance = self.instance
10073     assert len(instance.secondary_nodes) == 1
10074     pnode = instance.primary_node
10075     snode = instance.secondary_nodes[0]
10076     feedback_fn("Converting template to plain")
10077
10078     old_disks = instance.disks
10079     new_disks = [d.children[0] for d in old_disks]
10080
10081     # copy over size and mode
10082     for parent, child in zip(old_disks, new_disks):
10083       child.size = parent.size
10084       child.mode = parent.mode
10085
10086     # update instance structure
10087     instance.disks = new_disks
10088     instance.disk_template = constants.DT_PLAIN
10089     self.cfg.Update(instance, feedback_fn)
10090
10091     feedback_fn("Removing volumes on the secondary node...")
10092     for disk in old_disks:
10093       self.cfg.SetDiskID(disk, snode)
10094       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10095       if msg:
10096         self.LogWarning("Could not remove block device %s on node %s,"
10097                         " continuing anyway: %s", disk.iv_name, snode, msg)
10098
10099     feedback_fn("Removing unneeded volumes on the primary node...")
10100     for idx, disk in enumerate(old_disks):
10101       meta = disk.children[1]
10102       self.cfg.SetDiskID(meta, pnode)
10103       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10104       if msg:
10105         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10106                         " continuing anyway: %s", idx, pnode, msg)
10107
10108   def Exec(self, feedback_fn):
10109     """Modifies an instance.
10110
10111     All parameters take effect only at the next restart of the instance.
10112
10113     """
10114     # Process here the warnings from CheckPrereq, as we don't have a
10115     # feedback_fn there.
10116     for warn in self.warn:
10117       feedback_fn("WARNING: %s" % warn)
10118
10119     result = []
10120     instance = self.instance
10121     # disk changes
10122     for disk_op, disk_dict in self.op.disks:
10123       if disk_op == constants.DDM_REMOVE:
10124         # remove the last disk
10125         device = instance.disks.pop()
10126         device_idx = len(instance.disks)
10127         for node, disk in device.ComputeNodeTree(instance.primary_node):
10128           self.cfg.SetDiskID(disk, node)
10129           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10130           if msg:
10131             self.LogWarning("Could not remove disk/%d on node %s: %s,"
10132                             " continuing anyway", device_idx, node, msg)
10133         result.append(("disk/%d" % device_idx, "remove"))
10134       elif disk_op == constants.DDM_ADD:
10135         # add a new disk
10136         if instance.disk_template in (constants.DT_FILE,
10137                                         constants.DT_SHARED_FILE):
10138           file_driver, file_path = instance.disks[0].logical_id
10139           file_path = os.path.dirname(file_path)
10140         else:
10141           file_driver = file_path = None
10142         disk_idx_base = len(instance.disks)
10143         new_disk = _GenerateDiskTemplate(self,
10144                                          instance.disk_template,
10145                                          instance.name, instance.primary_node,
10146                                          instance.secondary_nodes,
10147                                          [disk_dict],
10148                                          file_path,
10149                                          file_driver,
10150                                          disk_idx_base, feedback_fn)[0]
10151         instance.disks.append(new_disk)
10152         info = _GetInstanceInfoText(instance)
10153
10154         logging.info("Creating volume %s for instance %s",
10155                      new_disk.iv_name, instance.name)
10156         # Note: this needs to be kept in sync with _CreateDisks
10157         #HARDCODE
10158         for node in instance.all_nodes:
10159           f_create = node == instance.primary_node
10160           try:
10161             _CreateBlockDev(self, node, instance, new_disk,
10162                             f_create, info, f_create)
10163           except errors.OpExecError, err:
10164             self.LogWarning("Failed to create volume %s (%s) on"
10165                             " node %s: %s",
10166                             new_disk.iv_name, new_disk, node, err)
10167         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10168                        (new_disk.size, new_disk.mode)))
10169       else:
10170         # change a given disk
10171         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10172         result.append(("disk.mode/%d" % disk_op,
10173                        disk_dict[constants.IDISK_MODE]))
10174
10175     if self.op.disk_template:
10176       r_shut = _ShutdownInstanceDisks(self, instance)
10177       if not r_shut:
10178         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10179                                  " proceed with disk template conversion")
10180       mode = (instance.disk_template, self.op.disk_template)
10181       try:
10182         self._DISK_CONVERSIONS[mode](self, feedback_fn)
10183       except:
10184         self.cfg.ReleaseDRBDMinors(instance.name)
10185         raise
10186       result.append(("disk_template", self.op.disk_template))
10187
10188     # NIC changes
10189     for nic_op, nic_dict in self.op.nics:
10190       if nic_op == constants.DDM_REMOVE:
10191         # remove the last nic
10192         del instance.nics[-1]
10193         result.append(("nic.%d" % len(instance.nics), "remove"))
10194       elif nic_op == constants.DDM_ADD:
10195         # mac and bridge should be set, by now
10196         mac = nic_dict[constants.INIC_MAC]
10197         ip = nic_dict.get(constants.INIC_IP, None)
10198         nicparams = self.nic_pinst[constants.DDM_ADD]
10199         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10200         instance.nics.append(new_nic)
10201         result.append(("nic.%d" % (len(instance.nics) - 1),
10202                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
10203                        (new_nic.mac, new_nic.ip,
10204                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10205                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10206                        )))
10207       else:
10208         for key in (constants.INIC_MAC, constants.INIC_IP):
10209           if key in nic_dict:
10210             setattr(instance.nics[nic_op], key, nic_dict[key])
10211         if nic_op in self.nic_pinst:
10212           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10213         for key, val in nic_dict.iteritems():
10214           result.append(("nic.%s/%d" % (key, nic_op), val))
10215
10216     # hvparams changes
10217     if self.op.hvparams:
10218       instance.hvparams = self.hv_inst
10219       for key, val in self.op.hvparams.iteritems():
10220         result.append(("hv/%s" % key, val))
10221
10222     # beparams changes
10223     if self.op.beparams:
10224       instance.beparams = self.be_inst
10225       for key, val in self.op.beparams.iteritems():
10226         result.append(("be/%s" % key, val))
10227
10228     # OS change
10229     if self.op.os_name:
10230       instance.os = self.op.os_name
10231
10232     # osparams changes
10233     if self.op.osparams:
10234       instance.osparams = self.os_inst
10235       for key, val in self.op.osparams.iteritems():
10236         result.append(("os/%s" % key, val))
10237
10238     self.cfg.Update(instance, feedback_fn)
10239
10240     return result
10241
10242   _DISK_CONVERSIONS = {
10243     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10244     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10245     }
10246
10247
10248 class LUBackupQuery(NoHooksLU):
10249   """Query the exports list
10250
10251   """
10252   REQ_BGL = False
10253
10254   def ExpandNames(self):
10255     self.needed_locks = {}
10256     self.share_locks[locking.LEVEL_NODE] = 1
10257     if not self.op.nodes:
10258       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10259     else:
10260       self.needed_locks[locking.LEVEL_NODE] = \
10261         _GetWantedNodes(self, self.op.nodes)
10262
10263   def Exec(self, feedback_fn):
10264     """Compute the list of all the exported system images.
10265
10266     @rtype: dict
10267     @return: a dictionary with the structure node->(export-list)
10268         where export-list is a list of the instances exported on
10269         that node.
10270
10271     """
10272     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10273     rpcresult = self.rpc.call_export_list(self.nodes)
10274     result = {}
10275     for node in rpcresult:
10276       if rpcresult[node].fail_msg:
10277         result[node] = False
10278       else:
10279         result[node] = rpcresult[node].payload
10280
10281     return result
10282
10283
10284 class LUBackupPrepare(NoHooksLU):
10285   """Prepares an instance for an export and returns useful information.
10286
10287   """
10288   REQ_BGL = False
10289
10290   def ExpandNames(self):
10291     self._ExpandAndLockInstance()
10292
10293   def CheckPrereq(self):
10294     """Check prerequisites.
10295
10296     """
10297     instance_name = self.op.instance_name
10298
10299     self.instance = self.cfg.GetInstanceInfo(instance_name)
10300     assert self.instance is not None, \
10301           "Cannot retrieve locked instance %s" % self.op.instance_name
10302     _CheckNodeOnline(self, self.instance.primary_node)
10303
10304     self._cds = _GetClusterDomainSecret()
10305
10306   def Exec(self, feedback_fn):
10307     """Prepares an instance for an export.
10308
10309     """
10310     instance = self.instance
10311
10312     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10313       salt = utils.GenerateSecret(8)
10314
10315       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10316       result = self.rpc.call_x509_cert_create(instance.primary_node,
10317                                               constants.RIE_CERT_VALIDITY)
10318       result.Raise("Can't create X509 key and certificate on %s" % result.node)
10319
10320       (name, cert_pem) = result.payload
10321
10322       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10323                                              cert_pem)
10324
10325       return {
10326         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10327         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10328                           salt),
10329         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10330         }
10331
10332     return None
10333
10334
10335 class LUBackupExport(LogicalUnit):
10336   """Export an instance to an image in the cluster.
10337
10338   """
10339   HPATH = "instance-export"
10340   HTYPE = constants.HTYPE_INSTANCE
10341   REQ_BGL = False
10342
10343   def CheckArguments(self):
10344     """Check the arguments.
10345
10346     """
10347     self.x509_key_name = self.op.x509_key_name
10348     self.dest_x509_ca_pem = self.op.destination_x509_ca
10349
10350     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10351       if not self.x509_key_name:
10352         raise errors.OpPrereqError("Missing X509 key name for encryption",
10353                                    errors.ECODE_INVAL)
10354
10355       if not self.dest_x509_ca_pem:
10356         raise errors.OpPrereqError("Missing destination X509 CA",
10357                                    errors.ECODE_INVAL)
10358
10359   def ExpandNames(self):
10360     self._ExpandAndLockInstance()
10361
10362     # Lock all nodes for local exports
10363     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10364       # FIXME: lock only instance primary and destination node
10365       #
10366       # Sad but true, for now we have do lock all nodes, as we don't know where
10367       # the previous export might be, and in this LU we search for it and
10368       # remove it from its current node. In the future we could fix this by:
10369       #  - making a tasklet to search (share-lock all), then create the
10370       #    new one, then one to remove, after
10371       #  - removing the removal operation altogether
10372       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10373
10374   def DeclareLocks(self, level):
10375     """Last minute lock declaration."""
10376     # All nodes are locked anyway, so nothing to do here.
10377
10378   def BuildHooksEnv(self):
10379     """Build hooks env.
10380
10381     This will run on the master, primary node and target node.
10382
10383     """
10384     env = {
10385       "EXPORT_MODE": self.op.mode,
10386       "EXPORT_NODE": self.op.target_node,
10387       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10388       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10389       # TODO: Generic function for boolean env variables
10390       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10391       }
10392
10393     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10394
10395     return env
10396
10397   def BuildHooksNodes(self):
10398     """Build hooks nodes.
10399
10400     """
10401     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10402
10403     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10404       nl.append(self.op.target_node)
10405
10406     return (nl, nl)
10407
10408   def CheckPrereq(self):
10409     """Check prerequisites.
10410
10411     This checks that the instance and node names are valid.
10412
10413     """
10414     instance_name = self.op.instance_name
10415
10416     self.instance = self.cfg.GetInstanceInfo(instance_name)
10417     assert self.instance is not None, \
10418           "Cannot retrieve locked instance %s" % self.op.instance_name
10419     _CheckNodeOnline(self, self.instance.primary_node)
10420
10421     if (self.op.remove_instance and self.instance.admin_up and
10422         not self.op.shutdown):
10423       raise errors.OpPrereqError("Can not remove instance without shutting it"
10424                                  " down before")
10425
10426     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10427       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10428       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10429       assert self.dst_node is not None
10430
10431       _CheckNodeOnline(self, self.dst_node.name)
10432       _CheckNodeNotDrained(self, self.dst_node.name)
10433
10434       self._cds = None
10435       self.dest_disk_info = None
10436       self.dest_x509_ca = None
10437
10438     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10439       self.dst_node = None
10440
10441       if len(self.op.target_node) != len(self.instance.disks):
10442         raise errors.OpPrereqError(("Received destination information for %s"
10443                                     " disks, but instance %s has %s disks") %
10444                                    (len(self.op.target_node), instance_name,
10445                                     len(self.instance.disks)),
10446                                    errors.ECODE_INVAL)
10447
10448       cds = _GetClusterDomainSecret()
10449
10450       # Check X509 key name
10451       try:
10452         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10453       except (TypeError, ValueError), err:
10454         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10455
10456       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10457         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10458                                    errors.ECODE_INVAL)
10459
10460       # Load and verify CA
10461       try:
10462         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10463       except OpenSSL.crypto.Error, err:
10464         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10465                                    (err, ), errors.ECODE_INVAL)
10466
10467       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10468       if errcode is not None:
10469         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10470                                    (msg, ), errors.ECODE_INVAL)
10471
10472       self.dest_x509_ca = cert
10473
10474       # Verify target information
10475       disk_info = []
10476       for idx, disk_data in enumerate(self.op.target_node):
10477         try:
10478           (host, port, magic) = \
10479             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10480         except errors.GenericError, err:
10481           raise errors.OpPrereqError("Target info for disk %s: %s" %
10482                                      (idx, err), errors.ECODE_INVAL)
10483
10484         disk_info.append((host, port, magic))
10485
10486       assert len(disk_info) == len(self.op.target_node)
10487       self.dest_disk_info = disk_info
10488
10489     else:
10490       raise errors.ProgrammerError("Unhandled export mode %r" %
10491                                    self.op.mode)
10492
10493     # instance disk type verification
10494     # TODO: Implement export support for file-based disks
10495     for disk in self.instance.disks:
10496       if disk.dev_type == constants.LD_FILE:
10497         raise errors.OpPrereqError("Export not supported for instances with"
10498                                    " file-based disks", errors.ECODE_INVAL)
10499
10500   def _CleanupExports(self, feedback_fn):
10501     """Removes exports of current instance from all other nodes.
10502
10503     If an instance in a cluster with nodes A..D was exported to node C, its
10504     exports will be removed from the nodes A, B and D.
10505
10506     """
10507     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10508
10509     nodelist = self.cfg.GetNodeList()
10510     nodelist.remove(self.dst_node.name)
10511
10512     # on one-node clusters nodelist will be empty after the removal
10513     # if we proceed the backup would be removed because OpBackupQuery
10514     # substitutes an empty list with the full cluster node list.
10515     iname = self.instance.name
10516     if nodelist:
10517       feedback_fn("Removing old exports for instance %s" % iname)
10518       exportlist = self.rpc.call_export_list(nodelist)
10519       for node in exportlist:
10520         if exportlist[node].fail_msg:
10521           continue
10522         if iname in exportlist[node].payload:
10523           msg = self.rpc.call_export_remove(node, iname).fail_msg
10524           if msg:
10525             self.LogWarning("Could not remove older export for instance %s"
10526                             " on node %s: %s", iname, node, msg)
10527
10528   def Exec(self, feedback_fn):
10529     """Export an instance to an image in the cluster.
10530
10531     """
10532     assert self.op.mode in constants.EXPORT_MODES
10533
10534     instance = self.instance
10535     src_node = instance.primary_node
10536
10537     if self.op.shutdown:
10538       # shutdown the instance, but not the disks
10539       feedback_fn("Shutting down instance %s" % instance.name)
10540       result = self.rpc.call_instance_shutdown(src_node, instance,
10541                                                self.op.shutdown_timeout)
10542       # TODO: Maybe ignore failures if ignore_remove_failures is set
10543       result.Raise("Could not shutdown instance %s on"
10544                    " node %s" % (instance.name, src_node))
10545
10546     # set the disks ID correctly since call_instance_start needs the
10547     # correct drbd minor to create the symlinks
10548     for disk in instance.disks:
10549       self.cfg.SetDiskID(disk, src_node)
10550
10551     activate_disks = (not instance.admin_up)
10552
10553     if activate_disks:
10554       # Activate the instance disks if we'exporting a stopped instance
10555       feedback_fn("Activating disks for %s" % instance.name)
10556       _StartInstanceDisks(self, instance, None)
10557
10558     try:
10559       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10560                                                      instance)
10561
10562       helper.CreateSnapshots()
10563       try:
10564         if (self.op.shutdown and instance.admin_up and
10565             not self.op.remove_instance):
10566           assert not activate_disks
10567           feedback_fn("Starting instance %s" % instance.name)
10568           result = self.rpc.call_instance_start(src_node, instance, None, None)
10569           msg = result.fail_msg
10570           if msg:
10571             feedback_fn("Failed to start instance: %s" % msg)
10572             _ShutdownInstanceDisks(self, instance)
10573             raise errors.OpExecError("Could not start instance: %s" % msg)
10574
10575         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10576           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10577         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10578           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10579           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10580
10581           (key_name, _, _) = self.x509_key_name
10582
10583           dest_ca_pem = \
10584             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10585                                             self.dest_x509_ca)
10586
10587           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10588                                                      key_name, dest_ca_pem,
10589                                                      timeouts)
10590       finally:
10591         helper.Cleanup()
10592
10593       # Check for backwards compatibility
10594       assert len(dresults) == len(instance.disks)
10595       assert compat.all(isinstance(i, bool) for i in dresults), \
10596              "Not all results are boolean: %r" % dresults
10597
10598     finally:
10599       if activate_disks:
10600         feedback_fn("Deactivating disks for %s" % instance.name)
10601         _ShutdownInstanceDisks(self, instance)
10602
10603     if not (compat.all(dresults) and fin_resu):
10604       failures = []
10605       if not fin_resu:
10606         failures.append("export finalization")
10607       if not compat.all(dresults):
10608         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10609                                if not dsk)
10610         failures.append("disk export: disk(s) %s" % fdsk)
10611
10612       raise errors.OpExecError("Export failed, errors in %s" %
10613                                utils.CommaJoin(failures))
10614
10615     # At this point, the export was successful, we can cleanup/finish
10616
10617     # Remove instance if requested
10618     if self.op.remove_instance:
10619       feedback_fn("Removing instance %s" % instance.name)
10620       _RemoveInstance(self, feedback_fn, instance,
10621                       self.op.ignore_remove_failures)
10622
10623     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10624       self._CleanupExports(feedback_fn)
10625
10626     return fin_resu, dresults
10627
10628
10629 class LUBackupRemove(NoHooksLU):
10630   """Remove exports related to the named instance.
10631
10632   """
10633   REQ_BGL = False
10634
10635   def ExpandNames(self):
10636     self.needed_locks = {}
10637     # We need all nodes to be locked in order for RemoveExport to work, but we
10638     # don't need to lock the instance itself, as nothing will happen to it (and
10639     # we can remove exports also for a removed instance)
10640     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10641
10642   def Exec(self, feedback_fn):
10643     """Remove any export.
10644
10645     """
10646     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10647     # If the instance was not found we'll try with the name that was passed in.
10648     # This will only work if it was an FQDN, though.
10649     fqdn_warn = False
10650     if not instance_name:
10651       fqdn_warn = True
10652       instance_name = self.op.instance_name
10653
10654     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10655     exportlist = self.rpc.call_export_list(locked_nodes)
10656     found = False
10657     for node in exportlist:
10658       msg = exportlist[node].fail_msg
10659       if msg:
10660         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10661         continue
10662       if instance_name in exportlist[node].payload:
10663         found = True
10664         result = self.rpc.call_export_remove(node, instance_name)
10665         msg = result.fail_msg
10666         if msg:
10667           logging.error("Could not remove export for instance %s"
10668                         " on node %s: %s", instance_name, node, msg)
10669
10670     if fqdn_warn and not found:
10671       feedback_fn("Export not found. If trying to remove an export belonging"
10672                   " to a deleted instance please use its Fully Qualified"
10673                   " Domain Name.")
10674
10675
10676 class LUGroupAdd(LogicalUnit):
10677   """Logical unit for creating node groups.
10678
10679   """
10680   HPATH = "group-add"
10681   HTYPE = constants.HTYPE_GROUP
10682   REQ_BGL = False
10683
10684   def ExpandNames(self):
10685     # We need the new group's UUID here so that we can create and acquire the
10686     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10687     # that it should not check whether the UUID exists in the configuration.
10688     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10689     self.needed_locks = {}
10690     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10691
10692   def CheckPrereq(self):
10693     """Check prerequisites.
10694
10695     This checks that the given group name is not an existing node group
10696     already.
10697
10698     """
10699     try:
10700       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10701     except errors.OpPrereqError:
10702       pass
10703     else:
10704       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10705                                  " node group (UUID: %s)" %
10706                                  (self.op.group_name, existing_uuid),
10707                                  errors.ECODE_EXISTS)
10708
10709     if self.op.ndparams:
10710       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10711
10712   def BuildHooksEnv(self):
10713     """Build hooks env.
10714
10715     """
10716     return {
10717       "GROUP_NAME": self.op.group_name,
10718       }
10719
10720   def BuildHooksNodes(self):
10721     """Build hooks nodes.
10722
10723     """
10724     mn = self.cfg.GetMasterNode()
10725     return ([mn], [mn])
10726
10727   def Exec(self, feedback_fn):
10728     """Add the node group to the cluster.
10729
10730     """
10731     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10732                                   uuid=self.group_uuid,
10733                                   alloc_policy=self.op.alloc_policy,
10734                                   ndparams=self.op.ndparams)
10735
10736     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10737     del self.remove_locks[locking.LEVEL_NODEGROUP]
10738
10739
10740 class LUGroupAssignNodes(NoHooksLU):
10741   """Logical unit for assigning nodes to groups.
10742
10743   """
10744   REQ_BGL = False
10745
10746   def ExpandNames(self):
10747     # These raise errors.OpPrereqError on their own:
10748     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10749     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10750
10751     # We want to lock all the affected nodes and groups. We have readily
10752     # available the list of nodes, and the *destination* group. To gather the
10753     # list of "source" groups, we need to fetch node information.
10754     self.node_data = self.cfg.GetAllNodesInfo()
10755     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10756     affected_groups.add(self.group_uuid)
10757
10758     self.needed_locks = {
10759       locking.LEVEL_NODEGROUP: list(affected_groups),
10760       locking.LEVEL_NODE: self.op.nodes,
10761       }
10762
10763   def CheckPrereq(self):
10764     """Check prerequisites.
10765
10766     """
10767     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10768     instance_data = self.cfg.GetAllInstancesInfo()
10769
10770     if self.group is None:
10771       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10772                                (self.op.group_name, self.group_uuid))
10773
10774     (new_splits, previous_splits) = \
10775       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10776                                              for node in self.op.nodes],
10777                                             self.node_data, instance_data)
10778
10779     if new_splits:
10780       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10781
10782       if not self.op.force:
10783         raise errors.OpExecError("The following instances get split by this"
10784                                  " change and --force was not given: %s" %
10785                                  fmt_new_splits)
10786       else:
10787         self.LogWarning("This operation will split the following instances: %s",
10788                         fmt_new_splits)
10789
10790         if previous_splits:
10791           self.LogWarning("In addition, these already-split instances continue"
10792                           " to be spit across groups: %s",
10793                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10794
10795   def Exec(self, feedback_fn):
10796     """Assign nodes to a new group.
10797
10798     """
10799     for node in self.op.nodes:
10800       self.node_data[node].group = self.group_uuid
10801
10802     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10803
10804   @staticmethod
10805   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10806     """Check for split instances after a node assignment.
10807
10808     This method considers a series of node assignments as an atomic operation,
10809     and returns information about split instances after applying the set of
10810     changes.
10811
10812     In particular, it returns information about newly split instances, and
10813     instances that were already split, and remain so after the change.
10814
10815     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10816     considered.
10817
10818     @type changes: list of (node_name, new_group_uuid) pairs.
10819     @param changes: list of node assignments to consider.
10820     @param node_data: a dict with data for all nodes
10821     @param instance_data: a dict with all instances to consider
10822     @rtype: a two-tuple
10823     @return: a list of instances that were previously okay and result split as a
10824       consequence of this change, and a list of instances that were previously
10825       split and this change does not fix.
10826
10827     """
10828     changed_nodes = dict((node, group) for node, group in changes
10829                          if node_data[node].group != group)
10830
10831     all_split_instances = set()
10832     previously_split_instances = set()
10833
10834     def InstanceNodes(instance):
10835       return [instance.primary_node] + list(instance.secondary_nodes)
10836
10837     for inst in instance_data.values():
10838       if inst.disk_template not in constants.DTS_INT_MIRROR:
10839         continue
10840
10841       instance_nodes = InstanceNodes(inst)
10842
10843       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10844         previously_split_instances.add(inst.name)
10845
10846       if len(set(changed_nodes.get(node, node_data[node].group)
10847                  for node in instance_nodes)) > 1:
10848         all_split_instances.add(inst.name)
10849
10850     return (list(all_split_instances - previously_split_instances),
10851             list(previously_split_instances & all_split_instances))
10852
10853
10854 class _GroupQuery(_QueryBase):
10855   FIELDS = query.GROUP_FIELDS
10856
10857   def ExpandNames(self, lu):
10858     lu.needed_locks = {}
10859
10860     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10861     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10862
10863     if not self.names:
10864       self.wanted = [name_to_uuid[name]
10865                      for name in utils.NiceSort(name_to_uuid.keys())]
10866     else:
10867       # Accept names to be either names or UUIDs.
10868       missing = []
10869       self.wanted = []
10870       all_uuid = frozenset(self._all_groups.keys())
10871
10872       for name in self.names:
10873         if name in all_uuid:
10874           self.wanted.append(name)
10875         elif name in name_to_uuid:
10876           self.wanted.append(name_to_uuid[name])
10877         else:
10878           missing.append(name)
10879
10880       if missing:
10881         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10882                                    errors.ECODE_NOENT)
10883
10884   def DeclareLocks(self, lu, level):
10885     pass
10886
10887   def _GetQueryData(self, lu):
10888     """Computes the list of node groups and their attributes.
10889
10890     """
10891     do_nodes = query.GQ_NODE in self.requested_data
10892     do_instances = query.GQ_INST in self.requested_data
10893
10894     group_to_nodes = None
10895     group_to_instances = None
10896
10897     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10898     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10899     # latter GetAllInstancesInfo() is not enough, for we have to go through
10900     # instance->node. Hence, we will need to process nodes even if we only need
10901     # instance information.
10902     if do_nodes or do_instances:
10903       all_nodes = lu.cfg.GetAllNodesInfo()
10904       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10905       node_to_group = {}
10906
10907       for node in all_nodes.values():
10908         if node.group in group_to_nodes:
10909           group_to_nodes[node.group].append(node.name)
10910           node_to_group[node.name] = node.group
10911
10912       if do_instances:
10913         all_instances = lu.cfg.GetAllInstancesInfo()
10914         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10915
10916         for instance in all_instances.values():
10917           node = instance.primary_node
10918           if node in node_to_group:
10919             group_to_instances[node_to_group[node]].append(instance.name)
10920
10921         if not do_nodes:
10922           # Do not pass on node information if it was not requested.
10923           group_to_nodes = None
10924
10925     return query.GroupQueryData([self._all_groups[uuid]
10926                                  for uuid in self.wanted],
10927                                 group_to_nodes, group_to_instances)
10928
10929
10930 class LUGroupQuery(NoHooksLU):
10931   """Logical unit for querying node groups.
10932
10933   """
10934   REQ_BGL = False
10935
10936   def CheckArguments(self):
10937     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10938                           self.op.output_fields, False)
10939
10940   def ExpandNames(self):
10941     self.gq.ExpandNames(self)
10942
10943   def Exec(self, feedback_fn):
10944     return self.gq.OldStyleQuery(self)
10945
10946
10947 class LUGroupSetParams(LogicalUnit):
10948   """Modifies the parameters of a node group.
10949
10950   """
10951   HPATH = "group-modify"
10952   HTYPE = constants.HTYPE_GROUP
10953   REQ_BGL = False
10954
10955   def CheckArguments(self):
10956     all_changes = [
10957       self.op.ndparams,
10958       self.op.alloc_policy,
10959       ]
10960
10961     if all_changes.count(None) == len(all_changes):
10962       raise errors.OpPrereqError("Please pass at least one modification",
10963                                  errors.ECODE_INVAL)
10964
10965   def ExpandNames(self):
10966     # This raises errors.OpPrereqError on its own:
10967     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10968
10969     self.needed_locks = {
10970       locking.LEVEL_NODEGROUP: [self.group_uuid],
10971       }
10972
10973   def CheckPrereq(self):
10974     """Check prerequisites.
10975
10976     """
10977     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10978
10979     if self.group is None:
10980       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10981                                (self.op.group_name, self.group_uuid))
10982
10983     if self.op.ndparams:
10984       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10985       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10986       self.new_ndparams = new_ndparams
10987
10988   def BuildHooksEnv(self):
10989     """Build hooks env.
10990
10991     """
10992     return {
10993       "GROUP_NAME": self.op.group_name,
10994       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10995       }
10996
10997   def BuildHooksNodes(self):
10998     """Build hooks nodes.
10999
11000     """
11001     mn = self.cfg.GetMasterNode()
11002     return ([mn], [mn])
11003
11004   def Exec(self, feedback_fn):
11005     """Modifies the node group.
11006
11007     """
11008     result = []
11009
11010     if self.op.ndparams:
11011       self.group.ndparams = self.new_ndparams
11012       result.append(("ndparams", str(self.group.ndparams)))
11013
11014     if self.op.alloc_policy:
11015       self.group.alloc_policy = self.op.alloc_policy
11016
11017     self.cfg.Update(self.group, feedback_fn)
11018     return result
11019
11020
11021
11022 class LUGroupRemove(LogicalUnit):
11023   HPATH = "group-remove"
11024   HTYPE = constants.HTYPE_GROUP
11025   REQ_BGL = False
11026
11027   def ExpandNames(self):
11028     # This will raises errors.OpPrereqError on its own:
11029     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11030     self.needed_locks = {
11031       locking.LEVEL_NODEGROUP: [self.group_uuid],
11032       }
11033
11034   def CheckPrereq(self):
11035     """Check prerequisites.
11036
11037     This checks that the given group name exists as a node group, that is
11038     empty (i.e., contains no nodes), and that is not the last group of the
11039     cluster.
11040
11041     """
11042     # Verify that the group is empty.
11043     group_nodes = [node.name
11044                    for node in self.cfg.GetAllNodesInfo().values()
11045                    if node.group == self.group_uuid]
11046
11047     if group_nodes:
11048       raise errors.OpPrereqError("Group '%s' not empty, has the following"
11049                                  " nodes: %s" %
11050                                  (self.op.group_name,
11051                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
11052                                  errors.ECODE_STATE)
11053
11054     # Verify the cluster would not be left group-less.
11055     if len(self.cfg.GetNodeGroupList()) == 1:
11056       raise errors.OpPrereqError("Group '%s' is the only group,"
11057                                  " cannot be removed" %
11058                                  self.op.group_name,
11059                                  errors.ECODE_STATE)
11060
11061   def BuildHooksEnv(self):
11062     """Build hooks env.
11063
11064     """
11065     return {
11066       "GROUP_NAME": self.op.group_name,
11067       }
11068
11069   def BuildHooksNodes(self):
11070     """Build hooks nodes.
11071
11072     """
11073     mn = self.cfg.GetMasterNode()
11074     return ([mn], [mn])
11075
11076   def Exec(self, feedback_fn):
11077     """Remove the node group.
11078
11079     """
11080     try:
11081       self.cfg.RemoveNodeGroup(self.group_uuid)
11082     except errors.ConfigurationError:
11083       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11084                                (self.op.group_name, self.group_uuid))
11085
11086     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11087
11088
11089 class LUGroupRename(LogicalUnit):
11090   HPATH = "group-rename"
11091   HTYPE = constants.HTYPE_GROUP
11092   REQ_BGL = False
11093
11094   def ExpandNames(self):
11095     # This raises errors.OpPrereqError on its own:
11096     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11097
11098     self.needed_locks = {
11099       locking.LEVEL_NODEGROUP: [self.group_uuid],
11100       }
11101
11102   def CheckPrereq(self):
11103     """Check prerequisites.
11104
11105     Ensures requested new name is not yet used.
11106
11107     """
11108     try:
11109       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11110     except errors.OpPrereqError:
11111       pass
11112     else:
11113       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11114                                  " node group (UUID: %s)" %
11115                                  (self.op.new_name, new_name_uuid),
11116                                  errors.ECODE_EXISTS)
11117
11118   def BuildHooksEnv(self):
11119     """Build hooks env.
11120
11121     """
11122     return {
11123       "OLD_NAME": self.op.group_name,
11124       "NEW_NAME": self.op.new_name,
11125       }
11126
11127   def BuildHooksNodes(self):
11128     """Build hooks nodes.
11129
11130     """
11131     mn = self.cfg.GetMasterNode()
11132
11133     all_nodes = self.cfg.GetAllNodesInfo()
11134     all_nodes.pop(mn, None)
11135
11136     run_nodes = [mn]
11137     run_nodes.extend(node.name for node in all_nodes.values()
11138                      if node.group == self.group_uuid)
11139
11140     return (run_nodes, run_nodes)
11141
11142   def Exec(self, feedback_fn):
11143     """Rename the node group.
11144
11145     """
11146     group = self.cfg.GetNodeGroup(self.group_uuid)
11147
11148     if group is None:
11149       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11150                                (self.op.group_name, self.group_uuid))
11151
11152     group.name = self.op.new_name
11153     self.cfg.Update(group, feedback_fn)
11154
11155     return self.op.new_name
11156
11157
11158 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11159   """Generic tags LU.
11160
11161   This is an abstract class which is the parent of all the other tags LUs.
11162
11163   """
11164   def ExpandNames(self):
11165     self.group_uuid = None
11166     self.needed_locks = {}
11167     if self.op.kind == constants.TAG_NODE:
11168       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11169       self.needed_locks[locking.LEVEL_NODE] = self.op.name
11170     elif self.op.kind == constants.TAG_INSTANCE:
11171       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11172       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11173     elif self.op.kind == constants.TAG_NODEGROUP:
11174       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11175
11176     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11177     # not possible to acquire the BGL based on opcode parameters)
11178
11179   def CheckPrereq(self):
11180     """Check prerequisites.
11181
11182     """
11183     if self.op.kind == constants.TAG_CLUSTER:
11184       self.target = self.cfg.GetClusterInfo()
11185     elif self.op.kind == constants.TAG_NODE:
11186       self.target = self.cfg.GetNodeInfo(self.op.name)
11187     elif self.op.kind == constants.TAG_INSTANCE:
11188       self.target = self.cfg.GetInstanceInfo(self.op.name)
11189     elif self.op.kind == constants.TAG_NODEGROUP:
11190       self.target = self.cfg.GetNodeGroup(self.group_uuid)
11191     else:
11192       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11193                                  str(self.op.kind), errors.ECODE_INVAL)
11194
11195
11196 class LUTagsGet(TagsLU):
11197   """Returns the tags of a given object.
11198
11199   """
11200   REQ_BGL = False
11201
11202   def ExpandNames(self):
11203     TagsLU.ExpandNames(self)
11204
11205     # Share locks as this is only a read operation
11206     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11207
11208   def Exec(self, feedback_fn):
11209     """Returns the tag list.
11210
11211     """
11212     return list(self.target.GetTags())
11213
11214
11215 class LUTagsSearch(NoHooksLU):
11216   """Searches the tags for a given pattern.
11217
11218   """
11219   REQ_BGL = False
11220
11221   def ExpandNames(self):
11222     self.needed_locks = {}
11223
11224   def CheckPrereq(self):
11225     """Check prerequisites.
11226
11227     This checks the pattern passed for validity by compiling it.
11228
11229     """
11230     try:
11231       self.re = re.compile(self.op.pattern)
11232     except re.error, err:
11233       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11234                                  (self.op.pattern, err), errors.ECODE_INVAL)
11235
11236   def Exec(self, feedback_fn):
11237     """Returns the tag list.
11238
11239     """
11240     cfg = self.cfg
11241     tgts = [("/cluster", cfg.GetClusterInfo())]
11242     ilist = cfg.GetAllInstancesInfo().values()
11243     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11244     nlist = cfg.GetAllNodesInfo().values()
11245     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11246     tgts.extend(("/nodegroup/%s" % n.name, n)
11247                 for n in cfg.GetAllNodeGroupsInfo().values())
11248     results = []
11249     for path, target in tgts:
11250       for tag in target.GetTags():
11251         if self.re.search(tag):
11252           results.append((path, tag))
11253     return results
11254
11255
11256 class LUTagsSet(TagsLU):
11257   """Sets a tag on a given object.
11258
11259   """
11260   REQ_BGL = False
11261
11262   def CheckPrereq(self):
11263     """Check prerequisites.
11264
11265     This checks the type and length of the tag name and value.
11266
11267     """
11268     TagsLU.CheckPrereq(self)
11269     for tag in self.op.tags:
11270       objects.TaggableObject.ValidateTag(tag)
11271
11272   def Exec(self, feedback_fn):
11273     """Sets the tag.
11274
11275     """
11276     try:
11277       for tag in self.op.tags:
11278         self.target.AddTag(tag)
11279     except errors.TagError, err:
11280       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11281     self.cfg.Update(self.target, feedback_fn)
11282
11283
11284 class LUTagsDel(TagsLU):
11285   """Delete a list of tags from a given object.
11286
11287   """
11288   REQ_BGL = False
11289
11290   def CheckPrereq(self):
11291     """Check prerequisites.
11292
11293     This checks that we have the given tag.
11294
11295     """
11296     TagsLU.CheckPrereq(self)
11297     for tag in self.op.tags:
11298       objects.TaggableObject.ValidateTag(tag)
11299     del_tags = frozenset(self.op.tags)
11300     cur_tags = self.target.GetTags()
11301
11302     diff_tags = del_tags - cur_tags
11303     if diff_tags:
11304       diff_names = ("'%s'" % i for i in sorted(diff_tags))
11305       raise errors.OpPrereqError("Tag(s) %s not found" %
11306                                  (utils.CommaJoin(diff_names), ),
11307                                  errors.ECODE_NOENT)
11308
11309   def Exec(self, feedback_fn):
11310     """Remove the tag from the object.
11311
11312     """
11313     for tag in self.op.tags:
11314       self.target.RemoveTag(tag)
11315     self.cfg.Update(self.target, feedback_fn)
11316
11317
11318 class LUTestDelay(NoHooksLU):
11319   """Sleep for a specified amount of time.
11320
11321   This LU sleeps on the master and/or nodes for a specified amount of
11322   time.
11323
11324   """
11325   REQ_BGL = False
11326
11327   def ExpandNames(self):
11328     """Expand names and set required locks.
11329
11330     This expands the node list, if any.
11331
11332     """
11333     self.needed_locks = {}
11334     if self.op.on_nodes:
11335       # _GetWantedNodes can be used here, but is not always appropriate to use
11336       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11337       # more information.
11338       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11339       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11340
11341   def _TestDelay(self):
11342     """Do the actual sleep.
11343
11344     """
11345     if self.op.on_master:
11346       if not utils.TestDelay(self.op.duration):
11347         raise errors.OpExecError("Error during master delay test")
11348     if self.op.on_nodes:
11349       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11350       for node, node_result in result.items():
11351         node_result.Raise("Failure during rpc call to node %s" % node)
11352
11353   def Exec(self, feedback_fn):
11354     """Execute the test delay opcode, with the wanted repetitions.
11355
11356     """
11357     if self.op.repeat == 0:
11358       self._TestDelay()
11359     else:
11360       top_value = self.op.repeat - 1
11361       for i in range(self.op.repeat):
11362         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11363         self._TestDelay()
11364
11365
11366 class LUTestJqueue(NoHooksLU):
11367   """Utility LU to test some aspects of the job queue.
11368
11369   """
11370   REQ_BGL = False
11371
11372   # Must be lower than default timeout for WaitForJobChange to see whether it
11373   # notices changed jobs
11374   _CLIENT_CONNECT_TIMEOUT = 20.0
11375   _CLIENT_CONFIRM_TIMEOUT = 60.0
11376
11377   @classmethod
11378   def _NotifyUsingSocket(cls, cb, errcls):
11379     """Opens a Unix socket and waits for another program to connect.
11380
11381     @type cb: callable
11382     @param cb: Callback to send socket name to client
11383     @type errcls: class
11384     @param errcls: Exception class to use for errors
11385
11386     """
11387     # Using a temporary directory as there's no easy way to create temporary
11388     # sockets without writing a custom loop around tempfile.mktemp and
11389     # socket.bind
11390     tmpdir = tempfile.mkdtemp()
11391     try:
11392       tmpsock = utils.PathJoin(tmpdir, "sock")
11393
11394       logging.debug("Creating temporary socket at %s", tmpsock)
11395       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11396       try:
11397         sock.bind(tmpsock)
11398         sock.listen(1)
11399
11400         # Send details to client
11401         cb(tmpsock)
11402
11403         # Wait for client to connect before continuing
11404         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11405         try:
11406           (conn, _) = sock.accept()
11407         except socket.error, err:
11408           raise errcls("Client didn't connect in time (%s)" % err)
11409       finally:
11410         sock.close()
11411     finally:
11412       # Remove as soon as client is connected
11413       shutil.rmtree(tmpdir)
11414
11415     # Wait for client to close
11416     try:
11417       try:
11418         # pylint: disable-msg=E1101
11419         # Instance of '_socketobject' has no ... member
11420         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11421         conn.recv(1)
11422       except socket.error, err:
11423         raise errcls("Client failed to confirm notification (%s)" % err)
11424     finally:
11425       conn.close()
11426
11427   def _SendNotification(self, test, arg, sockname):
11428     """Sends a notification to the client.
11429
11430     @type test: string
11431     @param test: Test name
11432     @param arg: Test argument (depends on test)
11433     @type sockname: string
11434     @param sockname: Socket path
11435
11436     """
11437     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11438
11439   def _Notify(self, prereq, test, arg):
11440     """Notifies the client of a test.
11441
11442     @type prereq: bool
11443     @param prereq: Whether this is a prereq-phase test
11444     @type test: string
11445     @param test: Test name
11446     @param arg: Test argument (depends on test)
11447
11448     """
11449     if prereq:
11450       errcls = errors.OpPrereqError
11451     else:
11452       errcls = errors.OpExecError
11453
11454     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11455                                                   test, arg),
11456                                    errcls)
11457
11458   def CheckArguments(self):
11459     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11460     self.expandnames_calls = 0
11461
11462   def ExpandNames(self):
11463     checkargs_calls = getattr(self, "checkargs_calls", 0)
11464     if checkargs_calls < 1:
11465       raise errors.ProgrammerError("CheckArguments was not called")
11466
11467     self.expandnames_calls += 1
11468
11469     if self.op.notify_waitlock:
11470       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11471
11472     self.LogInfo("Expanding names")
11473
11474     # Get lock on master node (just to get a lock, not for a particular reason)
11475     self.needed_locks = {
11476       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11477       }
11478
11479   def Exec(self, feedback_fn):
11480     if self.expandnames_calls < 1:
11481       raise errors.ProgrammerError("ExpandNames was not called")
11482
11483     if self.op.notify_exec:
11484       self._Notify(False, constants.JQT_EXEC, None)
11485
11486     self.LogInfo("Executing")
11487
11488     if self.op.log_messages:
11489       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11490       for idx, msg in enumerate(self.op.log_messages):
11491         self.LogInfo("Sending log message %s", idx + 1)
11492         feedback_fn(constants.JQT_MSGPREFIX + msg)
11493         # Report how many test messages have been sent
11494         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11495
11496     if self.op.fail:
11497       raise errors.OpExecError("Opcode failure was requested")
11498
11499     return True
11500
11501
11502 class IAllocator(object):
11503   """IAllocator framework.
11504
11505   An IAllocator instance has three sets of attributes:
11506     - cfg that is needed to query the cluster
11507     - input data (all members of the _KEYS class attribute are required)
11508     - four buffer attributes (in|out_data|text), that represent the
11509       input (to the external script) in text and data structure format,
11510       and the output from it, again in two formats
11511     - the result variables from the script (success, info, nodes) for
11512       easy usage
11513
11514   """
11515   # pylint: disable-msg=R0902
11516   # lots of instance attributes
11517   _ALLO_KEYS = [
11518     "name", "mem_size", "disks", "disk_template",
11519     "os", "tags", "nics", "vcpus", "hypervisor",
11520     ]
11521   _RELO_KEYS = [
11522     "name", "relocate_from",
11523     ]
11524   _EVAC_KEYS = [
11525     "evac_nodes",
11526     ]
11527
11528   def __init__(self, cfg, rpc, mode, **kwargs):
11529     self.cfg = cfg
11530     self.rpc = rpc
11531     # init buffer variables
11532     self.in_text = self.out_text = self.in_data = self.out_data = None
11533     # init all input fields so that pylint is happy
11534     self.mode = mode
11535     self.mem_size = self.disks = self.disk_template = None
11536     self.os = self.tags = self.nics = self.vcpus = None
11537     self.hypervisor = None
11538     self.relocate_from = None
11539     self.name = None
11540     self.evac_nodes = None
11541     # computed fields
11542     self.required_nodes = None
11543     # init result fields
11544     self.success = self.info = self.result = None
11545     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11546       keyset = self._ALLO_KEYS
11547       fn = self._AddNewInstance
11548     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11549       keyset = self._RELO_KEYS
11550       fn = self._AddRelocateInstance
11551     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11552       keyset = self._EVAC_KEYS
11553       fn = self._AddEvacuateNodes
11554     else:
11555       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11556                                    " IAllocator" % self.mode)
11557     for key in kwargs:
11558       if key not in keyset:
11559         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11560                                      " IAllocator" % key)
11561       setattr(self, key, kwargs[key])
11562
11563     for key in keyset:
11564       if key not in kwargs:
11565         raise errors.ProgrammerError("Missing input parameter '%s' to"
11566                                      " IAllocator" % key)
11567     self._BuildInputData(fn)
11568
11569   def _ComputeClusterData(self):
11570     """Compute the generic allocator input data.
11571
11572     This is the data that is independent of the actual operation.
11573
11574     """
11575     cfg = self.cfg
11576     cluster_info = cfg.GetClusterInfo()
11577     # cluster data
11578     data = {
11579       "version": constants.IALLOCATOR_VERSION,
11580       "cluster_name": cfg.GetClusterName(),
11581       "cluster_tags": list(cluster_info.GetTags()),
11582       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11583       # we don't have job IDs
11584       }
11585     ninfo = cfg.GetAllNodesInfo()
11586     iinfo = cfg.GetAllInstancesInfo().values()
11587     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11588
11589     # node data
11590     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11591
11592     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11593       hypervisor_name = self.hypervisor
11594     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11595       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11596     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11597       hypervisor_name = cluster_info.enabled_hypervisors[0]
11598
11599     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11600                                         hypervisor_name)
11601     node_iinfo = \
11602       self.rpc.call_all_instances_info(node_list,
11603                                        cluster_info.enabled_hypervisors)
11604
11605     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11606
11607     config_ndata = self._ComputeBasicNodeData(ninfo)
11608     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11609                                                  i_list, config_ndata)
11610     assert len(data["nodes"]) == len(ninfo), \
11611         "Incomplete node data computed"
11612
11613     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11614
11615     self.in_data = data
11616
11617   @staticmethod
11618   def _ComputeNodeGroupData(cfg):
11619     """Compute node groups data.
11620
11621     """
11622     ng = {}
11623     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11624       ng[guuid] = {
11625         "name": gdata.name,
11626         "alloc_policy": gdata.alloc_policy,
11627         }
11628     return ng
11629
11630   @staticmethod
11631   def _ComputeBasicNodeData(node_cfg):
11632     """Compute global node data.
11633
11634     @rtype: dict
11635     @returns: a dict of name: (node dict, node config)
11636
11637     """
11638     node_results = {}
11639     for ninfo in node_cfg.values():
11640       # fill in static (config-based) values
11641       pnr = {
11642         "tags": list(ninfo.GetTags()),
11643         "primary_ip": ninfo.primary_ip,
11644         "secondary_ip": ninfo.secondary_ip,
11645         "offline": ninfo.offline,
11646         "drained": ninfo.drained,
11647         "master_candidate": ninfo.master_candidate,
11648         "group": ninfo.group,
11649         "master_capable": ninfo.master_capable,
11650         "vm_capable": ninfo.vm_capable,
11651         }
11652
11653       node_results[ninfo.name] = pnr
11654
11655     return node_results
11656
11657   @staticmethod
11658   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11659                               node_results):
11660     """Compute global node data.
11661
11662     @param node_results: the basic node structures as filled from the config
11663
11664     """
11665     # make a copy of the current dict
11666     node_results = dict(node_results)
11667     for nname, nresult in node_data.items():
11668       assert nname in node_results, "Missing basic data for node %s" % nname
11669       ninfo = node_cfg[nname]
11670
11671       if not (ninfo.offline or ninfo.drained):
11672         nresult.Raise("Can't get data for node %s" % nname)
11673         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11674                                 nname)
11675         remote_info = nresult.payload
11676
11677         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11678                      'vg_size', 'vg_free', 'cpu_total']:
11679           if attr not in remote_info:
11680             raise errors.OpExecError("Node '%s' didn't return attribute"
11681                                      " '%s'" % (nname, attr))
11682           if not isinstance(remote_info[attr], int):
11683             raise errors.OpExecError("Node '%s' returned invalid value"
11684                                      " for '%s': %s" %
11685                                      (nname, attr, remote_info[attr]))
11686         # compute memory used by primary instances
11687         i_p_mem = i_p_up_mem = 0
11688         for iinfo, beinfo in i_list:
11689           if iinfo.primary_node == nname:
11690             i_p_mem += beinfo[constants.BE_MEMORY]
11691             if iinfo.name not in node_iinfo[nname].payload:
11692               i_used_mem = 0
11693             else:
11694               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11695             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11696             remote_info['memory_free'] -= max(0, i_mem_diff)
11697
11698             if iinfo.admin_up:
11699               i_p_up_mem += beinfo[constants.BE_MEMORY]
11700
11701         # compute memory used by instances
11702         pnr_dyn = {
11703           "total_memory": remote_info['memory_total'],
11704           "reserved_memory": remote_info['memory_dom0'],
11705           "free_memory": remote_info['memory_free'],
11706           "total_disk": remote_info['vg_size'],
11707           "free_disk": remote_info['vg_free'],
11708           "total_cpus": remote_info['cpu_total'],
11709           "i_pri_memory": i_p_mem,
11710           "i_pri_up_memory": i_p_up_mem,
11711           }
11712         pnr_dyn.update(node_results[nname])
11713         node_results[nname] = pnr_dyn
11714
11715     return node_results
11716
11717   @staticmethod
11718   def _ComputeInstanceData(cluster_info, i_list):
11719     """Compute global instance data.
11720
11721     """
11722     instance_data = {}
11723     for iinfo, beinfo in i_list:
11724       nic_data = []
11725       for nic in iinfo.nics:
11726         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11727         nic_dict = {"mac": nic.mac,
11728                     "ip": nic.ip,
11729                     "mode": filled_params[constants.NIC_MODE],
11730                     "link": filled_params[constants.NIC_LINK],
11731                    }
11732         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11733           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11734         nic_data.append(nic_dict)
11735       pir = {
11736         "tags": list(iinfo.GetTags()),
11737         "admin_up": iinfo.admin_up,
11738         "vcpus": beinfo[constants.BE_VCPUS],
11739         "memory": beinfo[constants.BE_MEMORY],
11740         "os": iinfo.os,
11741         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11742         "nics": nic_data,
11743         "disks": [{constants.IDISK_SIZE: dsk.size,
11744                    constants.IDISK_MODE: dsk.mode}
11745                   for dsk in iinfo.disks],
11746         "disk_template": iinfo.disk_template,
11747         "hypervisor": iinfo.hypervisor,
11748         }
11749       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11750                                                  pir["disks"])
11751       instance_data[iinfo.name] = pir
11752
11753     return instance_data
11754
11755   def _AddNewInstance(self):
11756     """Add new instance data to allocator structure.
11757
11758     This in combination with _AllocatorGetClusterData will create the
11759     correct structure needed as input for the allocator.
11760
11761     The checks for the completeness of the opcode must have already been
11762     done.
11763
11764     """
11765     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11766
11767     if self.disk_template in constants.DTS_INT_MIRROR:
11768       self.required_nodes = 2
11769     else:
11770       self.required_nodes = 1
11771     request = {
11772       "name": self.name,
11773       "disk_template": self.disk_template,
11774       "tags": self.tags,
11775       "os": self.os,
11776       "vcpus": self.vcpus,
11777       "memory": self.mem_size,
11778       "disks": self.disks,
11779       "disk_space_total": disk_space,
11780       "nics": self.nics,
11781       "required_nodes": self.required_nodes,
11782       }
11783     return request
11784
11785   def _AddRelocateInstance(self):
11786     """Add relocate instance data to allocator structure.
11787
11788     This in combination with _IAllocatorGetClusterData will create the
11789     correct structure needed as input for the allocator.
11790
11791     The checks for the completeness of the opcode must have already been
11792     done.
11793
11794     """
11795     instance = self.cfg.GetInstanceInfo(self.name)
11796     if instance is None:
11797       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11798                                    " IAllocator" % self.name)
11799
11800     if instance.disk_template not in constants.DTS_MIRRORED:
11801       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11802                                  errors.ECODE_INVAL)
11803
11804     if instance.disk_template in constants.DTS_INT_MIRROR and \
11805         len(instance.secondary_nodes) != 1:
11806       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11807                                  errors.ECODE_STATE)
11808
11809     self.required_nodes = 1
11810     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11811     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11812
11813     request = {
11814       "name": self.name,
11815       "disk_space_total": disk_space,
11816       "required_nodes": self.required_nodes,
11817       "relocate_from": self.relocate_from,
11818       }
11819     return request
11820
11821   def _AddEvacuateNodes(self):
11822     """Add evacuate nodes data to allocator structure.
11823
11824     """
11825     request = {
11826       "evac_nodes": self.evac_nodes
11827       }
11828     return request
11829
11830   def _BuildInputData(self, fn):
11831     """Build input data structures.
11832
11833     """
11834     self._ComputeClusterData()
11835
11836     request = fn()
11837     request["type"] = self.mode
11838     self.in_data["request"] = request
11839
11840     self.in_text = serializer.Dump(self.in_data)
11841
11842   def Run(self, name, validate=True, call_fn=None):
11843     """Run an instance allocator and return the results.
11844
11845     """
11846     if call_fn is None:
11847       call_fn = self.rpc.call_iallocator_runner
11848
11849     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11850     result.Raise("Failure while running the iallocator script")
11851
11852     self.out_text = result.payload
11853     if validate:
11854       self._ValidateResult()
11855
11856   def _ValidateResult(self):
11857     """Process the allocator results.
11858
11859     This will process and if successful save the result in
11860     self.out_data and the other parameters.
11861
11862     """
11863     try:
11864       rdict = serializer.Load(self.out_text)
11865     except Exception, err:
11866       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11867
11868     if not isinstance(rdict, dict):
11869       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11870
11871     # TODO: remove backwards compatiblity in later versions
11872     if "nodes" in rdict and "result" not in rdict:
11873       rdict["result"] = rdict["nodes"]
11874       del rdict["nodes"]
11875
11876     for key in "success", "info", "result":
11877       if key not in rdict:
11878         raise errors.OpExecError("Can't parse iallocator results:"
11879                                  " missing key '%s'" % key)
11880       setattr(self, key, rdict[key])
11881
11882     if not isinstance(rdict["result"], list):
11883       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11884                                " is not a list")
11885
11886     if self.mode == constants.IALLOCATOR_MODE_RELOC:
11887       assert self.relocate_from is not None
11888       assert self.required_nodes == 1
11889
11890       node2group = dict((name, ndata["group"])
11891                         for (name, ndata) in self.in_data["nodes"].items())
11892
11893       fn = compat.partial(self._NodesToGroups, node2group,
11894                           self.in_data["nodegroups"])
11895
11896       request_groups = fn(self.relocate_from)
11897       result_groups = fn(rdict["result"])
11898
11899       if result_groups != request_groups:
11900         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11901                                  " differ from original groups (%s)" %
11902                                  (utils.CommaJoin(result_groups),
11903                                   utils.CommaJoin(request_groups)))
11904
11905     self.out_data = rdict
11906
11907   @staticmethod
11908   def _NodesToGroups(node2group, groups, nodes):
11909     """Returns a list of unique group names for a list of nodes.
11910
11911     @type node2group: dict
11912     @param node2group: Map from node name to group UUID
11913     @type groups: dict
11914     @param groups: Group information
11915     @type nodes: list
11916     @param nodes: Node names
11917
11918     """
11919     result = set()
11920
11921     for node in nodes:
11922       try:
11923         group_uuid = node2group[node]
11924       except KeyError:
11925         # Ignore unknown node
11926         pass
11927       else:
11928         try:
11929           group = groups[group_uuid]
11930         except KeyError:
11931           # Can't find group, let's use UUID
11932           group_name = group_uuid
11933         else:
11934           group_name = group["name"]
11935
11936         result.add(group_name)
11937
11938     return sorted(result)
11939
11940
11941 class LUTestAllocator(NoHooksLU):
11942   """Run allocator tests.
11943
11944   This LU runs the allocator tests
11945
11946   """
11947   def CheckPrereq(self):
11948     """Check prerequisites.
11949
11950     This checks the opcode parameters depending on the director and mode test.
11951
11952     """
11953     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11954       for attr in ["mem_size", "disks", "disk_template",
11955                    "os", "tags", "nics", "vcpus"]:
11956         if not hasattr(self.op, attr):
11957           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11958                                      attr, errors.ECODE_INVAL)
11959       iname = self.cfg.ExpandInstanceName(self.op.name)
11960       if iname is not None:
11961         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11962                                    iname, errors.ECODE_EXISTS)
11963       if not isinstance(self.op.nics, list):
11964         raise errors.OpPrereqError("Invalid parameter 'nics'",
11965                                    errors.ECODE_INVAL)
11966       if not isinstance(self.op.disks, list):
11967         raise errors.OpPrereqError("Invalid parameter 'disks'",
11968                                    errors.ECODE_INVAL)
11969       for row in self.op.disks:
11970         if (not isinstance(row, dict) or
11971             "size" not in row or
11972             not isinstance(row["size"], int) or
11973             "mode" not in row or
11974             row["mode"] not in ['r', 'w']):
11975           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11976                                      " parameter", errors.ECODE_INVAL)
11977       if self.op.hypervisor is None:
11978         self.op.hypervisor = self.cfg.GetHypervisorType()
11979     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11980       fname = _ExpandInstanceName(self.cfg, self.op.name)
11981       self.op.name = fname
11982       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11983     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11984       if not hasattr(self.op, "evac_nodes"):
11985         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11986                                    " opcode input", errors.ECODE_INVAL)
11987     else:
11988       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11989                                  self.op.mode, errors.ECODE_INVAL)
11990
11991     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11992       if self.op.allocator is None:
11993         raise errors.OpPrereqError("Missing allocator name",
11994                                    errors.ECODE_INVAL)
11995     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11996       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11997                                  self.op.direction, errors.ECODE_INVAL)
11998
11999   def Exec(self, feedback_fn):
12000     """Run the allocator test.
12001
12002     """
12003     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12004       ial = IAllocator(self.cfg, self.rpc,
12005                        mode=self.op.mode,
12006                        name=self.op.name,
12007                        mem_size=self.op.mem_size,
12008                        disks=self.op.disks,
12009                        disk_template=self.op.disk_template,
12010                        os=self.op.os,
12011                        tags=self.op.tags,
12012                        nics=self.op.nics,
12013                        vcpus=self.op.vcpus,
12014                        hypervisor=self.op.hypervisor,
12015                        )
12016     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12017       ial = IAllocator(self.cfg, self.rpc,
12018                        mode=self.op.mode,
12019                        name=self.op.name,
12020                        relocate_from=list(self.relocate_from),
12021                        )
12022     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12023       ial = IAllocator(self.cfg, self.rpc,
12024                        mode=self.op.mode,
12025                        evac_nodes=self.op.evac_nodes)
12026     else:
12027       raise errors.ProgrammerError("Uncatched mode %s in"
12028                                    " LUTestAllocator.Exec", self.op.mode)
12029
12030     if self.op.direction == constants.IALLOCATOR_DIR_IN:
12031       result = ial.in_text
12032     else:
12033       ial.Run(self.op.allocator, validate=False)
12034       result = ial.out_text
12035     return result
12036
12037
12038 #: Query type implementations
12039 _QUERY_IMPL = {
12040   constants.QR_INSTANCE: _InstanceQuery,
12041   constants.QR_NODE: _NodeQuery,
12042   constants.QR_GROUP: _GroupQuery,
12043   constants.QR_OS: _OsQuery,
12044   }
12045
12046 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12047
12048
12049 def _GetQueryImplementation(name):
12050   """Returns the implemtnation for a query type.
12051
12052   @param name: Query type, must be one of L{constants.QR_VIA_OP}
12053
12054   """
12055   try:
12056     return _QUERY_IMPL[name]
12057   except KeyError:
12058     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12059                                errors.ECODE_INVAL)