# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""
# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module
import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
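
  As an illustration only (the opcode used here is arbitrary and the keyword
  argument is just a made-up extra return value), an LU's C{Exec} could end
  with::

    return ResultWithJobs([[opcodes.OpTestDelay(duration=10)]],
                          submitted="delay job")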

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None
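      # Acquire all node locks in shared mode (a sketch of the
      # self.share_locks mechanism described above)
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
      self.share_locks[locking.LEVEL_NODE] = 1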

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    # self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.
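
    A typical override, shown here only as a sketch (it assumes the node
    locks depend on the instance locks acquired at the previous level),
    would look like::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()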

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
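
    A minimal sketch of the intended usage from an instance LU::

      def ExpandNames(self):
        self._ExpandAndLockInstance()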

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to lock only some instances' nodes,
    or to lock only primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Constructor for Tasklet.

    """
    self.lu = lu

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
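
  For illustration only (the parameter names below are made up)::

    # Yields {'vcpus': 4, 'memory': 512}: 'memory' is kept, 'vcpus' is
    # updated, and 'acpi' is removed because of VALUE_DEFAULT.
    _GetUpdatedParams({"vcpus": 1, "memory": 512, "acpi": True},
                      {"vcpus": 4, "acpi": constants.VALUE_DEFAULT})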

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val

  return params_copy


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance
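
  The resulting dictionary, sketched here with made-up values, contains
  entries such as::

    {
      "INSTANCE_NAME": "instance1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MAC": "aa:00:00:35:6e:01",
    }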

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
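
  A typical call from an LU's CheckArguments, as a sketch (the slot names
  shown are only examples)::

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")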

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
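
    As an illustration (the node name and message are made up), the
    machine-parseable form built below looks like::

      ERROR:ENODELVM:node:node1.example.com:Can't get PV list from node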

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks PV list
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # known node object
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
      "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
                        for (files, fn) in [(files_all, None),
                                            (files_all_opt, None),
                                            (files_mc,
                                             lambda node: (node.master_candidate or
                                                           node.name == master_node)),
                                            (files_vm, lambda node: node.vm_capable)]
                        for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # Optional files must exist on all nodes or on none
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
1880 def _VerifyNodeOS(self, ninfo, nimg, base):
1881 """Verifies the node OS list.
1883 @type ninfo: L{objects.Node}
1884 @param ninfo: the node to check
1885 @param nimg: the node image object
1886 @param base: the 'template' node we match against (e.g. from the master)
1890 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1892 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1894 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", beautify_params(f_param),
1923 beautify_params(b_param))]:
1924 _ErrorIf(a != b, self.ENODEOS, node,
1925 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1926 kind, os_name, base.name,
1927 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1929 # check any missing OSes
1930 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1931 _ErrorIf(missing, self.ENODEOS, node,
1932 "OSes present on reference node %s but missing on this node: %s",
1933 base.name, utils.CommaJoin(missing))
1935 def _VerifyOob(self, ninfo, nresult):
1936 """Verifies out of band functionality of a node.
1938 @type ninfo: L{objects.Node}
1939 @param ninfo: the node to check
1940 @param nresult: the remote results for the node
1944 # We just have to verify the paths on master and/or master candidates
1945 # as the oob helper is invoked on the master
1946 if ((ninfo.master_candidate or ninfo.master_capable) and
1947 constants.NV_OOB_PATHS in nresult):
1948 for path_result in nresult[constants.NV_OOB_PATHS]:
1949 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1951 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1952 """Verifies and updates the node volume data.
1954 This function will update a L{NodeImage}'s internal structures
1955 with data from the remote call.
1957 @type ninfo: L{objects.Node}
1958 @param ninfo: the node to check
1959 @param nresult: the remote results for the node
1960 @param nimg: the node image object
1961 @param vg_name: the configured VG name
1965 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
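# start out pessimistic: lvm_fail stays True unless the node returned a
# valid LV dictionary below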
1967 nimg.lvm_fail = True
1968 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1971 elif isinstance(lvdata, basestring):
1972 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1973 utils.SafeEncode(lvdata))
1974 elif not isinstance(lvdata, dict):
1975 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1977 nimg.volumes = lvdata
1978 nimg.lvm_fail = False
1980 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1981 """Verifies and updates the node instance list.
1983 If the listing was successful, then updates this node's instance
1984 list. Otherwise, it marks the RPC call as failed for the instance list.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1990 @param nimg: the node image object
1993 idata = nresult.get(constants.NV_INSTANCELIST, None)
1994 test = not isinstance(idata, list)
1995 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1996 " (instancelist): %s", utils.SafeEncode(str(idata)))
1998 nimg.hyp_fail = True
2000 nimg.instances = idata
2002 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2003 """Verifies and computes a node information map
2005 @type ninfo: L{objects.Node}
2006 @param ninfo: the node to check
2007 @param nresult: the remote results for the node
2008 @param nimg: the node image object
2009 @param vg_name: the configured VG name
2013 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2015 # try to read free memory (from the hypervisor)
2016 hv_info = nresult.get(constants.NV_HVINFO, None)
2017 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2018 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2021 nimg.mfree = int(hv_info["memory_free"])
2022 except (ValueError, TypeError):
2023 _ErrorIf(True, self.ENODERPC, node,
2024 "node returned invalid nodeinfo, check hypervisor")
2026 # FIXME: devise a free space model for file based instances as well
2027 if vg_name is not None:
2028 test = (constants.NV_VGLIST not in nresult or
2029 vg_name not in nresult[constants.NV_VGLIST])
2030 _ErrorIf(test, self.ENODELVM, node,
2031 "node didn't return data for the volume group '%s'"
2032 " - it is either missing or broken", vg_name)
2035 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2036 except (ValueError, TypeError):
2037 _ErrorIf(True, self.ENODERPC, node,
2038 "node returned invalid LVM info, check LVM status")
2040 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2041 """Gets per-disk status information for all instances.
2043 @type nodelist: list of strings
2044 @param nodelist: Node names
2045 @type node_image: dict of (name, L{objects.Node})
2046 @param node_image: Node objects
2047 @type instanceinfo: dict of (name, L{objects.Instance})
2048 @param instanceinfo: Instance objects
2049 @rtype: {instance: {node: [(success, payload)]}}
2050 @return: a dictionary of per-instance dictionaries with nodes as
2051 keys and disk information as values; the disk information is a
2052 list of tuples (success, payload)
2055 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2058 node_disks_devonly = {}
2059 diskless_instances = set()
2060 diskless = constants.DT_DISKLESS
2062 for nname in nodelist:
2063 node_instances = list(itertools.chain(node_image[nname].pinst,
2064 node_image[nname].sinst))
2065 diskless_instances.update(inst for inst in node_instances
2066 if instanceinfo[inst].disk_template == diskless)
2067 disks = [(inst, disk)
2068 for inst in node_instances
2069 for disk in instanceinfo[inst].disks]
2072 # No need to collect data
2075 node_disks[nname] = disks
2077 # Creating copies as SetDiskID below will modify the objects and that can
2078 # lead to incorrect data returned from nodes
2079 devonly = [dev.Copy() for (_, dev) in disks]
2082 self.cfg.SetDiskID(dev, nname)
2084 node_disks_devonly[nname] = devonly
2086 assert len(node_disks) == len(node_disks_devonly)
2088 # Collect data from all nodes with disks
2089 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2092 assert len(result) == len(node_disks)
2096 for (nname, nres) in result.items():
2097 disks = node_disks[nname]
2100 # No data from this node
2101 data = len(disks) * [(False, "node offline")]
2104 _ErrorIf(msg, self.ENODERPC, nname,
2105 "while getting disk information: %s", msg)
2107 # No data from this node
2108 data = len(disks) * [(False, msg)]
2111 for idx, i in enumerate(nres.payload):
2112 if isinstance(i, (tuple, list)) and len(i) == 2:
2115 logging.warning("Invalid result from node %s, entry %d: %s",
2117 data.append((False, "Invalid result from the remote node"))
2119 for ((inst, _), status) in zip(disks, data):
2120 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2122 # Add empty entries for diskless instances.
2123 for inst in diskless_instances:
2124 assert inst not in instdisk
2127 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2128 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2129 compat.all(isinstance(s, (tuple, list)) and
2130 len(s) == 2 for s in statuses)
2131 for inst, nnames in instdisk.items()
2132 for nname, statuses in nnames.items())
2133 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2137 def _VerifyHVP(self, hvp_data):
2138 """Verifies locally the syntax of the hypervisor parameters.
2141 for item, hv_name, hv_params in hvp_data:
2142 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2145 hv_class = hypervisor.GetHypervisor(hv_name)
2146 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2147 hv_class.CheckParameterSyntax(hv_params)
2148 except errors.GenericError, err:
2149 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2151 def BuildHooksEnv(self):
2154 Cluster-Verify hooks are run only in the post phase; if they fail, their
2155 output is logged in the verify output and the verification fails.
2161 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2164 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2165 for node in cfg.GetAllNodesInfo().values())
2169 def BuildHooksNodes(self):
2170 """Build hooks nodes.
2173 return ([], self.cfg.GetNodeList())
2175 def Exec(self, feedback_fn):
2176 """Verify integrity of cluster, performing various test on nodes.
2179 # This method has too many local variables. pylint: disable-msg=R0914
2181 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2182 verbose = self.op.verbose
2183 self._feedback_fn = feedback_fn
2184 feedback_fn("* Verifying global settings")
2185 for msg in self.cfg.VerifyConfig():
2186 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2188 # Check the cluster certificates
2189 for cert_filename in constants.ALL_CERT_FILES:
2190 (errcode, msg) = _VerifyCertificate(cert_filename)
2191 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2193 vg_name = self.cfg.GetVGName()
2194 drbd_helper = self.cfg.GetDRBDHelper()
2195 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2196 cluster = self.cfg.GetClusterInfo()
2197 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2198 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2199 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2200 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2201 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2202 for iname in instancelist)
2203 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2204 i_non_redundant = [] # Non redundant instances
2205 i_non_a_balanced = [] # Non auto-balanced instances
2206 n_offline = 0 # Count of offline nodes
2207 n_drained = 0 # Count of nodes being drained
2208 node_vol_should = {}
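# node_vol_should maps node names to the LVs expected on them; it is filled
# via MapLVsByNode() for every instance and later used to detect orphan
# volumes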
2210 # FIXME: verify OS list
2213 filemap = _ComputeAncillaryFiles(cluster, False)
2215 # do local checksums
2216 master_node = self.master_node = self.cfg.GetMasterNode()
2217 master_ip = self.cfg.GetMasterIP()
2219 # Compute the set of hypervisor parameters
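# every hvp_data entry is a (source description, hypervisor name, parameters)
# tuple; the sources are the cluster defaults, the per-OS overrides and the
# per-instance parameters gathered below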
2221 for hv_name in hypervisors:
2222 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2223 for os_name, os_hvp in cluster.os_hvp.items():
2224 for hv_name, hv_params in os_hvp.items():
2227 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2228 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2229 # TODO: collapse identical parameter values in a single one
2230 for instance in instanceinfo.values():
2231 if not instance.hvparams:
2233 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2234 cluster.FillHV(instance)))
2235 # and verify them locally
2236 self._VerifyHVP(hvp_data)
2238 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2239 node_verify_param = {
2240 constants.NV_FILELIST:
2241 utils.UniqueSequence(filename
2242 for files in filemap
2243 for filename in files),
2244 constants.NV_NODELIST: [node.name for node in nodeinfo
2245 if not node.offline],
2246 constants.NV_HYPERVISOR: hypervisors,
2247 constants.NV_HVPARAMS: hvp_data,
2248 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2249 node.secondary_ip) for node in nodeinfo
2250 if not node.offline],
2251 constants.NV_INSTANCELIST: hypervisors,
2252 constants.NV_VERSION: None,
2253 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2254 constants.NV_NODESETUP: None,
2255 constants.NV_TIME: None,
2256 constants.NV_MASTERIP: (master_node, master_ip),
2257 constants.NV_OSLIST: None,
2258 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2261 if vg_name is not None:
2262 node_verify_param[constants.NV_VGLIST] = None
2263 node_verify_param[constants.NV_LVLIST] = vg_name
2264 node_verify_param[constants.NV_PVLIST] = [vg_name]
2265 node_verify_param[constants.NV_DRBDLIST] = None
2268 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2270 # Build our expected cluster state
2271 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2273 vm_capable=node.vm_capable))
2274 for node in nodeinfo)
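# collect the distinct OOB helper programs configured for the nodes; their
# presence is verified via NV_OOB_PATHS (the helper itself is invoked on the
# master)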
2278 for node in nodeinfo:
2279 path = _SupportsOob(self.cfg, node)
2280 if path and path not in oob_paths:
2281 oob_paths.append(path)
2284 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2286 for instance in instancelist:
2287 inst_config = instanceinfo[instance]
2289 for nname in inst_config.all_nodes:
2290 if nname not in node_image:
2292 gnode = self.NodeImage(name=nname)
2294 node_image[nname] = gnode
2296 inst_config.MapLVsByNode(node_vol_should)
2298 pnode = inst_config.primary_node
2299 node_image[pnode].pinst.append(instance)
2301 for snode in inst_config.secondary_nodes:
2302 nimg = node_image[snode]
2303 nimg.sinst.append(instance)
2304 if pnode not in nimg.sbp:
2305 nimg.sbp[pnode] = []
2306 nimg.sbp[pnode].append(instance)
2308 # At this point, we have the in-memory data structures complete,
2309 # except for the runtime information, which we'll gather next
2311 # Due to the way our RPC system works, exact response times cannot be
2312 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2313 # time before and after executing the request, we can at least have a time window.
2315 nvinfo_starttime = time.time()
2316 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2317 self.cfg.GetClusterName())
2318 nvinfo_endtime = time.time()
2320 all_drbd_map = self.cfg.ComputeDRBDMap()
2322 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2323 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2325 feedback_fn("* Verifying configuration file consistency")
2326 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2328 feedback_fn("* Verifying node status")
2332 for node_i in nodeinfo:
2334 nimg = node_image[node]
2338 feedback_fn("* Skipping offline node %s" % (node,))
2342 if node == master_node:
2344 elif node_i.master_candidate:
2345 ntype = "master candidate"
2346 elif node_i.drained:
2352 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2354 msg = all_nvinfo[node].fail_msg
2355 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2357 nimg.rpc_fail = True
2360 nresult = all_nvinfo[node].payload
2362 nimg.call_ok = self._VerifyNode(node_i, nresult)
2363 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2364 self._VerifyNodeNetwork(node_i, nresult)
2365 self._VerifyOob(node_i, nresult)
2368 self._VerifyNodeLVM(node_i, nresult, vg_name)
2369 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2372 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2373 self._UpdateNodeInstances(node_i, nresult, nimg)
2374 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2375 self._UpdateNodeOS(node_i, nresult, nimg)
2376 if not nimg.os_fail:
2377 if refos_img is None:
2379 self._VerifyNodeOS(node_i, nimg, refos_img)
2381 feedback_fn("* Verifying instance status")
2382 for instance in instancelist:
2384 feedback_fn("* Verifying instance %s" % instance)
2385 inst_config = instanceinfo[instance]
2386 self._VerifyInstance(instance, inst_config, node_image,
2388 inst_nodes_offline = []
2390 pnode = inst_config.primary_node
2391 pnode_img = node_image[pnode]
2392 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2393 self.ENODERPC, pnode, "instance %s, connection to"
2394 " primary node failed", instance)
2396 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2397 self.EINSTANCEBADNODE, instance,
2398 "instance is marked as running and lives on offline node %s",
2399 inst_config.primary_node)
2401 # If the instance is non-redundant we cannot survive losing its primary
2402 # node, so we are not N+1 compliant. On the other hand we have no disk
2403 # templates with more than one secondary, so that situation is not well supported either.
2405 # FIXME: does not support file-backed instances
2406 if not inst_config.secondary_nodes:
2407 i_non_redundant.append(instance)
2409 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2410 instance, "instance has multiple secondary nodes: %s",
2411 utils.CommaJoin(inst_config.secondary_nodes),
2412 code=self.ETYPE_WARNING)
2414 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2415 pnode = inst_config.primary_node
2416 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2417 instance_groups = {}
2419 for node in instance_nodes:
2420 instance_groups.setdefault(nodeinfo_byname[node].group,
2424 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2425 # Sort so that we always list the primary node first.
2426 for group, nodes in sorted(instance_groups.items(),
2427 key=lambda (_, nodes): pnode in nodes,
2430 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2431 instance, "instance has primary and secondary nodes in"
2432 " different groups: %s", utils.CommaJoin(pretty_list),
2433 code=self.ETYPE_WARNING)
2435 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2436 i_non_a_balanced.append(instance)
2438 for snode in inst_config.secondary_nodes:
2439 s_img = node_image[snode]
2440 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2441 "instance %s, connection to secondary node failed", instance)
2444 inst_nodes_offline.append(snode)
2446 # warn that the instance lives on offline nodes
2447 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2448 "instance has offline secondary node(s) %s",
2449 utils.CommaJoin(inst_nodes_offline))
2450 # ... or ghost/non-vm_capable nodes
2451 for node in inst_config.all_nodes:
2452 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2453 "instance lives on ghost node %s", node)
2454 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2455 instance, "instance lives on non-vm_capable node %s", node)
2457 feedback_fn("* Verifying orphan volumes")
2458 reserved = utils.FieldSet(*cluster.reserved_lvs)
2459 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2461 feedback_fn("* Verifying orphan instances")
2462 self._VerifyOrphanInstances(instancelist, node_image)
2464 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2465 feedback_fn("* Verifying N+1 Memory redundancy")
2466 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2468 feedback_fn("* Other Notes")
2470 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2471 % len(i_non_redundant))
2473 if i_non_a_balanced:
2474 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2475 % len(i_non_a_balanced))
2478 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2481 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2485 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2486 """Analyze the post-hooks' result
2488 This method analyses the hook result, handles it, and sends some
2489 nicely-formatted feedback back to the user.
2491 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2492 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2493 @param hooks_results: the results of the multi-node hooks rpc call
2494 @param feedback_fn: function used to send feedback back to the caller
2495 @param lu_result: previous Exec result
2496 @return: the new Exec result, based on the previous result
2500 # We only really run POST phase hooks, and are only interested in their results
2502 if phase == constants.HOOKS_PHASE_POST:
2503 # Used to change hooks' output to proper indentation
2504 feedback_fn("* Hooks Results")
2505 assert hooks_results, "invalid result from hooks"
2507 for node_name in hooks_results:
2508 res = hooks_results[node_name]
2510 test = msg and not res.offline
2511 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2512 "Communication failure in hooks execution: %s", msg)
2513 if res.offline or msg:
2514 # No need to investigate payload if node is offline or gave an error.
2515 # override manually lu_result here as _ErrorIf only
2516 # overrides self.bad
2519 for script, hkr, output in res.payload:
2520 test = hkr == constants.HKR_FAIL
2521 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2522 "Script %s failed, output:", script)
2524 output = self._HOOKS_INDENT_RE.sub(' ', output)
2525 feedback_fn("%s" % output)
2531 class LUClusterVerifyDisks(NoHooksLU):
2532 """Verifies the cluster disks status.
2537 def ExpandNames(self):
2538 self.needed_locks = {
2539 locking.LEVEL_NODE: locking.ALL_SET,
2540 locking.LEVEL_INSTANCE: locking.ALL_SET,
2542 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2544 def Exec(self, feedback_fn):
2545 """Verify integrity of cluster disks.
2547 @rtype: tuple of three items
2548 @return: a tuple of (dict of node-to-node_error, list of instances
2549 which need activate-disks, dict of instance: (node, volume) for
2553 result = res_nodes, res_instances, res_missing = {}, [], {}
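# res_nodes: node name -> error message for nodes that failed the LV query
# res_instances: names of instances that need their disks activated
# res_missing: instance name -> list of (node, volume) pairs that are missing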
2555 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2556 instances = self.cfg.GetAllInstancesInfo().values()
2559 for inst in instances:
2561 if not inst.admin_up:
2563 inst.MapLVsByNode(inst_lvs)
2564 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2565 for node, vol_list in inst_lvs.iteritems():
2566 for vol in vol_list:
2567 nv_dict[(node, vol)] = inst
2572 node_lvs = self.rpc.call_lv_list(nodes, [])
2573 for node, node_res in node_lvs.items():
2574 if node_res.offline:
2576 msg = node_res.fail_msg
2578 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2579 res_nodes[node] = msg
2582 lvs = node_res.payload
2583 for lv_name, (_, _, lv_online) in lvs.items():
2584 inst = nv_dict.pop((node, lv_name), None)
2585 if (not lv_online and inst is not None
2586 and inst.name not in res_instances):
2587 res_instances.append(inst.name)
2589 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2591 for key, inst in nv_dict.iteritems():
2592 if inst.name not in res_missing:
2593 res_missing[inst.name] = []
2594 res_missing[inst.name].append(key)
2599 class LUClusterRepairDiskSizes(NoHooksLU):
2600 """Verifies the cluster disks sizes.
2605 def ExpandNames(self):
2606 if self.op.instances:
2607 self.wanted_names = []
2608 for name in self.op.instances:
2609 full_name = _ExpandInstanceName(self.cfg, name)
2610 self.wanted_names.append(full_name)
2611 self.needed_locks = {
2612 locking.LEVEL_NODE: [],
2613 locking.LEVEL_INSTANCE: self.wanted_names,
2615 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2617 self.wanted_names = None
2618 self.needed_locks = {
2619 locking.LEVEL_NODE: locking.ALL_SET,
2620 locking.LEVEL_INSTANCE: locking.ALL_SET,
2622 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2624 def DeclareLocks(self, level):
2625 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2626 self._LockInstancesNodes(primary_only=True)
2628 def CheckPrereq(self):
2629 """Check prerequisites.
2631 This only checks the optional instance list against the existing names.
2634 if self.wanted_names is None:
2635 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2637 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2638 in self.wanted_names]
2640 def _EnsureChildSizes(self, disk):
2641 """Ensure children of the disk have the needed disk size.
2643 This is valid mainly for DRBD8 and fixes an issue where the
2644 children have smaller disk size.
2646 @param disk: an L{ganeti.objects.Disk} object
2649 if disk.dev_type == constants.LD_DRBD8:
2650 assert disk.children, "Empty children for DRBD8?"
2651 fchild = disk.children[0]
2652 mismatch = fchild.size < disk.size
2654 self.LogInfo("Child disk has size %d, parent %d, fixing",
2655 fchild.size, disk.size)
2656 fchild.size = disk.size
2658 # and we recurse on this child only, not on the metadev
2659 return self._EnsureChildSizes(fchild) or mismatch
2663 def Exec(self, feedback_fn):
2664 """Verify the size of cluster disks.
2667 # TODO: check child disks too
2668 # TODO: check differences in size between primary/secondary nodes
2670 for instance in self.wanted_instances:
2671 pnode = instance.primary_node
2672 if pnode not in per_node_disks:
2673 per_node_disks[pnode] = []
2674 for idx, disk in enumerate(instance.disks):
2675 per_node_disks[pnode].append((instance, idx, disk))
2678 for node, dskl in per_node_disks.items():
2679 newl = [v[2].Copy() for v in dskl]
2681 self.cfg.SetDiskID(dsk, node)
2682 result = self.rpc.call_blockdev_getsize(node, newl)
2684 self.LogWarning("Failure in blockdev_getsize call to node"
2685 " %s, ignoring", node)
2687 if len(result.payload) != len(dskl):
2688 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2689 " result.payload=%s", node, len(dskl), result.payload)
2690 self.LogWarning("Invalid result from node %s, ignoring node results",
2693 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2695 self.LogWarning("Disk %d of instance %s did not return size"
2696 " information, ignoring", idx, instance.name)
2698 if not isinstance(size, (int, long)):
2699 self.LogWarning("Disk %d of instance %s did not return valid"
2700 " size information, ignoring", idx, instance.name)
2703 if size != disk.size:
2704 self.LogInfo("Disk %d of instance %s has mismatched size,"
2705 " correcting: recorded %d, actual %d", idx,
2706 instance.name, disk.size, size)
2708 self.cfg.Update(instance, feedback_fn)
2709 changed.append((instance.name, idx, size))
2710 if self._EnsureChildSizes(disk):
2711 self.cfg.Update(instance, feedback_fn)
2712 changed.append((instance.name, idx, disk.size))
2716 class LUClusterRename(LogicalUnit):
2717 """Rename the cluster.
2720 HPATH = "cluster-rename"
2721 HTYPE = constants.HTYPE_CLUSTER
2723 def BuildHooksEnv(self):
2728 "OP_TARGET": self.cfg.GetClusterName(),
2729 "NEW_NAME": self.op.name,
2732 def BuildHooksNodes(self):
2733 """Build hooks nodes.
2736 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2738 def CheckPrereq(self):
2739 """Verify that the passed name is a valid one.
2742 hostname = netutils.GetHostname(name=self.op.name,
2743 family=self.cfg.GetPrimaryIPFamily())
2745 new_name = hostname.name
2746 self.ip = new_ip = hostname.ip
2747 old_name = self.cfg.GetClusterName()
2748 old_ip = self.cfg.GetMasterIP()
2749 if new_name == old_name and new_ip == old_ip:
2750 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2751 " cluster has changed",
2753 if new_ip != old_ip:
2754 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2755 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2756 " reachable on the network" %
2757 new_ip, errors.ECODE_NOTUNIQUE)
2759 self.op.name = new_name
2761 def Exec(self, feedback_fn):
2762 """Rename the cluster.
2765 clustername = self.op.name
2768 # shutdown the master IP
2769 master = self.cfg.GetMasterNode()
2770 result = self.rpc.call_node_stop_master(master, False)
2771 result.Raise("Could not disable the master role")
2774 cluster = self.cfg.GetClusterInfo()
2775 cluster.cluster_name = clustername
2776 cluster.master_ip = ip
2777 self.cfg.Update(cluster, feedback_fn)
2779 # update the known hosts file
2780 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2781 node_list = self.cfg.GetOnlineNodeList()
2783 node_list.remove(master)
2786 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2788 result = self.rpc.call_node_start_master(master, False, False)
2789 msg = result.fail_msg
2791 self.LogWarning("Could not re-enable the master role on"
2792 " the master, please restart manually: %s", msg)
2797 class LUClusterSetParams(LogicalUnit):
2798 """Change the parameters of the cluster.
2801 HPATH = "cluster-modify"
2802 HTYPE = constants.HTYPE_CLUSTER
2805 def CheckArguments(self):
2809 if self.op.uid_pool:
2810 uidpool.CheckUidPool(self.op.uid_pool)
2812 if self.op.add_uids:
2813 uidpool.CheckUidPool(self.op.add_uids)
2815 if self.op.remove_uids:
2816 uidpool.CheckUidPool(self.op.remove_uids)
2818 def ExpandNames(self):
2819 # FIXME: in the future maybe other cluster params won't require checking on
2820 # all nodes to be modified.
2821 self.needed_locks = {
2822 locking.LEVEL_NODE: locking.ALL_SET,
2824 self.share_locks[locking.LEVEL_NODE] = 1
2826 def BuildHooksEnv(self):
2831 "OP_TARGET": self.cfg.GetClusterName(),
2832 "NEW_VG_NAME": self.op.vg_name,
2835 def BuildHooksNodes(self):
2836 """Build hooks nodes.
2839 mn = self.cfg.GetMasterNode()
2842 def CheckPrereq(self):
2843 """Check prerequisites.
2845 This checks whether the given params don't conflict and
2846 if the given volume group is valid.
2849 if self.op.vg_name is not None and not self.op.vg_name:
2850 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2851 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2852 " instances exist", errors.ECODE_INVAL)
2854 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2855 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2856 raise errors.OpPrereqError("Cannot disable drbd helper while"
2857 " drbd-based instances exist",
2860 node_list = self.acquired_locks[locking.LEVEL_NODE]
2862 # if vg_name not None, checks given volume group on all nodes
2864 vglist = self.rpc.call_vg_list(node_list)
2865 for node in node_list:
2866 msg = vglist[node].fail_msg
2868 # ignoring down node
2869 self.LogWarning("Error while gathering data on node %s"
2870 " (ignoring node): %s", node, msg)
2872 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2874 constants.MIN_VG_SIZE)
2876 raise errors.OpPrereqError("Error on node '%s': %s" %
2877 (node, vgstatus), errors.ECODE_ENVIRON)
2879 if self.op.drbd_helper:
2880 # checks given drbd helper on all nodes
2881 helpers = self.rpc.call_drbd_helper(node_list)
2882 for node in node_list:
2883 ninfo = self.cfg.GetNodeInfo(node)
2885 self.LogInfo("Not checking drbd helper on offline node %s", node)
2887 msg = helpers[node].fail_msg
2889 raise errors.OpPrereqError("Error checking drbd helper on node"
2890 " '%s': %s" % (node, msg),
2891 errors.ECODE_ENVIRON)
2892 node_helper = helpers[node].payload
2893 if node_helper != self.op.drbd_helper:
2894 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2895 (node, node_helper), errors.ECODE_ENVIRON)
2897 self.cluster = cluster = self.cfg.GetClusterInfo()
2898 # validate params changes
2899 if self.op.beparams:
2900 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2901 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2903 if self.op.ndparams:
2904 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2905 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2907 # TODO: we need a more general way to handle resetting
2908 # cluster-level parameters to default values
2909 if self.new_ndparams["oob_program"] == "":
2910 self.new_ndparams["oob_program"] = \
2911 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2913 if self.op.nicparams:
2914 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2915 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2916 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2919 # check all instances for consistency
2920 for instance in self.cfg.GetAllInstancesInfo().values():
2921 for nic_idx, nic in enumerate(instance.nics):
2922 params_copy = copy.deepcopy(nic.nicparams)
2923 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2925 # check parameter syntax
2927 objects.NIC.CheckParameterSyntax(params_filled)
2928 except errors.ConfigurationError, err:
2929 nic_errors.append("Instance %s, nic/%d: %s" %
2930 (instance.name, nic_idx, err))
2932 # if we're moving instances to routed, check that they have an ip
2933 target_mode = params_filled[constants.NIC_MODE]
2934 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2935 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2936 (instance.name, nic_idx))
2938 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2939 "\n".join(nic_errors))
2941 # hypervisor list/parameters
2942 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2943 if self.op.hvparams:
2944 for hv_name, hv_dict in self.op.hvparams.items():
2945 if hv_name not in self.new_hvparams:
2946 self.new_hvparams[hv_name] = hv_dict
2948 self.new_hvparams[hv_name].update(hv_dict)
2950 # os hypervisor parameters
2951 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2953 for os_name, hvs in self.op.os_hvp.items():
2954 if os_name not in self.new_os_hvp:
2955 self.new_os_hvp[os_name] = hvs
2957 for hv_name, hv_dict in hvs.items():
2958 if hv_name not in self.new_os_hvp[os_name]:
2959 self.new_os_hvp[os_name][hv_name] = hv_dict
2961 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2964 self.new_osp = objects.FillDict(cluster.osparams, {})
2965 if self.op.osparams:
2966 for os_name, osp in self.op.osparams.items():
2967 if os_name not in self.new_osp:
2968 self.new_osp[os_name] = {}
2970 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2973 if not self.new_osp[os_name]:
2974 # we removed all parameters
2975 del self.new_osp[os_name]
2977 # check the parameter validity (remote check)
2978 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2979 os_name, self.new_osp[os_name])
2981 # changes to the hypervisor list
2982 if self.op.enabled_hypervisors is not None:
2983 self.hv_list = self.op.enabled_hypervisors
2984 for hv in self.hv_list:
2985 # if the hypervisor doesn't already exist in the cluster
2986 # hvparams, we initialize it to empty, and then (in both
2987 # cases) we make sure to fill the defaults, as we might not
2988 # have a complete defaults list if the hypervisor wasn't enabled before
2990 if hv not in new_hvp:
2992 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2993 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2995 self.hv_list = cluster.enabled_hypervisors
2997 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2998 # either the enabled list has changed, or the parameters have, validate
2999 for hv_name, hv_params in self.new_hvparams.items():
3000 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3001 (self.op.enabled_hypervisors and
3002 hv_name in self.op.enabled_hypervisors)):
3003 # either this is a new hypervisor, or its parameters have changed
3004 hv_class = hypervisor.GetHypervisor(hv_name)
3005 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3006 hv_class.CheckParameterSyntax(hv_params)
3007 _CheckHVParams(self, node_list, hv_name, hv_params)
3010 # no need to check any newly-enabled hypervisors, since the
3011 # defaults have already been checked in the above code-block
3012 for os_name, os_hvp in self.new_os_hvp.items():
3013 for hv_name, hv_params in os_hvp.items():
3014 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3015 # we need to fill in the new os_hvp on top of the actual hv_p
3016 cluster_defaults = self.new_hvparams.get(hv_name, {})
3017 new_osp = objects.FillDict(cluster_defaults, hv_params)
3018 hv_class = hypervisor.GetHypervisor(hv_name)
3019 hv_class.CheckParameterSyntax(new_osp)
3020 _CheckHVParams(self, node_list, hv_name, new_osp)
3022 if self.op.default_iallocator:
3023 alloc_script = utils.FindFile(self.op.default_iallocator,
3024 constants.IALLOCATOR_SEARCH_PATH,
3026 if alloc_script is None:
3027 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3028 " specified" % self.op.default_iallocator,
3031 def Exec(self, feedback_fn):
3032 """Change the parameters of the cluster.
3035 if self.op.vg_name is not None:
3036 new_volume = self.op.vg_name
3039 if new_volume != self.cfg.GetVGName():
3040 self.cfg.SetVGName(new_volume)
3042 feedback_fn("Cluster LVM configuration already in desired"
3043 " state, not changing")
3044 if self.op.drbd_helper is not None:
3045 new_helper = self.op.drbd_helper
3048 if new_helper != self.cfg.GetDRBDHelper():
3049 self.cfg.SetDRBDHelper(new_helper)
3051 feedback_fn("Cluster DRBD helper already in desired state,"
3053 if self.op.hvparams:
3054 self.cluster.hvparams = self.new_hvparams
3056 self.cluster.os_hvp = self.new_os_hvp
3057 if self.op.enabled_hypervisors is not None:
3058 self.cluster.hvparams = self.new_hvparams
3059 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3060 if self.op.beparams:
3061 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3062 if self.op.nicparams:
3063 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3064 if self.op.osparams:
3065 self.cluster.osparams = self.new_osp
3066 if self.op.ndparams:
3067 self.cluster.ndparams = self.new_ndparams
3069 if self.op.candidate_pool_size is not None:
3070 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3071 # we need to update the pool size here, otherwise the save will fail
3072 _AdjustCandidatePool(self, [])
3074 if self.op.maintain_node_health is not None:
3075 self.cluster.maintain_node_health = self.op.maintain_node_health
3077 if self.op.prealloc_wipe_disks is not None:
3078 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3080 if self.op.add_uids is not None:
3081 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3083 if self.op.remove_uids is not None:
3084 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3086 if self.op.uid_pool is not None:
3087 self.cluster.uid_pool = self.op.uid_pool
3089 if self.op.default_iallocator is not None:
3090 self.cluster.default_iallocator = self.op.default_iallocator
3092 if self.op.reserved_lvs is not None:
3093 self.cluster.reserved_lvs = self.op.reserved_lvs
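# helper_os applies the DDM_ADD/DDM_REMOVE modifications from the opcode to
# a cluster-level OS name list; it is used below for the hidden and
# blacklisted OS lists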
3095 def helper_os(aname, mods, desc):
3097 lst = getattr(self.cluster, aname)
3098 for key, val in mods:
3099 if key == constants.DDM_ADD:
3101 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3104 elif key == constants.DDM_REMOVE:
3108 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3110 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3112 if self.op.hidden_os:
3113 helper_os("hidden_os", self.op.hidden_os, "hidden")
3115 if self.op.blacklisted_os:
3116 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3118 if self.op.master_netdev:
3119 master = self.cfg.GetMasterNode()
3120 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3121 self.cluster.master_netdev)
3122 result = self.rpc.call_node_stop_master(master, False)
3123 result.Raise("Could not disable the master ip")
3124 feedback_fn("Changing master_netdev from %s to %s" %
3125 (self.cluster.master_netdev, self.op.master_netdev))
3126 self.cluster.master_netdev = self.op.master_netdev
3128 self.cfg.Update(self.cluster, feedback_fn)
3130 if self.op.master_netdev:
3131 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3132 self.op.master_netdev)
3133 result = self.rpc.call_node_start_master(master, False, False)
3135 self.LogWarning("Could not re-enable the master ip on"
3136 " the master, please restart manually: %s",
3140 def _UploadHelper(lu, nodes, fname):
3141 """Helper for uploading a file and showing warnings.
3144 if os.path.exists(fname):
3145 result = lu.rpc.call_upload_file(nodes, fname)
3146 for to_node, to_result in result.items():
3147 msg = to_result.fail_msg
3149 msg = ("Copy of file %s to node %s failed: %s" %
3150 (fname, to_node, msg))
3151 lu.proc.LogWarning(msg)
3154 def _ComputeAncillaryFiles(cluster, redist):
3155 """Compute files external to Ganeti which need to be consistent.
3157 @type redist: boolean
3158 @param redist: Whether to include files which need to be redistributed
3161 # Compute files for all nodes
3163 constants.SSH_KNOWN_HOSTS_FILE,
3164 constants.CONFD_HMAC_KEY,
3165 constants.CLUSTER_DOMAIN_SECRET_FILE,
3169 files_all.update(constants.ALL_CERT_FILES)
3170 files_all.update(ssconf.SimpleStore().GetFileList())
3172 if cluster.modify_etc_hosts:
3173 files_all.add(constants.ETC_HOSTS)
3175 # Files which must either exist on all nodes or on none
3176 files_all_opt = set([
3177 constants.RAPI_USERS_FILE,
3180 # Files which should only be on master candidates
3183 files_mc.add(constants.CLUSTER_CONF_FILE)
3185 # Files which should only be on VM-capable nodes
3186 files_vm = set(filename
3187 for hv_name in cluster.enabled_hypervisors
3188 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3190 # Filenames must be unique
3191 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3192 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3193 "Found file listed in more than one file list"
3195 return (files_all, files_all_opt, files_mc, files_vm)
3198 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3199 """Distribute additional files which are part of the cluster configuration.
3201 ConfigWriter takes care of distributing the config and ssconf files, but
3202 there are more files which should be distributed to all nodes. This function
3203 makes sure those are copied.
3205 @param lu: calling logical unit
3206 @param additional_nodes: list of nodes not in the config to distribute to
3207 @type additional_vm: boolean
3208 @param additional_vm: whether the additional nodes are vm-capable or not
3211 # Gather target nodes
3212 cluster = lu.cfg.GetClusterInfo()
3213 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3215 online_nodes = lu.cfg.GetOnlineNodeList()
3216 vm_nodes = lu.cfg.GetVmCapableNodeList()
3218 if additional_nodes is not None:
3219 online_nodes.extend(additional_nodes)
3221 vm_nodes.extend(additional_nodes)
3223 # Never distribute to master node
3224 for nodelist in [online_nodes, vm_nodes]:
3225 if master_info.name in nodelist:
3226 nodelist.remove(master_info.name)
3229 (files_all, files_all_opt, files_mc, files_vm) = \
3230 _ComputeAncillaryFiles(cluster, True)
3232 # Never re-distribute configuration file from here
3233 assert not (constants.CLUSTER_CONF_FILE in files_all or
3234 constants.CLUSTER_CONF_FILE in files_vm)
3235 assert not files_mc, "Master candidates not handled in this function"
3238 (online_nodes, files_all),
3239 (online_nodes, files_all_opt),
3240 (vm_nodes, files_vm),
3244 for (node_list, files) in filemap:
3246 _UploadHelper(lu, node_list, fname)
3249 class LUClusterRedistConf(NoHooksLU):
3250 """Force the redistribution of cluster configuration.
3252 This is a very simple LU.
3257 def ExpandNames(self):
3258 self.needed_locks = {
3259 locking.LEVEL_NODE: locking.ALL_SET,
3261 self.share_locks[locking.LEVEL_NODE] = 1
3263 def Exec(self, feedback_fn):
3264 """Redistribute the configuration.
3267 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3268 _RedistributeAncillaryFiles(self)
3271 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3272 """Sleep and poll for an instance's disk to sync.
3275 if not instance.disks or disks is not None and not disks:
3278 disks = _ExpandCheckDisks(instance, disks)
3281 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3283 node = instance.primary_node
3286 lu.cfg.SetDiskID(dev, node)
3288 # TODO: Convert to utils.Retry
3291 degr_retries = 10 # in seconds, as we sleep 1 second each time
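# main polling loop: query the mirror status on the primary node and report
# progress until every disk is done; a degraded state right at the end only
# triggers a few extra retries to rule out transient glitches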
3295 cumul_degraded = False
3296 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3297 msg = rstats.fail_msg
3299 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3302 raise errors.RemoteError("Can't contact node %s for mirror data,"
3303 " aborting." % node)
3306 rstats = rstats.payload
3308 for i, mstat in enumerate(rstats):
3310 lu.LogWarning("Can't compute data for node %s/%s",
3311 node, disks[i].iv_name)
3314 cumul_degraded = (cumul_degraded or
3315 (mstat.is_degraded and mstat.sync_percent is None))
3316 if mstat.sync_percent is not None:
3318 if mstat.estimated_time is not None:
3319 rem_time = ("%s remaining (estimated)" %
3320 utils.FormatSeconds(mstat.estimated_time))
3321 max_time = mstat.estimated_time
3323 rem_time = "no time estimate"
3324 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3325 (disks[i].iv_name, mstat.sync_percent, rem_time))
3327 # if we're done but degraded, let's do a few small retries, to
3328 # make sure we see a stable and not transient situation; therefore
3329 # we force restart of the loop
3330 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3331 logging.info("Degraded disks found, %d retries left", degr_retries)
3339 time.sleep(min(60, max_time))
3342 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3343 return not cumul_degraded
3346 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3347 """Check that mirrors are not degraded.
3349 The ldisk parameter, if True, will change the test from the
3350 is_degraded attribute (which represents overall non-ok status for
3351 the device(s)) to the ldisk (representing the local storage status).
3354 lu.cfg.SetDiskID(dev, node)
3358 if on_primary or dev.AssembleOnSecondary():
3359 rstats = lu.rpc.call_blockdev_find(node, dev)
3360 msg = rstats.fail_msg
3362 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3364 elif not rstats.payload:
3365 lu.LogWarning("Can't find disk on node %s", node)
3369 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3371 result = result and not rstats.payload.is_degraded
3374 for child in dev.children:
3375 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3380 class LUOobCommand(NoHooksLU):
3381 """Logical unit for OOB handling.
3385 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3387 def CheckPrereq(self):
3388 """Check prerequisites.
3391 - the node exists in the configuration
3394 Any errors are signaled by raising errors.OpPrereqError.
3398 self.master_node = self.cfg.GetMasterNode()
3400 assert self.op.power_delay >= 0.0
3402 if self.op.node_names:
3403 if self.op.command in self._SKIP_MASTER:
3404 if self.master_node in self.op.node_names:
3405 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3406 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3408 if master_oob_handler:
3409 additional_text = ("Run '%s %s %s' if you want to operate on the"
3410 " master regardless") % (master_oob_handler,
3414 additional_text = "The master node does not support out-of-band"
3416 raise errors.OpPrereqError(("Operating on the master node %s is not"
3417 " allowed for %s\n%s") %
3418 (self.master_node, self.op.command,
3419 additional_text), errors.ECODE_INVAL)
3421 self.op.node_names = self.cfg.GetNodeList()
3422 if self.op.command in self._SKIP_MASTER:
3423 self.op.node_names.remove(self.master_node)
3425 if self.op.command in self._SKIP_MASTER:
3426 assert self.master_node not in self.op.node_names
3428 for node_name in self.op.node_names:
3429 node = self.cfg.GetNodeInfo(node_name)
3432 raise errors.OpPrereqError("Node %s not found" % node_name,
3435 self.nodes.append(node)
3437 if (not self.op.ignore_status and
3438 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3439 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3440 " not marked offline") % node_name,
3443 def ExpandNames(self):
3444 """Gather locks we need.
3447 if self.op.node_names:
3448 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3449 for name in self.op.node_names]
3450 lock_names = self.op.node_names
3452 lock_names = locking.ALL_SET
3454 self.needed_locks = {
3455 locking.LEVEL_NODE: lock_names,
3458 def Exec(self, feedback_fn):
3459 """Execute OOB and return result if we expect any.
3462 master_node = self.master_node
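# each node contributes one list of (status, data) tuples to the overall
# result; the first entry always carries the node's own name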
3465 for idx, node in enumerate(self.nodes):
3466 node_entry = [(constants.RS_NORMAL, node.name)]
3467 ret.append(node_entry)
3469 oob_program = _SupportsOob(self.cfg, node)
3472 node_entry.append((constants.RS_UNAVAIL, None))
3475 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3476 self.op.command, oob_program, node.name)
3477 result = self.rpc.call_run_oob(master_node, oob_program,
3478 self.op.command, node.name,
3482 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3483 node.name, result.fail_msg)
3484 node_entry.append((constants.RS_NODATA, None))
3487 self._CheckPayload(result)
3488 except errors.OpExecError, err:
3489 self.LogWarning("The payload returned by '%s' is not valid: %s",
3491 node_entry.append((constants.RS_NODATA, None))
3493 if self.op.command == constants.OOB_HEALTH:
3494 # For health we should log important events
3495 for item, status in result.payload:
3496 if status in [constants.OOB_STATUS_WARNING,
3497 constants.OOB_STATUS_CRITICAL]:
3498 self.LogWarning("On node '%s' item '%s' has status '%s'",
3499 node.name, item, status)
3501 if self.op.command == constants.OOB_POWER_ON:
3503 elif self.op.command == constants.OOB_POWER_OFF:
3504 node.powered = False
3505 elif self.op.command == constants.OOB_POWER_STATUS:
3506 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3507 if powered != node.powered:
3508 logging.warning(("Recorded power state (%s) of node '%s' does not"
3509 " match actual power state (%s)"), node.powered,
3512 # For configuration changing commands we should update the node
3513 if self.op.command in (constants.OOB_POWER_ON,
3514 constants.OOB_POWER_OFF):
3515 self.cfg.Update(node, feedback_fn)
3517 node_entry.append((constants.RS_NORMAL, result.payload))
3519 if (self.op.command == constants.OOB_POWER_ON and
3520 idx < len(self.nodes) - 1):
3521 time.sleep(self.op.power_delay)
3525 def _CheckPayload(self, result):
3526 """Checks if the payload is valid.
3528 @param result: RPC result
3529 @raises errors.OpExecError: If payload is not valid
3533 if self.op.command == constants.OOB_HEALTH:
3534 if not isinstance(result.payload, list):
3535 errs.append("command 'health' is expected to return a list but got %s" %
3536 type(result.payload))
3538 for item, status in result.payload:
3539 if status not in constants.OOB_STATUSES:
3540 errs.append("health item '%s' has invalid status '%s'" %
3543 if self.op.command == constants.OOB_POWER_STATUS:
3544 if not isinstance(result.payload, dict):
3545 errs.append("power-status is expected to return a dict but got %s" %
3546 type(result.payload))
3548 if self.op.command in [
3549 constants.OOB_POWER_ON,
3550 constants.OOB_POWER_OFF,
3551 constants.OOB_POWER_CYCLE,
3553 if result.payload is not None:
3554 errs.append("%s is expected to not return payload but got '%s'" %
3555 (self.op.command, result.payload))
3558 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3559 utils.CommaJoin(errs))
3561 class _OsQuery(_QueryBase):
3562 FIELDS = query.OS_FIELDS
3564 def ExpandNames(self, lu):
3565 # Lock all nodes in shared mode
3566 # Temporary removal of locks, should be reverted later
3567 # TODO: reintroduce locks when they are lighter-weight
3568 lu.needed_locks = {}
3569 #self.share_locks[locking.LEVEL_NODE] = 1
3570 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3572 # The following variables interact with _QueryBase._GetNames
3574 self.wanted = self.names
3576 self.wanted = locking.ALL_SET
3578 self.do_locking = self.use_locking
3580 def DeclareLocks(self, lu, level):
3584 def _DiagnoseByOS(rlist):
3585 """Remaps a per-node return list into an a per-os per-node dictionary
3587 @param rlist: a map with node names as keys and OS objects as values
3590 @return: a dictionary with osnames as keys and as value another
3591 map, with nodes as keys and tuples of (path, status, diagnose,
3592 variants, parameters, api_versions) as values, eg::
3594 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3595 (/srv/..., False, "invalid api")],
3596 "node2": [(/srv/..., True, "", [], [])]}
3601 # we build here the list of nodes that didn't fail the RPC (at RPC
3602 # level), so that nodes with a non-responding node daemon don't
3603 # make all OSes invalid
3604 good_nodes = [node_name for node_name in rlist
3605 if not rlist[node_name].fail_msg]
3606 for node_name, nr in rlist.items():
3607 if nr.fail_msg or not nr.payload:
3609 for (name, path, status, diagnose, variants,
3610 params, api_versions) in nr.payload:
3611 if name not in all_os:
3612 # build a list of nodes for this os containing empty lists
3613 # for each node in node_list
3615 for nname in good_nodes:
3616 all_os[name][nname] = []
3617 # convert params from [name, help] to (name, help)
3618 params = [tuple(v) for v in params]
3619 all_os[name][node_name].append((path, status, diagnose,
3620 variants, params, api_versions))
3623 def _GetQueryData(self, lu):
3624 """Computes the list of nodes and their attributes.
3627 # Locking is not used
3628 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3630 valid_nodes = [node.name
3631 for node in lu.cfg.GetAllNodesInfo().values()
3632 if not node.offline and node.vm_capable]
3633 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3634 cluster = lu.cfg.GetClusterInfo()
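# an OS is reported as valid only if its first entry is valid on every node;
# variants, parameters and API versions are intersected across the nodes
# that report the OS, so only values common to all of them remain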
3638 for (os_name, os_data) in pol.items():
3639 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3640 hidden=(os_name in cluster.hidden_os),
3641 blacklisted=(os_name in cluster.blacklisted_os))
3645 api_versions = set()
3647 for idx, osl in enumerate(os_data.values()):
3648 info.valid = bool(info.valid and osl and osl[0][1])
3652 (node_variants, node_params, node_api) = osl[0][3:6]
3655 variants.update(node_variants)
3656 parameters.update(node_params)
3657 api_versions.update(node_api)
3659 # Filter out inconsistent values
3660 variants.intersection_update(node_variants)
3661 parameters.intersection_update(node_params)
3662 api_versions.intersection_update(node_api)
3664 info.variants = list(variants)
3665 info.parameters = list(parameters)
3666 info.api_versions = list(api_versions)
3668 data[os_name] = info
3670 # Prepare data in requested order
3671 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3675 class LUOsDiagnose(NoHooksLU):
3676 """Logical unit for OS diagnose/query.
3682 def _BuildFilter(fields, names):
3683 """Builds a filter for querying OSes.
3686 name_filter = qlang.MakeSimpleFilter("name", names)
3688 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3689 # respective field is not requested
3690 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3691 for fname in ["hidden", "blacklisted"]
3692 if fname not in fields]
3693 if "valid" not in fields:
3694 status_filter.append([qlang.OP_TRUE, "valid"])
3697 status_filter.insert(0, qlang.OP_AND)
3699 status_filter = None
3701 if name_filter and status_filter:
3702 return [qlang.OP_AND, name_filter, status_filter]
3706 return status_filter
3708 def CheckArguments(self):
3709 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3710 self.op.output_fields, False)
3712 def ExpandNames(self):
3713 self.oq.ExpandNames(self)
3715 def Exec(self, feedback_fn):
3716 return self.oq.OldStyleQuery(self)
3719 class LUNodeRemove(LogicalUnit):
3720 """Logical unit for removing a node.
3723 HPATH = "node-remove"
3724 HTYPE = constants.HTYPE_NODE
3726 def BuildHooksEnv(self):
3729 This doesn't run on the target node in the pre phase as a failed
3730 node would then be impossible to remove.
3734 "OP_TARGET": self.op.node_name,
3735 "NODE_NAME": self.op.node_name,
3738 def BuildHooksNodes(self):
3739 """Build hooks nodes.
3742 all_nodes = self.cfg.GetNodeList()
3744 all_nodes.remove(self.op.node_name)
3746 logging.warning("Node '%s', which is about to be removed, was not found"
3747 " in the list of all nodes", self.op.node_name)
3748 return (all_nodes, all_nodes)
3750 def CheckPrereq(self):
3751 """Check prerequisites.
3754 - the node exists in the configuration
3755 - it does not have primary or secondary instances
3756 - it's not the master
3758 Any errors are signaled by raising errors.OpPrereqError.
3761 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3762 node = self.cfg.GetNodeInfo(self.op.node_name)
3763 assert node is not None
3765 instance_list = self.cfg.GetInstanceList()
3767 masternode = self.cfg.GetMasterNode()
3768 if node.name == masternode:
3769 raise errors.OpPrereqError("Node is the master node,"
3770 " you need to failover first.",
3773 for instance_name in instance_list:
3774 instance = self.cfg.GetInstanceInfo(instance_name)
3775 if node.name in instance.all_nodes:
3776 raise errors.OpPrereqError("Instance %s is still running on the node,"
3777 " please remove first." % instance_name,
3779 self.op.node_name = node.name
3782 def Exec(self, feedback_fn):
3783 """Removes the node from the cluster.
3787 logging.info("Stopping the node daemon and removing configs from node %s",
3790 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3792 # Promote nodes to master candidate as needed
3793 _AdjustCandidatePool(self, exceptions=[node.name])
3794 self.context.RemoveNode(node.name)
3796 # Run post hooks on the node before it's removed
3797 _RunPostHook(self, node.name)
3799 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3800 msg = result.fail_msg
3802 self.LogWarning("Errors encountered on the remote node while leaving"
3803 " the cluster: %s", msg)
3805 # Remove node from our /etc/hosts
3806 if self.cfg.GetClusterInfo().modify_etc_hosts:
3807 master_node = self.cfg.GetMasterNode()
3808 result = self.rpc.call_etc_hosts_modify(master_node,
3809 constants.ETC_HOSTS_REMOVE,
3811 result.Raise("Can't update hosts file with new host data")
3812 _RedistributeAncillaryFiles(self)
3815 class _NodeQuery(_QueryBase):
3816 FIELDS = query.NODE_FIELDS
3818 def ExpandNames(self, lu):
3819 lu.needed_locks = {}
3820 lu.share_locks[locking.LEVEL_NODE] = 1
3823 self.wanted = _GetWantedNodes(lu, self.names)
3825 self.wanted = locking.ALL_SET
3827 self.do_locking = (self.use_locking and
3828 query.NQ_LIVE in self.requested_data)
3831 # if we don't request only static fields, we need to lock the nodes
3832 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3834 def DeclareLocks(self, lu, level):
3837 def _GetQueryData(self, lu):
3838 """Computes the list of nodes and their attributes.
3841 all_info = lu.cfg.GetAllNodesInfo()
3843 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3845 # Gather data as requested
3846 if query.NQ_LIVE in self.requested_data:
3847 # filter out non-vm_capable nodes
3848 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3850 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3851 lu.cfg.GetHypervisorType())
3852 live_data = dict((name, nresult.payload)
3853 for (name, nresult) in node_data.items()
3854 if not nresult.fail_msg and nresult.payload)
3858 if query.NQ_INST in self.requested_data:
3859 node_to_primary = dict([(name, set()) for name in nodenames])
3860 node_to_secondary = dict([(name, set()) for name in nodenames])
3862 inst_data = lu.cfg.GetAllInstancesInfo()
3864 for inst in inst_data.values():
3865 if inst.primary_node in node_to_primary:
3866 node_to_primary[inst.primary_node].add(inst.name)
3867 for secnode in inst.secondary_nodes:
3868 if secnode in node_to_secondary:
3869 node_to_secondary[secnode].add(inst.name)
3871 node_to_primary = None
3872 node_to_secondary = None
3874 if query.NQ_OOB in self.requested_data:
3875 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3876 for name, node in all_info.iteritems())
3880 if query.NQ_GROUP in self.requested_data:
3881 groups = lu.cfg.GetAllNodeGroupsInfo()
3885 return query.NodeQueryData([all_info[name] for name in nodenames],
3886 live_data, lu.cfg.GetMasterNode(),
3887 node_to_primary, node_to_secondary, groups,
3888 oob_support, lu.cfg.GetClusterInfo())
3891 class LUNodeQuery(NoHooksLU):
3892 """Logical unit for querying nodes.
3895 # pylint: disable-msg=W0142
3898 def CheckArguments(self):
3899 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3900 self.op.output_fields, self.op.use_locking)
3902 def ExpandNames(self):
3903 self.nq.ExpandNames(self)
3905 def Exec(self, feedback_fn):
3906 return self.nq.OldStyleQuery(self)
3909 class LUNodeQueryvols(NoHooksLU):
3910 """Logical unit for getting volumes on node(s).
3914 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3915 _FIELDS_STATIC = utils.FieldSet("node")
3917 def CheckArguments(self):
3918 _CheckOutputFields(static=self._FIELDS_STATIC,
3919 dynamic=self._FIELDS_DYNAMIC,
3920 selected=self.op.output_fields)
3922 def ExpandNames(self):
3923 self.needed_locks = {}
3924 self.share_locks[locking.LEVEL_NODE] = 1
3925 if not self.op.nodes:
3926 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3928 self.needed_locks[locking.LEVEL_NODE] = \
3929 _GetWantedNodes(self, self.op.nodes)
3931 def Exec(self, feedback_fn):
3932 """Computes the list of nodes and their attributes.
3935 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3936 volumes = self.rpc.call_node_volumes(nodenames)
3938 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3939 in self.cfg.GetInstanceList()]
3941 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3944 for node in nodenames:
3945 nresult = volumes[node]
3948 msg = nresult.fail_msg
3950 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3953 node_vols = nresult.payload[:]
3954 node_vols.sort(key=lambda vol: vol['dev'])
3956 for vol in node_vols:
3958 for field in self.op.output_fields:
3961 elif field == "phys":
3965 elif field == "name":
3967 elif field == "size":
3968 val = int(float(vol['size']))
3969 elif field == "instance":
3971 if node not in lv_by_node[inst]:
3973 if vol['name'] in lv_by_node[inst][node]:
3979 raise errors.ParameterError(field)
3980 node_output.append(str(val))
3982 output.append(node_output)
3987 class LUNodeQueryStorage(NoHooksLU):
3988 """Logical unit for getting information on storage units on node(s).
3991 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3994 def CheckArguments(self):
3995 _CheckOutputFields(static=self._FIELDS_STATIC,
3996 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3997 selected=self.op.output_fields)
3999 def ExpandNames(self):
4000 self.needed_locks = {}
4001 self.share_locks[locking.LEVEL_NODE] = 1
4004 self.needed_locks[locking.LEVEL_NODE] = \
4005 _GetWantedNodes(self, self.op.nodes)
4007 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4009 def Exec(self, feedback_fn):
4010 """Computes the list of nodes and their attributes.
4013 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4015 # Always get name to sort by
4016 if constants.SF_NAME in self.op.output_fields:
4017 fields = self.op.output_fields[:]
4019 fields = [constants.SF_NAME] + self.op.output_fields
4021 # Never ask for node or type as it's only known to the LU
4022 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4023 while extra in fields:
4024 fields.remove(extra)
4026 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4027 name_idx = field_idx[constants.SF_NAME]
4029 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4030 data = self.rpc.call_storage_list(self.nodes,
4031 self.op.storage_type, st_args,
4032 self.op.name, fields)
4036 for node in utils.NiceSort(self.nodes):
4037 nresult = data[node]
4041 msg = nresult.fail_msg
4043 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4046 rows = dict([(row[name_idx], row) for row in nresult.payload])
4048 for name in utils.NiceSort(rows.keys()):
4053 for field in self.op.output_fields:
4054 if field == constants.SF_NODE:
4056 elif field == constants.SF_TYPE:
4057 val = self.op.storage_type
4058 elif field in field_idx:
4059 val = row[field_idx[field]]
4061 raise errors.ParameterError(field)
4070 class _InstanceQuery(_QueryBase):
4071 FIELDS = query.INSTANCE_FIELDS
4073 def ExpandNames(self, lu):
4074 lu.needed_locks = {}
4075 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4076 lu.share_locks[locking.LEVEL_NODE] = 1
4079 self.wanted = _GetWantedInstances(lu, self.names)
4081 self.wanted = locking.ALL_SET
4083 self.do_locking = (self.use_locking and
4084 query.IQ_LIVE in self.requested_data)
4086 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4087 lu.needed_locks[locking.LEVEL_NODE] = []
4088 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4090 def DeclareLocks(self, lu, level):
4091 if level == locking.LEVEL_NODE and self.do_locking:
4092 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4094 def _GetQueryData(self, lu):
4095 """Computes the list of instances and their attributes.
4098 cluster = lu.cfg.GetClusterInfo()
4099 all_info = lu.cfg.GetAllInstancesInfo()
4101 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4103 instance_list = [all_info[name] for name in instance_names]
4104 nodes = frozenset(itertools.chain(*(inst.all_nodes
4105 for inst in instance_list)))
4106 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4109 wrongnode_inst = set()
4111 # Gather data as requested
4112 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4114 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4116 result = node_data[name]
4118 # offline nodes will be in both lists
4119 assert result.fail_msg
4120 offline_nodes.append(name)
4122 bad_nodes.append(name)
4123 elif result.payload:
4124 for inst in result.payload:
4125 if inst in all_info:
4126 if all_info[inst].primary_node == name:
4127 live_data.update(result.payload)
4129 wrongnode_inst.add(inst)
4131 # orphan instance; we don't list it here as we don't
4132 # handle this case yet in the output of instance listing
4133 logging.warning("Orphan instance '%s' found on node %s",
4135 # else no instance is alive
4139 if query.IQ_DISKUSAGE in self.requested_data:
4140 disk_usage = dict((inst.name,
4141 _ComputeDiskSize(inst.disk_template,
4142 [{constants.IDISK_SIZE: disk.size}
4143 for disk in inst.disks]))
4144 for inst in instance_list)
4148 if query.IQ_CONSOLE in self.requested_data:
4150 for inst in instance_list:
4151 if inst.name in live_data:
4152 # Instance is running
4153 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4155 consinfo[inst.name] = None
4156 assert set(consinfo.keys()) == set(instance_names)
4160 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4161 disk_usage, offline_nodes, bad_nodes,
4162 live_data, wrongnode_inst, consinfo)
4165 class LUQuery(NoHooksLU):
4166 """Query for resources/items of a certain kind.
4169 # pylint: disable-msg=W0142
4172 def CheckArguments(self):
4173 qcls = _GetQueryImplementation(self.op.what)
4175 self.impl = qcls(self.op.filter, self.op.fields, False)
4177 def ExpandNames(self):
4178 self.impl.ExpandNames(self)
4180 def DeclareLocks(self, level):
4181 self.impl.DeclareLocks(self, level)
4183 def Exec(self, feedback_fn):
4184 return self.impl.NewStyleQuery(self)
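# Illustrative sketch (an assumption, not taken from this module): a caller
# would typically reach LUQuery through an opcodes.OpQuery opcode, e.g.
#
#   op = opcodes.OpQuery(what=constants.QR_NODE,
#                        fields=["name", "pinst_cnt"],
#                        filter=[qlang.OP_EQUAL, "master_candidate", True])
#
# The OpQuery field names are assumed to mirror the self.op.what,
# self.op.fields and self.op.filter attributes used above.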
4187 class LUQueryFields(NoHooksLU):
4188 """Query for resources/items of a certain kind.
4191 # pylint: disable-msg=W0142
4194 def CheckArguments(self):
4195 self.qcls = _GetQueryImplementation(self.op.what)
4197 def ExpandNames(self):
4198 self.needed_locks = {}
4200 def Exec(self, feedback_fn):
4201 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4204 class LUNodeModifyStorage(NoHooksLU):
4205 """Logical unit for modifying a storage volume on a node.
4210 def CheckArguments(self):
4211 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4213 storage_type = self.op.storage_type
4216 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4218 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4219 " modified" % storage_type,
4222 diff = set(self.op.changes.keys()) - modifiable
4224 raise errors.OpPrereqError("The following fields can not be modified for"
4225 " storage units of type '%s': %r" %
4226 (storage_type, list(diff)),
4229 def ExpandNames(self):
4230 self.needed_locks = {
4231 locking.LEVEL_NODE: self.op.node_name,
4234 def Exec(self, feedback_fn):
4235 """Computes the list of nodes and their attributes.
4238 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4239 result = self.rpc.call_storage_modify(self.op.node_name,
4240 self.op.storage_type, st_args,
4241 self.op.name, self.op.changes)
4242 result.Raise("Failed to modify storage unit '%s' on %s" %
4243 (self.op.name, self.op.node_name))
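# Illustrative sketch (node and device names are made up; the opcode name and
# the assumption that "allocatable" is the modifiable field for LVM physical
# volumes follow the usual Ganeti constants): marking a PV as non-allocatable
# would be requested with a changes dict keyed by the modifiable field, e.g.
#
#   op = opcodes.OpNodeModifyStorage(node_name="node2.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sdb1",
#                                    changes={constants.SF_ALLOCATABLE: False})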
4246 class LUNodeAdd(LogicalUnit):
4247 """Logical unit for adding node to the cluster.
4251 HTYPE = constants.HTYPE_NODE
4252 _NFLAGS = ["master_capable", "vm_capable"]
4254 def CheckArguments(self):
4255 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4256 # validate/normalize the node name
4257 self.hostname = netutils.GetHostname(name=self.op.node_name,
4258 family=self.primary_ip_family)
4259 self.op.node_name = self.hostname.name
4261 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4262 raise errors.OpPrereqError("Cannot readd the master node",
4265 if self.op.readd and self.op.group:
4266 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4267 " being readded", errors.ECODE_INVAL)
4269 def BuildHooksEnv(self):
4272 This will run on all nodes before, and on all nodes + the new node after.
4276 "OP_TARGET": self.op.node_name,
4277 "NODE_NAME": self.op.node_name,
4278 "NODE_PIP": self.op.primary_ip,
4279 "NODE_SIP": self.op.secondary_ip,
4280 "MASTER_CAPABLE": str(self.op.master_capable),
4281 "VM_CAPABLE": str(self.op.vm_capable),
4284 def BuildHooksNodes(self):
4285 """Build hooks nodes.
4288 # Exclude added node
4289 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4290 post_nodes = pre_nodes + [self.op.node_name, ]
4292 return (pre_nodes, post_nodes)
4294 def CheckPrereq(self):
4295 """Check prerequisites.
4298 - the new node is not already in the config
4300 - its parameters (single/dual homed) matches the cluster
4302 Any errors are signaled by raising errors.OpPrereqError.
4306 hostname = self.hostname
4307 node = hostname.name
4308 primary_ip = self.op.primary_ip = hostname.ip
4309 if self.op.secondary_ip is None:
4310 if self.primary_ip_family == netutils.IP6Address.family:
4311 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4312 " IPv4 address must be given as secondary",
4314 self.op.secondary_ip = primary_ip
4316 secondary_ip = self.op.secondary_ip
4317 if not netutils.IP4Address.IsValid(secondary_ip):
4318 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4319 " address" % secondary_ip, errors.ECODE_INVAL)
4321 node_list = cfg.GetNodeList()
4322 if not self.op.readd and node in node_list:
4323 raise errors.OpPrereqError("Node %s is already in the configuration" %
4324 node, errors.ECODE_EXISTS)
4325 elif self.op.readd and node not in node_list:
4326 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4329 self.changed_primary_ip = False
4331 for existing_node_name in node_list:
4332 existing_node = cfg.GetNodeInfo(existing_node_name)
4334 if self.op.readd and node == existing_node_name:
4335 if existing_node.secondary_ip != secondary_ip:
4336 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4337 " address configuration as before",
4339 if existing_node.primary_ip != primary_ip:
4340 self.changed_primary_ip = True
4344 if (existing_node.primary_ip == primary_ip or
4345 existing_node.secondary_ip == primary_ip or
4346 existing_node.primary_ip == secondary_ip or
4347 existing_node.secondary_ip == secondary_ip):
4348 raise errors.OpPrereqError("New node ip address(es) conflict with"
4349 " existing node %s" % existing_node.name,
4350 errors.ECODE_NOTUNIQUE)
4352 # After this 'if' block, None is no longer a valid value for the
4353 # _capable op attributes
4355 old_node = self.cfg.GetNodeInfo(node)
4356 assert old_node is not None, "Can't retrieve locked node %s" % node
4357 for attr in self._NFLAGS:
4358 if getattr(self.op, attr) is None:
4359 setattr(self.op, attr, getattr(old_node, attr))
4361 for attr in self._NFLAGS:
4362 if getattr(self.op, attr) is None:
4363 setattr(self.op, attr, True)
4365 if self.op.readd and not self.op.vm_capable:
4366 pri, sec = cfg.GetNodeInstances(node)
4368 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4369 " flag set to false, but it already holds"
4370 " instances" % node,
4373 # check that the type of the node (single versus dual homed) is the
4374 # same as for the master
4375 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4376 master_singlehomed = myself.secondary_ip == myself.primary_ip
4377 newbie_singlehomed = secondary_ip == primary_ip
4378 if master_singlehomed != newbie_singlehomed:
4379 if master_singlehomed:
4380 raise errors.OpPrereqError("The master has no secondary ip but the"
4381 " new node has one",
4384 raise errors.OpPrereqError("The master has a secondary ip but the"
4385 " new node doesn't have one",
4388 # checks reachability
4389 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4390 raise errors.OpPrereqError("Node not reachable by ping",
4391 errors.ECODE_ENVIRON)
4393 if not newbie_singlehomed:
4394 # check reachability from my secondary ip to newbie's secondary ip
4395 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4396 source=myself.secondary_ip):
4397 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4398 " based ping to node daemon port",
4399 errors.ECODE_ENVIRON)
4406 if self.op.master_capable:
4407 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4409 self.master_candidate = False
4412 self.new_node = old_node
4414 node_group = cfg.LookupNodeGroup(self.op.group)
4415 self.new_node = objects.Node(name=node,
4416 primary_ip=primary_ip,
4417 secondary_ip=secondary_ip,
4418 master_candidate=self.master_candidate,
4419 offline=False, drained=False,
4422 if self.op.ndparams:
4423 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4425 def Exec(self, feedback_fn):
4426 """Adds the new node to the cluster.
4429 new_node = self.new_node
4430 node = new_node.name
4432 # We are adding a new node, so we assume it's powered
4433 new_node.powered = True
4435 # for re-adds, reset the offline/drained/master-candidate flags;
4436 # we need to reset here, otherwise offline would prevent RPC calls
4437 # later in the procedure; this also means that if the re-add
4438 # fails, we are left with a non-offlined, broken node
4440 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4441 self.LogInfo("Readding a node, the offline/drained flags were reset")
4442 # if we demote the node, we do cleanup later in the procedure
4443 new_node.master_candidate = self.master_candidate
4444 if self.changed_primary_ip:
4445 new_node.primary_ip = self.op.primary_ip
4447 # copy the master/vm_capable flags
4448 for attr in self._NFLAGS:
4449 setattr(new_node, attr, getattr(self.op, attr))
4451 # notify the user about any possible mc promotion
4452 if new_node.master_candidate:
4453 self.LogInfo("Node will be a master candidate")
4455 if self.op.ndparams:
4456 new_node.ndparams = self.op.ndparams
4458 new_node.ndparams = {}
4460 # check connectivity
4461 result = self.rpc.call_version([node])[node]
4462 result.Raise("Can't get version information from node %s" % node)
4463 if constants.PROTOCOL_VERSION == result.payload:
4464 logging.info("Communication to node %s fine, sw version %s match",
4465 node, result.payload)
4467 raise errors.OpExecError("Version mismatch master version %s,"
4468 " node version %s" %
4469 (constants.PROTOCOL_VERSION, result.payload))
4471 # Add node to our /etc/hosts, and add key to known_hosts
4472 if self.cfg.GetClusterInfo().modify_etc_hosts:
4473 master_node = self.cfg.GetMasterNode()
4474 result = self.rpc.call_etc_hosts_modify(master_node,
4475 constants.ETC_HOSTS_ADD,
4478 result.Raise("Can't update hosts file with new host data")
4480 if new_node.secondary_ip != new_node.primary_ip:
4481 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4484 node_verify_list = [self.cfg.GetMasterNode()]
4485 node_verify_param = {
4486 constants.NV_NODELIST: [node],
4487 # TODO: do a node-net-test as well?
4490 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4491 self.cfg.GetClusterName())
4492 for verifier in node_verify_list:
4493 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4494 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4496 for failed in nl_payload:
4497 feedback_fn("ssh/hostname verification failed"
4498 " (checking from %s): %s" %
4499 (verifier, nl_payload[failed]))
4500 raise errors.OpExecError("ssh/hostname verification failed")
4503 _RedistributeAncillaryFiles(self)
4504 self.context.ReaddNode(new_node)
4505 # make sure we redistribute the config
4506 self.cfg.Update(new_node, feedback_fn)
4507 # and make sure the new node will not have old files around
4508 if not new_node.master_candidate:
4509 result = self.rpc.call_node_demote_from_mc(new_node.name)
4510 msg = result.fail_msg
4512 self.LogWarning("Node failed to demote itself from master"
4513 " candidate status: %s" % msg)
4515 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4516 additional_vm=self.op.vm_capable)
4517 self.context.AddNode(new_node, self.proc.GetECId())
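# Illustrative sketch (host name and IP are made up): adding or re-adding a
# node is normally driven by an opcodes.OpNodeAdd opcode such as
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com",
#                          secondary_ip="192.0.2.44",
#                          readd=False)
#
# On a single-homed cluster the secondary_ip argument would simply be
# omitted, in which case CheckPrereq above falls back to the primary IP.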
4520 class LUNodeSetParams(LogicalUnit):
4521 """Modifies the parameters of a node.
4523 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4524 to the node role (as _ROLE_*)
4525 @cvar _R2F: a dictionary from node role to tuples of flags
4526 @cvar _FLAGS: a list of attribute names corresponding to the flags
4529 HPATH = "node-modify"
4530 HTYPE = constants.HTYPE_NODE
4532 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4534 (True, False, False): _ROLE_CANDIDATE,
4535 (False, True, False): _ROLE_DRAINED,
4536 (False, False, True): _ROLE_OFFLINE,
4537 (False, False, False): _ROLE_REGULAR,
4539 _R2F = dict((v, k) for k, v in _F2R.items())
4540 _FLAGS = ["master_candidate", "drained", "offline"]
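# For clarity (derived from the mappings above, not new behaviour): the flag
# tuples follow the order of _FLAGS, so for example
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_OFFLINE] == (False, False, True)   # i.e. only "offline" set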
4542 def CheckArguments(self):
4543 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4544 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4545 self.op.master_capable, self.op.vm_capable,
4546 self.op.secondary_ip, self.op.ndparams]
4547 if all_mods.count(None) == len(all_mods):
4548 raise errors.OpPrereqError("Please pass at least one modification",
4550 if all_mods.count(True) > 1:
4551 raise errors.OpPrereqError("Can't set the node into more than one"
4552 " state at the same time",
4555 # Boolean value that tells us whether we might be demoting from MC
4556 self.might_demote = (self.op.master_candidate == False or
4557 self.op.offline == True or
4558 self.op.drained == True or
4559 self.op.master_capable == False)
4561 if self.op.secondary_ip:
4562 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4563 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4564 " address" % self.op.secondary_ip,
4567 self.lock_all = self.op.auto_promote and self.might_demote
4568 self.lock_instances = self.op.secondary_ip is not None
4570 def ExpandNames(self):
4572 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4574 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4576 if self.lock_instances:
4577 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4579 def DeclareLocks(self, level):
4580 # If we have locked all instances, before waiting to lock nodes, release
4581 # all the ones living on nodes unrelated to the current operation.
4582 if level == locking.LEVEL_NODE and self.lock_instances:
4583 instances_release = []
4585 self.affected_instances = []
4586 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4587 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4588 instance = self.context.cfg.GetInstanceInfo(instance_name)
4589 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4590 if i_mirrored and self.op.node_name in instance.all_nodes:
4591 instances_keep.append(instance_name)
4592 self.affected_instances.append(instance)
4594 instances_release.append(instance_name)
4595 if instances_release:
4596 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4597 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4599 def BuildHooksEnv(self):
4602 This runs on the master node.
4606 "OP_TARGET": self.op.node_name,
4607 "MASTER_CANDIDATE": str(self.op.master_candidate),
4608 "OFFLINE": str(self.op.offline),
4609 "DRAINED": str(self.op.drained),
4610 "MASTER_CAPABLE": str(self.op.master_capable),
4611 "VM_CAPABLE": str(self.op.vm_capable),
4614 def BuildHooksNodes(self):
4615 """Build hooks nodes.
4618 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4621 def CheckPrereq(self):
4622 """Check prerequisites.
4624 This checks the requested flag changes against the node's current state
and the cluster configuration.
4627 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4629 if (self.op.master_candidate is not None or
4630 self.op.drained is not None or
4631 self.op.offline is not None):
4632 # we can't change the master's node flags
4633 if self.op.node_name == self.cfg.GetMasterNode():
4634 raise errors.OpPrereqError("The master role can be changed"
4635 " only via master-failover",
4638 if self.op.master_candidate and not node.master_capable:
4639 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4640 " it a master candidate" % node.name,
4643 if self.op.vm_capable == False:
4644 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4646 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4647 " the vm_capable flag" % node.name,
4650 if node.master_candidate and self.might_demote and not self.lock_all:
4651 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4652 # check if after removing the current node, we're missing master
4654 (mc_remaining, mc_should, _) = \
4655 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4656 if mc_remaining < mc_should:
4657 raise errors.OpPrereqError("Not enough master candidates, please"
4658 " pass auto promote option to allow"
4659 " promotion", errors.ECODE_STATE)
4661 self.old_flags = old_flags = (node.master_candidate,
4662 node.drained, node.offline)
4663 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4664 self.old_role = old_role = self._F2R[old_flags]
4666 # Check for ineffective changes
4667 for attr in self._FLAGS:
4668 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4669 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4670 setattr(self.op, attr, None)
4672 # Past this point, any flag change to False means a transition
4673 # away from the respective state, as only real changes are kept
4675 # TODO: We might query the real power state if it supports OOB
4676 if _SupportsOob(self.cfg, node):
4677 if self.op.offline is False and not (node.powered or
4678 self.op.powered == True):
4679 raise errors.OpPrereqError(("Please power on node %s first before you"
4680 " can reset offline state") %
4682 elif self.op.powered is not None:
4683 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4684 " which does not support out-of-band"
4685 " handling") % self.op.node_name)
4687 # If we're being de-offlined/un-drained, we'll promote ourselves to MC if needed
4688 if (self.op.drained == False or self.op.offline == False or
4689 (self.op.master_capable and not node.master_capable)):
4690 if _DecideSelfPromotion(self):
4691 self.op.master_candidate = True
4692 self.LogInfo("Auto-promoting node to master candidate")
4694 # If we're no longer master capable, we'll demote ourselves from MC
4695 if self.op.master_capable == False and node.master_candidate:
4696 self.LogInfo("Demoting from master candidate")
4697 self.op.master_candidate = False
4700 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4701 if self.op.master_candidate:
4702 new_role = self._ROLE_CANDIDATE
4703 elif self.op.drained:
4704 new_role = self._ROLE_DRAINED
4705 elif self.op.offline:
4706 new_role = self._ROLE_OFFLINE
4707 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4708 # False is still in new flags, which means we're un-setting (the
4710 new_role = self._ROLE_REGULAR
4711 else: # no new flags, nothing, keep old role
4714 self.new_role = new_role
4716 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4717 # Trying to transition out of offline status
4718 result = self.rpc.call_version([node.name])[node.name]
4720 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4721 " to report its version: %s" %
4722 (node.name, result.fail_msg),
4725 self.LogWarning("Transitioning node from offline to online state"
4726 " without using re-add. Please make sure the node"
4729 if self.op.secondary_ip:
4730 # Ok even without locking, because this can't be changed by any LU
4731 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4732 master_singlehomed = master.secondary_ip == master.primary_ip
4733 if master_singlehomed and self.op.secondary_ip:
4734 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4735 " homed cluster", errors.ECODE_INVAL)
4738 if self.affected_instances:
4739 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4740 " node has instances (%s) configured"
4741 " to use it" % self.affected_instances)
4743 # On online nodes, check that no instances are running, and that
4744 # the node has the new ip and we can reach it.
4745 for instance in self.affected_instances:
4746 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4748 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4749 if master.name != node.name:
4750 # check reachability from master secondary ip to new secondary ip
4751 if not netutils.TcpPing(self.op.secondary_ip,
4752 constants.DEFAULT_NODED_PORT,
4753 source=master.secondary_ip):
4754 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4755 " based ping to node daemon port",
4756 errors.ECODE_ENVIRON)
4758 if self.op.ndparams:
4759 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4760 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4761 self.new_ndparams = new_ndparams
4763 def Exec(self, feedback_fn):
4768 old_role = self.old_role
4769 new_role = self.new_role
4773 if self.op.ndparams:
4774 node.ndparams = self.new_ndparams
4776 if self.op.powered is not None:
4777 node.powered = self.op.powered
4779 for attr in ["master_capable", "vm_capable"]:
4780 val = getattr(self.op, attr)
4782 setattr(node, attr, val)
4783 result.append((attr, str(val)))
4785 if new_role != old_role:
4786 # Tell the node to demote itself, if no longer MC and not offline
4787 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4788 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4790 self.LogWarning("Node failed to demote itself: %s", msg)
4792 new_flags = self._R2F[new_role]
4793 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4795 result.append((desc, str(nf)))
4796 (node.master_candidate, node.drained, node.offline) = new_flags
4798 # we locked all nodes, we adjust the CP before updating this node
4800 _AdjustCandidatePool(self, [node.name])
4802 if self.op.secondary_ip:
4803 node.secondary_ip = self.op.secondary_ip
4804 result.append(("secondary_ip", self.op.secondary_ip))
4806 # this will trigger configuration file update, if needed
4807 self.cfg.Update(node, feedback_fn)
4809 # this will trigger job queue propagation or cleanup if the mc
4811 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4812 self.context.ReaddNode(node)
4817 class LUNodePowercycle(NoHooksLU):
4818 """Powercycles a node.
4823 def CheckArguments(self):
4824 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4825 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4826 raise errors.OpPrereqError("The node is the master and the force"
4827 " parameter was not set",
4830 def ExpandNames(self):
4831 """Locking for PowercycleNode.
4833 This is a last-resort option and shouldn't block on other
4834 jobs. Therefore, we grab no locks.
4837 self.needed_locks = {}
4839 def Exec(self, feedback_fn):
4843 result = self.rpc.call_node_powercycle(self.op.node_name,
4844 self.cfg.GetHypervisorType())
4845 result.Raise("Failed to schedule the reboot")
4846 return result.payload
4849 class LUClusterQuery(NoHooksLU):
4850 """Query cluster configuration.
4855 def ExpandNames(self):
4856 self.needed_locks = {}
4858 def Exec(self, feedback_fn):
4859 """Return cluster config.
4862 cluster = self.cfg.GetClusterInfo()
4865 # Filter just for enabled hypervisors
4866 for os_name, hv_dict in cluster.os_hvp.items():
4867 os_hvp[os_name] = {}
4868 for hv_name, hv_params in hv_dict.items():
4869 if hv_name in cluster.enabled_hypervisors:
4870 os_hvp[os_name][hv_name] = hv_params
4872 # Convert ip_family to ip_version
4873 primary_ip_version = constants.IP4_VERSION
4874 if cluster.primary_ip_family == netutils.IP6Address.family:
4875 primary_ip_version = constants.IP6_VERSION
4878 "software_version": constants.RELEASE_VERSION,
4879 "protocol_version": constants.PROTOCOL_VERSION,
4880 "config_version": constants.CONFIG_VERSION,
4881 "os_api_version": max(constants.OS_API_VERSIONS),
4882 "export_version": constants.EXPORT_VERSION,
4883 "architecture": (platform.architecture()[0], platform.machine()),
4884 "name": cluster.cluster_name,
4885 "master": cluster.master_node,
4886 "default_hypervisor": cluster.enabled_hypervisors[0],
4887 "enabled_hypervisors": cluster.enabled_hypervisors,
4888 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4889 for hypervisor_name in cluster.enabled_hypervisors]),
4891 "beparams": cluster.beparams,
4892 "osparams": cluster.osparams,
4893 "nicparams": cluster.nicparams,
4894 "ndparams": cluster.ndparams,
4895 "candidate_pool_size": cluster.candidate_pool_size,
4896 "master_netdev": cluster.master_netdev,
4897 "volume_group_name": cluster.volume_group_name,
4898 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4899 "file_storage_dir": cluster.file_storage_dir,
4900 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4901 "maintain_node_health": cluster.maintain_node_health,
4902 "ctime": cluster.ctime,
4903 "mtime": cluster.mtime,
4904 "uuid": cluster.uuid,
4905 "tags": list(cluster.GetTags()),
4906 "uid_pool": cluster.uid_pool,
4907 "default_iallocator": cluster.default_iallocator,
4908 "reserved_lvs": cluster.reserved_lvs,
4909 "primary_ip_version": primary_ip_version,
4910 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4911 "hidden_os": cluster.hidden_os,
4912 "blacklisted_os": cluster.blacklisted_os,
4918 class LUClusterConfigQuery(NoHooksLU):
4919 """Return configuration values.
4923 _FIELDS_DYNAMIC = utils.FieldSet()
4924 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4925 "watcher_pause", "volume_group_name")
4927 def CheckArguments(self):
4928 _CheckOutputFields(static=self._FIELDS_STATIC,
4929 dynamic=self._FIELDS_DYNAMIC,
4930 selected=self.op.output_fields)
4932 def ExpandNames(self):
4933 self.needed_locks = {}
4935 def Exec(self, feedback_fn):
4936 """Dump a representation of the cluster config to the standard output.
4940 for field in self.op.output_fields:
4941 if field == "cluster_name":
4942 entry = self.cfg.GetClusterName()
4943 elif field == "master_node":
4944 entry = self.cfg.GetMasterNode()
4945 elif field == "drain_flag":
4946 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4947 elif field == "watcher_pause":
4948 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4949 elif field == "volume_group_name":
4950 entry = self.cfg.GetVGName()
4952 raise errors.ParameterError(field)
4953 values.append(entry)
4957 class LUInstanceActivateDisks(NoHooksLU):
4958 """Bring up an instance's disks.
4963 def ExpandNames(self):
4964 self._ExpandAndLockInstance()
4965 self.needed_locks[locking.LEVEL_NODE] = []
4966 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4968 def DeclareLocks(self, level):
4969 if level == locking.LEVEL_NODE:
4970 self._LockInstancesNodes()
4972 def CheckPrereq(self):
4973 """Check prerequisites.
4975 This checks that the instance is in the cluster.
4978 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4979 assert self.instance is not None, \
4980 "Cannot retrieve locked instance %s" % self.op.instance_name
4981 _CheckNodeOnline(self, self.instance.primary_node)
4983 def Exec(self, feedback_fn):
4984 """Activate the disks.
4987 disks_ok, disks_info = \
4988 _AssembleInstanceDisks(self, self.instance,
4989 ignore_size=self.op.ignore_size)
4991 raise errors.OpExecError("Cannot activate block devices")
4996 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4998 """Prepare the block devices for an instance.
5000 This sets up the block devices on all nodes.
5002 @type lu: L{LogicalUnit}
5003 @param lu: the logical unit on whose behalf we execute
5004 @type instance: L{objects.Instance}
5005 @param instance: the instance for whose disks we assemble
5006 @type disks: list of L{objects.Disk} or None
5007 @param disks: which disks to assemble (or all, if None)
5008 @type ignore_secondaries: boolean
5009 @param ignore_secondaries: if true, errors on secondary nodes
5010 won't result in an error return from the function
5011 @type ignore_size: boolean
5012 @param ignore_size: if true, the current known size of the disk
5013 will not be used during the disk activation, useful for cases
5014 when the size is wrong
5015 @return: a tuple (status, device_info), where status is False if the
5016 operation failed, and device_info is a list of
5017 (host, instance_visible_name, node_visible_name) tuples with the mapping from node devices to instance devices
5022 iname = instance.name
5023 disks = _ExpandCheckDisks(instance, disks)
5025 # With the two-pass mechanism we try to reduce the window of
5026 # opportunity for the race condition of switching DRBD to primary
5027 # before handshaking has occurred, but we do not eliminate it
5029 # The proper fix would be to wait (with some limits) until the
5030 # connection has been made and drbd transitions from WFConnection
5031 # into any other network-connected state (Connected, SyncTarget,
5034 # 1st pass, assemble on all nodes in secondary mode
5035 for idx, inst_disk in enumerate(disks):
5036 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5038 node_disk = node_disk.Copy()
5039 node_disk.UnsetSize()
5040 lu.cfg.SetDiskID(node_disk, node)
5041 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5042 msg = result.fail_msg
5044 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5045 " (is_primary=False, pass=1): %s",
5046 inst_disk.iv_name, node, msg)
5047 if not ignore_secondaries:
5050 # FIXME: race condition on drbd migration to primary
5052 # 2nd pass, do only the primary node
5053 for idx, inst_disk in enumerate(disks):
5056 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5057 if node != instance.primary_node:
5060 node_disk = node_disk.Copy()
5061 node_disk.UnsetSize()
5062 lu.cfg.SetDiskID(node_disk, node)
5063 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5064 msg = result.fail_msg
5066 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5067 " (is_primary=True, pass=2): %s",
5068 inst_disk.iv_name, node, msg)
5071 dev_path = result.payload
5073 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5075 # leave the disks configured for the primary node
5076 # this is a workaround that would be fixed better by
5077 # improving the logical/physical id handling
5079 lu.cfg.SetDiskID(disk, instance.primary_node)
5081 return disks_ok, device_info
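# Illustrative usage sketch (mirrors how LUInstanceActivateDisks above calls
# this helper; the feedback message is made up):
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s is visible as %s" % (node, iv_name, dev_path))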
5084 def _StartInstanceDisks(lu, instance, force):
5085 """Start the disks of an instance.
5088 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5089 ignore_secondaries=force)
5091 _ShutdownInstanceDisks(lu, instance)
5092 if force is not None and not force:
5093 lu.proc.LogWarning("", hint="If the message above refers to a"
5095 " you can retry the operation using '--force'.")
5096 raise errors.OpExecError("Disk consistency error")
5099 class LUInstanceDeactivateDisks(NoHooksLU):
5100 """Shutdown an instance's disks.
5105 def ExpandNames(self):
5106 self._ExpandAndLockInstance()
5107 self.needed_locks[locking.LEVEL_NODE] = []
5108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5110 def DeclareLocks(self, level):
5111 if level == locking.LEVEL_NODE:
5112 self._LockInstancesNodes()
5114 def CheckPrereq(self):
5115 """Check prerequisites.
5117 This checks that the instance is in the cluster.
5120 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5121 assert self.instance is not None, \
5122 "Cannot retrieve locked instance %s" % self.op.instance_name
5124 def Exec(self, feedback_fn):
5125 """Deactivate the disks
5128 instance = self.instance
5130 _ShutdownInstanceDisks(self, instance)
5132 _SafeShutdownInstanceDisks(self, instance)
5135 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5136 """Shutdown block devices of an instance.
5138 This function checks if an instance is running, before calling
5139 _ShutdownInstanceDisks.
5142 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5143 _ShutdownInstanceDisks(lu, instance, disks=disks)
5146 def _ExpandCheckDisks(instance, disks):
5147 """Return the instance disks selected by the disks list
5149 @type disks: list of L{objects.Disk} or None
5150 @param disks: selected disks
5151 @rtype: list of L{objects.Disk}
5152 @return: selected instance disks to act on
5156 return instance.disks
5158 if not set(disks).issubset(instance.disks):
5159 raise errors.ProgrammerError("Can only act on disks belonging to the"
5164 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5165 """Shutdown block devices of an instance.
5167 This does the shutdown on all nodes of the instance.
5169 If the ignore_primary is false, errors on the primary node are
5174 disks = _ExpandCheckDisks(instance, disks)
5177 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5178 lu.cfg.SetDiskID(top_disk, node)
5179 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5180 msg = result.fail_msg
5182 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5183 disk.iv_name, node, msg)
5184 if ((node == instance.primary_node and not ignore_primary) or
5185 (node != instance.primary_node and not result.offline)):
5190 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5191 """Checks if a node has enough free memory.
5193 This function checks if a given node has the needed amount of free
5194 memory. In case the node has less memory or we cannot get the
5195 information from the node, this function raises an OpPrereqError
5198 @type lu: C{LogicalUnit}
5199 @param lu: a logical unit from which we get configuration data
5201 @param node: the node to check
5202 @type reason: C{str}
5203 @param reason: string to use in the error message
5204 @type requested: C{int}
5205 @param requested: the amount of memory in MiB to check for
5206 @type hypervisor_name: C{str}
5207 @param hypervisor_name: the hypervisor to ask for memory stats
5208 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5209 we cannot check the node
5212 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5213 nodeinfo[node].Raise("Can't get data from node %s" % node,
5214 prereq=True, ecode=errors.ECODE_ENVIRON)
5215 free_mem = nodeinfo[node].payload.get('memory_free', None)
5216 if not isinstance(free_mem, int):
5217 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5218 " was '%s'" % (node, free_mem),
5219 errors.ECODE_ENVIRON)
5220 if requested > free_mem:
5221 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5222 " needed %s MiB, available %s MiB" %
5223 (node, reason, requested, free_mem),
5227 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5228 """Checks if nodes have enough free disk space in the all VGs.
5230 This function check if all given nodes have the needed amount of
5231 free disk. In case any node has less disk or we cannot get the
5232 information from the node, this function raise an OpPrereqError
5235 @type lu: C{LogicalUnit}
5236 @param lu: a logical unit from which we get configuration data
5237 @type nodenames: C{list}
5238 @param nodenames: the list of node names to check
5239 @type req_sizes: C{dict}
5240 @param req_sizes: the hash of vg and corresponding amount of disk in
5242 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5243 or we cannot check the node
5246 for vg, req_size in req_sizes.items():
5247 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
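# Illustrative sketch (node variables, volume group names and sizes are
# placeholders): req_sizes is simply a dict mapping each volume group to the
# space required on it, in MiB:
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode],
#                            {"xenvg": 10240, "backupvg": 2048})
#
# i.e. 10 GiB must be free in "xenvg" and 2 GiB in "backupvg" on both nodes.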
5250 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5251 """Checks if nodes have enough free disk space in the specified VG.
5253 This function checks if all given nodes have the needed amount of
5254 free disk. In case any node has less disk or we cannot get the
5255 information from the node, this function raises an OpPrereqError
5258 @type lu: C{LogicalUnit}
5259 @param lu: a logical unit from which we get configuration data
5260 @type nodenames: C{list}
5261 @param nodenames: the list of node names to check
5263 @param vg: the volume group to check
5264 @type requested: C{int}
5265 @param requested: the amount of disk in MiB to check for
5266 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5267 or we cannot check the node
5270 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5271 for node in nodenames:
5272 info = nodeinfo[node]
5273 info.Raise("Cannot get current information from node %s" % node,
5274 prereq=True, ecode=errors.ECODE_ENVIRON)
5275 vg_free = info.payload.get("vg_free", None)
5276 if not isinstance(vg_free, int):
5277 raise errors.OpPrereqError("Can't compute free disk space on node"
5278 " %s for vg %s, result was '%s'" %
5279 (node, vg, vg_free), errors.ECODE_ENVIRON)
5280 if requested > vg_free:
5281 raise errors.OpPrereqError("Not enough disk space on target node %s"
5282 " vg %s: required %d MiB, available %d MiB" %
5283 (node, vg, requested, vg_free),
5287 class LUInstanceStartup(LogicalUnit):
5288 """Starts an instance.
5291 HPATH = "instance-start"
5292 HTYPE = constants.HTYPE_INSTANCE
5295 def CheckArguments(self):
5297 if self.op.beparams:
5298 # fill the beparams dict
5299 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5301 def ExpandNames(self):
5302 self._ExpandAndLockInstance()
5304 def BuildHooksEnv(self):
5307 This runs on master, primary and secondary nodes of the instance.
5311 "FORCE": self.op.force,
5314 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5318 def BuildHooksNodes(self):
5319 """Build hooks nodes.
5322 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5325 def CheckPrereq(self):
5326 """Check prerequisites.
5328 This checks that the instance is in the cluster.
5331 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5332 assert self.instance is not None, \
5333 "Cannot retrieve locked instance %s" % self.op.instance_name
5336 if self.op.hvparams:
5337 # check hypervisor parameter syntax (locally)
5338 cluster = self.cfg.GetClusterInfo()
5339 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5340 filled_hvp = cluster.FillHV(instance)
5341 filled_hvp.update(self.op.hvparams)
5342 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5343 hv_type.CheckParameterSyntax(filled_hvp)
5344 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5346 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5348 if self.primary_offline and self.op.ignore_offline_nodes:
5349 self.proc.LogWarning("Ignoring offline primary node")
5351 if self.op.hvparams or self.op.beparams:
5352 self.proc.LogWarning("Overridden parameters are ignored")
5354 _CheckNodeOnline(self, instance.primary_node)
5356 bep = self.cfg.GetClusterInfo().FillBE(instance)
5358 # check that the instance's bridges exist
5359 _CheckInstanceBridgesExist(self, instance)
5361 remote_info = self.rpc.call_instance_info(instance.primary_node,
5363 instance.hypervisor)
5364 remote_info.Raise("Error checking node %s" % instance.primary_node,
5365 prereq=True, ecode=errors.ECODE_ENVIRON)
5366 if not remote_info.payload: # not running already
5367 _CheckNodeFreeMemory(self, instance.primary_node,
5368 "starting instance %s" % instance.name,
5369 bep[constants.BE_MEMORY], instance.hypervisor)
5371 def Exec(self, feedback_fn):
5372 """Start the instance.
5375 instance = self.instance
5376 force = self.op.force
5378 self.cfg.MarkInstanceUp(instance.name)
5380 if self.primary_offline:
5381 assert self.op.ignore_offline_nodes
5382 self.proc.LogInfo("Primary node offline, marked instance as started")
5384 node_current = instance.primary_node
5386 _StartInstanceDisks(self, instance, force)
5388 result = self.rpc.call_instance_start(node_current, instance,
5389 self.op.hvparams, self.op.beparams)
5390 msg = result.fail_msg
5392 _ShutdownInstanceDisks(self, instance)
5393 raise errors.OpExecError("Could not start instance: %s" % msg)
5396 class LUInstanceReboot(LogicalUnit):
5397 """Reboot an instance.
5400 HPATH = "instance-reboot"
5401 HTYPE = constants.HTYPE_INSTANCE
5404 def ExpandNames(self):
5405 self._ExpandAndLockInstance()
5407 def BuildHooksEnv(self):
5410 This runs on master, primary and secondary nodes of the instance.
5414 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5415 "REBOOT_TYPE": self.op.reboot_type,
5416 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5419 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5423 def BuildHooksNodes(self):
5424 """Build hooks nodes.
5427 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5430 def CheckPrereq(self):
5431 """Check prerequisites.
5433 This checks that the instance is in the cluster.
5436 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5437 assert self.instance is not None, \
5438 "Cannot retrieve locked instance %s" % self.op.instance_name
5440 _CheckNodeOnline(self, instance.primary_node)
5442 # check that the instance's bridges exist
5443 _CheckInstanceBridgesExist(self, instance)
5445 def Exec(self, feedback_fn):
5446 """Reboot the instance.
5449 instance = self.instance
5450 ignore_secondaries = self.op.ignore_secondaries
5451 reboot_type = self.op.reboot_type
5453 remote_info = self.rpc.call_instance_info(instance.primary_node,
5455 instance.hypervisor)
5456 remote_info.Raise("Error checking node %s" % instance.primary_node)
5457 instance_running = bool(remote_info.payload)
5459 node_current = instance.primary_node
5461 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5462 constants.INSTANCE_REBOOT_HARD]:
5463 for disk in instance.disks:
5464 self.cfg.SetDiskID(disk, node_current)
5465 result = self.rpc.call_instance_reboot(node_current, instance,
5467 self.op.shutdown_timeout)
5468 result.Raise("Could not reboot instance")
5470 if instance_running:
5471 result = self.rpc.call_instance_shutdown(node_current, instance,
5472 self.op.shutdown_timeout)
5473 result.Raise("Could not shutdown instance for full reboot")
5474 _ShutdownInstanceDisks(self, instance)
5476 self.LogInfo("Instance %s was already stopped, starting now",
5478 _StartInstanceDisks(self, instance, ignore_secondaries)
5479 result = self.rpc.call_instance_start(node_current, instance, None, None)
5480 msg = result.fail_msg
5482 _ShutdownInstanceDisks(self, instance)
5483 raise errors.OpExecError("Could not start instance for"
5484 " full reboot: %s" % msg)
5486 self.cfg.MarkInstanceUp(instance.name)
5489 class LUInstanceShutdown(LogicalUnit):
5490 """Shutdown an instance.
5493 HPATH = "instance-stop"
5494 HTYPE = constants.HTYPE_INSTANCE
5497 def ExpandNames(self):
5498 self._ExpandAndLockInstance()
5500 def BuildHooksEnv(self):
5503 This runs on master, primary and secondary nodes of the instance.
5506 env = _BuildInstanceHookEnvByObject(self, self.instance)
5507 env["TIMEOUT"] = self.op.timeout
5510 def BuildHooksNodes(self):
5511 """Build hooks nodes.
5514 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5517 def CheckPrereq(self):
5518 """Check prerequisites.
5520 This checks that the instance is in the cluster.
5523 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5524 assert self.instance is not None, \
5525 "Cannot retrieve locked instance %s" % self.op.instance_name
5527 self.primary_offline = \
5528 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5530 if self.primary_offline and self.op.ignore_offline_nodes:
5531 self.proc.LogWarning("Ignoring offline primary node")
5533 _CheckNodeOnline(self, self.instance.primary_node)
5535 def Exec(self, feedback_fn):
5536 """Shutdown the instance.
5539 instance = self.instance
5540 node_current = instance.primary_node
5541 timeout = self.op.timeout
5543 self.cfg.MarkInstanceDown(instance.name)
5545 if self.primary_offline:
5546 assert self.op.ignore_offline_nodes
5547 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5549 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5550 msg = result.fail_msg
5552 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5554 _ShutdownInstanceDisks(self, instance)
5557 class LUInstanceReinstall(LogicalUnit):
5558 """Reinstall an instance.
5561 HPATH = "instance-reinstall"
5562 HTYPE = constants.HTYPE_INSTANCE
5565 def ExpandNames(self):
5566 self._ExpandAndLockInstance()
5568 def BuildHooksEnv(self):
5571 This runs on master, primary and secondary nodes of the instance.
5574 return _BuildInstanceHookEnvByObject(self, self.instance)
5576 def BuildHooksNodes(self):
5577 """Build hooks nodes.
5580 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5583 def CheckPrereq(self):
5584 """Check prerequisites.
5586 This checks that the instance is in the cluster and is not running.
5589 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5590 assert instance is not None, \
5591 "Cannot retrieve locked instance %s" % self.op.instance_name
5592 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5593 " offline, cannot reinstall")
5594 for node in instance.secondary_nodes:
5595 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5596 " cannot reinstall")
5598 if instance.disk_template == constants.DT_DISKLESS:
5599 raise errors.OpPrereqError("Instance '%s' has no disks" %
5600 self.op.instance_name,
5602 _CheckInstanceDown(self, instance, "cannot reinstall")
5604 if self.op.os_type is not None:
5606 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5607 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5608 instance_os = self.op.os_type
5610 instance_os = instance.os
5612 nodelist = list(instance.all_nodes)
5614 if self.op.osparams:
5615 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5616 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5617 self.os_inst = i_osdict # the new dict (without defaults)
5621 self.instance = instance
5623 def Exec(self, feedback_fn):
5624 """Reinstall the instance.
5627 inst = self.instance
5629 if self.op.os_type is not None:
5630 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5631 inst.os = self.op.os_type
5632 # Write to configuration
5633 self.cfg.Update(inst, feedback_fn)
5635 _StartInstanceDisks(self, inst, None)
5637 feedback_fn("Running the instance OS create scripts...")
5638 # FIXME: pass debug option from opcode to backend
5639 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5640 self.op.debug_level,
5641 osparams=self.os_inst)
5642 result.Raise("Could not install OS for instance %s on node %s" %
5643 (inst.name, inst.primary_node))
5645 _ShutdownInstanceDisks(self, inst)
5648 class LUInstanceRecreateDisks(LogicalUnit):
5649 """Recreate an instance's missing disks.
5652 HPATH = "instance-recreate-disks"
5653 HTYPE = constants.HTYPE_INSTANCE
5656 def ExpandNames(self):
5657 self._ExpandAndLockInstance()
5659 def BuildHooksEnv(self):
5662 This runs on master, primary and secondary nodes of the instance.
5665 return _BuildInstanceHookEnvByObject(self, self.instance)
5667 def BuildHooksNodes(self):
5668 """Build hooks nodes.
5671 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5674 def CheckPrereq(self):
5675 """Check prerequisites.
5677 This checks that the instance is in the cluster and is not running.
5680 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5681 assert instance is not None, \
5682 "Cannot retrieve locked instance %s" % self.op.instance_name
5683 _CheckNodeOnline(self, instance.primary_node)
5685 if instance.disk_template == constants.DT_DISKLESS:
5686 raise errors.OpPrereqError("Instance '%s' has no disks" %
5687 self.op.instance_name, errors.ECODE_INVAL)
5688 _CheckInstanceDown(self, instance, "cannot recreate disks")
5690 if not self.op.disks:
5691 self.op.disks = range(len(instance.disks))
5693 for idx in self.op.disks:
5694 if idx >= len(instance.disks):
5695 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5698 self.instance = instance
5700 def Exec(self, feedback_fn):
5701 """Recreate the disks.
5705 for idx, _ in enumerate(self.instance.disks):
5706 if idx not in self.op.disks: # disk idx has not been passed in
5710 _CreateDisks(self, self.instance, to_skip=to_skip)
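# Illustrative sketch (instance name is made up; the opcode name is assumed
# to mirror the LU name): recreating only selected disks, e.g. the first and
# third one, matches the index handling in CheckPrereq/Exec above, while an
# empty disks list defaults to all of the instance's disks:
#
#   op = opcodes.OpInstanceRecreateDisks(instance_name="web1.example.com",
#                                        disks=[0, 2])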
5713 class LUInstanceRename(LogicalUnit):
5714 """Rename an instance.
5717 HPATH = "instance-rename"
5718 HTYPE = constants.HTYPE_INSTANCE
5720 def CheckArguments(self):
5724 if self.op.ip_check and not self.op.name_check:
5725 # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)
5729 def BuildHooksEnv(self):
5732 This runs on master, primary and secondary nodes of the instance.
5735 env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
5746 def CheckPrereq(self):
5747 """Check prerequisites.
5749 This checks that the instance is in the cluster and is not running.
5752 self.op.instance_name = _ExpandInstanceName(self.cfg,
5753 self.op.instance_name)
5754 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5755 assert instance is not None
5756 _CheckNodeOnline(self, instance.primary_node)
5757 _CheckInstanceDown(self, instance, "cannot rename")
5758 self.instance = instance
5760 new_name = self.op.new_name
5761 if self.op.name_check:
5762 hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_NOTUNIQUE)
      new_name = self.op.new_name = hostname.name
5771 if (self.op.ip_check and
5772 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5773 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5774 (hostname.ip, new_name),
5775 errors.ECODE_NOTUNIQUE)
5777 instance_list = self.cfg.GetInstanceList()
5778 if new_name in instance_list and new_name != instance.name:
5779 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5780 new_name, errors.ECODE_EXISTS)
5782 def Exec(self, feedback_fn):
5783 """Rename the instance.
5786 inst = self.instance
5787 old_name = inst.name
5789 rename_file_storage = False
5790 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5791 self.op.new_name != inst.name):
5792 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5793 rename_file_storage = True
5795 self.cfg.RenameInstance(inst.name, self.op.new_name)
5796 # Change the instance lock. This is definitely safe while we hold the BGL.
5797 # Otherwise the new lock would have to be added in acquired mode.
5799 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5800 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5802 # re-read the instance from the configuration after rename
5803 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5805 if rename_file_storage:
5806 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5807 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5808 old_file_storage_dir,
5809 new_file_storage_dir)
5810 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5811 " (but the instance has been renamed in Ganeti)" %
5812 (inst.primary_node, old_file_storage_dir,
5813 new_file_storage_dir))
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
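    # A failure of the OS rename script is deliberately reported only as a
    # warning: at this point the instance has already been renamed in the
    # Ganeti configuration, so aborting here would not undo the rename.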
5831 class LUInstanceRemove(LogicalUnit):
5832 """Remove an instance.
5835 HPATH = "instance-remove"
5836 HTYPE = constants.HTYPE_INSTANCE
5839 def ExpandNames(self):
5840 self._ExpandAndLockInstance()
5841 self.needed_locks[locking.LEVEL_NODE] = []
5842 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5844 def DeclareLocks(self, level):
5845 if level == locking.LEVEL_NODE:
5846 self._LockInstancesNodes()
5848 def BuildHooksEnv(self):
5851 This runs on master, primary and secondary nodes of the instance.
5854 env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env
5858 def BuildHooksNodes(self):
5859 """Build hooks nodes.
5862 nl = [self.cfg.GetMasterNode()]
5863 nl_post = list(self.instance.all_nodes) + nl
5864 return (nl, nl_post)
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This checks that the instance is in the cluster.
5872 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5873 assert self.instance is not None, \
5874 "Cannot retrieve locked instance %s" % self.op.instance_name
5876 def Exec(self, feedback_fn):
5877 """Remove the instance.
5880 instance = self.instance
5881 logging.info("Shutting down instance %s on node %s",
5882 instance.name, instance.primary_node)
5884 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5885 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
5895 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5898 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5899 """Utility function to remove an instance.
5902 logging.info("Removing block devices for instance %s", instance.name)
5904 if not _RemoveDisks(lu, instance):
5905 if not ignore_failures:
5906 raise errors.OpExecError("Can't remove instance's disks")
5907 feedback_fn("Warning: can't remove instance's disks")
5909 logging.info("Removing instance %s out of cluster config", instance.name)
5911 lu.cfg.RemoveInstance(instance.name)
5913 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5914 "Instance lock removal conflict"
5916 # Remove lock for the instance
5917 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
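  # Scheduling the lock name in lu.remove_locks (instead of releasing it here)
  # is intentional: the per-instance lock is then dropped by the job processor
  # once the LU has finished, i.e. after the instance is already gone from the
  # configuration above.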
5920 class LUInstanceQuery(NoHooksLU):
5921 """Logical unit for querying instances.
5924 # pylint: disable-msg=W0142
5927 def CheckArguments(self):
5928 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5929 self.op.output_fields, self.op.use_locking)
5931 def ExpandNames(self):
5932 self.iq.ExpandNames(self)
5934 def DeclareLocks(self, level):
5935 self.iq.DeclareLocks(self, level)
5937 def Exec(self, feedback_fn):
5938 return self.iq.OldStyleQuery(self)
5941 class LUInstanceFailover(LogicalUnit):
5942 """Failover an instance.
5945 HPATH = "instance-failover"
5946 HTYPE = constants.HTYPE_INSTANCE
5949 def CheckArguments(self):
5950 """Check the arguments.
5953 self.iallocator = getattr(self.op, "iallocator", None)
5954 self.target_node = getattr(self.op, "target_node", None)
5956 def ExpandNames(self):
5957 self._ExpandAndLockInstance()
5959 if self.op.target_node is not None:
5960 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5962 self.needed_locks[locking.LEVEL_NODE] = []
5963 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5965 ignore_consistency = self.op.ignore_consistency
5966 shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       iallocator=self.op.iallocator,
                                       target_node=self.op.target_node,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
5974 self.tasklets = [self._migrater]
5976 def DeclareLocks(self, level):
5977 if level == locking.LEVEL_NODE:
5978 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5979 if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
5989 def BuildHooksEnv(self):
5992 This runs on master, primary and secondary nodes of the instance.
5995 instance = self._migrater.instance
5996 source_node = instance.primary_node
5997 target_node = self._migrater.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env
6015 def BuildHooksNodes(self):
6016 """Build hooks nodes.
6019 instance = self._migrater.instance
6020 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6021 return (nl, nl + [instance.primary_node])
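  # The two node lists returned above are the nodes on which the pre- and
  # post-hooks run; the old primary is added only to the post-hook list,
  # presumably because during a failover that node may be offline.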
6024 class LUInstanceMigrate(LogicalUnit):
6025 """Migrate an instance.
6027 This is migration without shutting down, compared to the failover,
6028 which is done with shutdown.
6031 HPATH = "instance-migrate"
6032 HTYPE = constants.HTYPE_INSTANCE
6035 def ExpandNames(self):
6036 self._ExpandAndLockInstance()
6038 if self.op.target_node is not None:
6039 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6041 self.needed_locks[locking.LEVEL_NODE] = []
6042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6044 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6045 cleanup=self.op.cleanup,
6046 iallocator=self.op.iallocator,
6047 target_node=self.op.target_node,
6049 fallback=self.op.allow_failover)
6050 self.tasklets = [self._migrater]
6052 def DeclareLocks(self, level):
6053 if level == locking.LEVEL_NODE:
6054 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6055 if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
6065 def BuildHooksEnv(self):
6068 This runs on master, primary and secondary nodes of the instance.
6071 instance = self._migrater.instance
6072 source_node = instance.primary_node
6073 target_node = self._migrater.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env
6090 def BuildHooksNodes(self):
6091 """Build hooks nodes.
6094 instance = self._migrater.instance
6095 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6096 return (nl, nl + [instance.primary_node])
6099 class LUInstanceMove(LogicalUnit):
6100 """Move an instance by data-copying.
6103 HPATH = "instance-move"
6104 HTYPE = constants.HTYPE_INSTANCE
6107 def ExpandNames(self):
6108 self._ExpandAndLockInstance()
6109 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6110 self.op.target_node = target_node
6111 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6112 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6114 def DeclareLocks(self, level):
6115 if level == locking.LEVEL_NODE:
6116 self._LockInstancesNodes(primary_only=True)
6118 def BuildHooksEnv(self):
6121 This runs on master, primary and secondary nodes of the instance.
6125 "TARGET_NODE": self.op.target_node,
6126 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6128 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6131 def BuildHooksNodes(self):
6132 """Build hooks nodes.
6136 self.cfg.GetMasterNode(),
6137 self.instance.primary_node,
6138 self.op.target_node,
6142 def CheckPrereq(self):
6143 """Check prerequisites.
6145 This checks that the instance is in the cluster.
6148 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6149 assert self.instance is not None, \
6150 "Cannot retrieve locked instance %s" % self.op.instance_name
6152 node = self.cfg.GetNodeInfo(self.op.target_node)
6153 assert node is not None, \
6154 "Cannot retrieve locked node %s" % self.op.target_node
6156 self.target_node = target_node = node.name
6158 if target_node == instance.primary_node:
6159 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6160 (instance.name, target_node),
6163 bep = self.cfg.GetClusterInfo().FillBE(instance)
6165 for idx, dsk in enumerate(instance.disks):
6166 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6167 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6168 " cannot copy" % idx, errors.ECODE_STATE)
6170 _CheckNodeOnline(self, target_node)
6171 _CheckNodeNotDrained(self, target_node)
6172 _CheckNodeVmCapable(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
6184 _CheckInstanceBridgesExist(self, instance, node=target_node)
6186 def Exec(self, feedback_fn):
6187 """Move an instance.
6189 The move is done by shutting it down on its present node, copying
6190 the data over (slow) and starting it on the new node.
6193 instance = self.instance
6195 source_node = instance.primary_node
6196 target_node = self.target_node
6198 self.LogInfo("Shutting down instance %s on source node %s",
6199 instance.name, source_node)
6201 result = self.rpc.call_instance_shutdown(source_node, instance,
6202 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))
    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))
6258 instance.primary_node = target_node
6259 self.cfg.Update(instance, feedback_fn)
6261 self.LogInfo("Removing the disks on the original node")
6262 _RemoveDisks(self, instance, target_node=source_node)
6264 # Only start the instance if it's marked as up
6265 if instance.admin_up:
6266 self.LogInfo("Starting instance %s on node %s",
6267 instance.name, target_node)
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6283 class LUNodeMigrate(LogicalUnit):
6284 """Migrate all instances from a node.
6287 HPATH = "node-migrate"
6288 HTYPE = constants.HTYPE_NODE
6291 def CheckArguments(self):
6292 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6294 def ExpandNames(self):
6295 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6297 self.needed_locks = {}
    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    self.lock_all_nodes = False
6305 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6306 logging.debug("Migrating instance %s", inst.name)
6307 names.append(inst.name)
6309 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6310 iallocator=self.op.iallocator,
6313 if inst.disk_template in constants.DTS_EXT_MIRROR:
6314 # We need to lock all nodes, as the iallocator will choose the
6315 # destination nodes afterwards
6316 self.lock_all_nodes = True
6318 self.tasklets = tasklets
6320 # Declare node locks
6321 if self.lock_all_nodes:
6322 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6324 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6327 # Declare instance locks
6328 self.needed_locks[locking.LEVEL_INSTANCE] = names
6330 def DeclareLocks(self, level):
6331 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6332 self._LockInstancesNodes()
6334 def BuildHooksEnv(self):
6337 This runs on the master, the primary and all the secondaries.
    return {
      "NODE_NAME": self.op.node_name,
      }
6344 def BuildHooksNodes(self):
6345 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)
6352 class TLMigrateInstance(Tasklet):
6353 """Tasklet class for instance migration.
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we should clean up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
      possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between the
      source and the target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, timeout of the shutdown

  """
6376 def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6377 target_node=None, failover=False, fallback=False,
6378 ignore_consistency=False,
6379 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6380 """Initializes this class.
6383 Tasklet.__init__(self, lu)
6386 self.instance_name = instance_name
6387 self.cleanup = cleanup
6388 self.live = False # will be overridden later
6389 self.iallocator = iallocator
6390 self.target_node = target_node
6391 self.failover = failover
6392 self.fallback = fallback
6393 self.ignore_consistency = ignore_consistency
6394 self.shutdown_timeout = shutdown_timeout
6396 def CheckPrereq(self):
6397 """Check prerequisites.
6399 This checks that the instance is in the cluster.
6402 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6403 instance = self.cfg.GetInstanceInfo(instance_name)
6404 assert instance is not None
6405 self.instance = instance
    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True
    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)
6422 if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.iallocator:
        self._RunAllocator()

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
6432 if len(self.lu.tasklets) == 1:
6433 # It is safe to remove locks only when we're the only tasklet in the LU
6434 nodes_keep = [instance.primary_node, self.target_node]
6435 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6436 if node not in nodes_keep]
6437 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6438 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      secondary_nodes = instance.secondary_nodes
6442 if not secondary_nodes:
6443 raise errors.ConfigurationError("No secondary node but using"
6444 " %s disk template" %
6445 instance.disk_template)
6446 target_node = secondary_nodes[0]
      if self.iallocator or (self.target_node and
                             self.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
6460 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6462 # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
6472 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6474 if not self.cleanup:
6475 _CheckNodeNotDrained(self.lu, target_node)
6476 if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)
6487 assert not (self.failover and self.cleanup)
6489 def _RunAllocator(self):
6490 """Run the allocator based on input opcode.
6493 ial = IAllocator(self.cfg, self.rpc,
6494 mode=constants.IALLOCATOR_MODE_RELOC,
6495 name=self.instance_name,
6496 # TODO See why hail breaks with a single node below
6497 relocate_from=[self.instance.primary_node,
6498 self.instance.primary_node],
    ial.Run(self.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.iallocator, ial.info),
                                 errors.ECODE_NORES)
6508 if len(ial.result) != ial.required_nodes:
6509 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6510 " of nodes (%s), required %s" %
6511 (self.iallocator, len(ial.result),
6512 ial.required_nodes), errors.ECODE_FAULT)
6513 self.target_node = ial.result[0]
6514 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6515 self.instance_name, self.iallocator,
6516 utils.CommaJoin(ial.result))
    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False
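    # Illustrative examples of the decision above: an explicit op.live=True
    # forces HT_MIGRATION_LIVE, op.live=False forces HT_MIGRATION_NONLIVE, and
    # when neither 'live' nor 'mode' is given the hypervisor's migration mode
    # parameter (HV_MIGRATION_MODE) supplies the default.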
6542 def _WaitUntilSync(self):
6543 """Poll with custom rpc for disk sync.
6545 This uses our own step-based rpc call.
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
6567 def _EnsureSecondary(self, node):
6568 """Demote a node to secondary.
6571 self.feedback_fn("* switching node %s to secondary mode" % node)
6573 for dev in self.instance.disks:
6574 self.cfg.SetDiskID(dev, node)
6576 result = self.rpc.call_blockdev_close(node, self.instance.name,
6577 self.instance.disks)
6578 result.Raise("Cannot change disk to secondary on node %s" % node)
6580 def _GoStandalone(self):
6581 """Disconnect from the network.
6584 self.feedback_fn("* changing into standalone mode")
6585 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6586 self.instance.disks)
6587 for node, nres in result.items():
6588 nres.Raise("Cannot disconnect disks node %s" % node)
6590 def _GoReconnect(self, multimaster):
6591 """Reconnect to the network.
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
6598 self.feedback_fn("* changing disks into %s mode" % msg)
6599 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6600 self.instance.disks,
6601 self.instance.name, multimaster)
6602 for node, nres in result.items():
6603 nres.Raise("Cannot change disks config on node %s" % node)
6605 def _ExecCleanup(self):
6606 """Try to cleanup after a failed migration.
6608 The cleanup is done by:
6609 - check that the instance is running only on one node
6610 (and update the config if needed)
6611 - change disks on its secondary node to secondary
6612 - wait until disks are fully synchronized
6613 - disconnect from the network
6614 - change disks into single-master mode
6615 - wait again until disks are fully synchronized
6618 instance = self.instance
6619 target_node = self.target_node
6620 source_node = self.source_node
6622 # check running on only one node
6623 self.feedback_fn("* checking where the instance actually runs"
6624 " (if this hangs, the hypervisor might be in"
6626 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6627 for node, result in ins_l.items():
6628 result.Raise("Can't contact node %s" % node)
6630 runningon_source = instance.name in ins_l[source_node].payload
6631 runningon_target = instance.name in ins_l[target_node].payload
6633 if runningon_source and runningon_target:
6634 raise errors.OpExecError("Instance seems to be running on two nodes,"
6635 " or the hypervisor is confused. You will have"
6636 " to ensure manually that it runs only on one"
6637 " and restart this operation.")
6639 if not (runningon_source or runningon_target):
6640 raise errors.OpExecError("Instance does not seem to be running at all."
6641 " In this case, it's safer to repair by"
6642 " running 'gnt-instance stop' to ensure disk"
6643 " shutdown, and then restarting it.")
6645 if runningon_target:
6646 # the migration has actually succeeded, we need to update the config
6647 self.feedback_fn("* instance running on secondary node (%s),"
6648 " updating config" % target_node)
6649 instance.primary_node = target_node
6650 self.cfg.Update(instance, self.feedback_fn)
6651 demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node
6657 if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
6666 self._GoReconnect(False)
6667 self._WaitUntilSync()
6669 self.feedback_fn("* done")
6671 def _RevertDiskStatus(self):
6672 """Try to revert the disk status after a failed migration.
6675 target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))
6690 def _AbortMigration(self):
6691 """Call the hypervisor code to abort a started migration.
6694 instance = self.instance
6695 target_node = self.target_node
6696 migration_info = self.migration_info
    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
6706 # Don't raise an exception here, as we stil have to try to revert the
6707 # disk status, even if this step failed.
6709 def _ExecMigration(self):
6710 """Migrate an instance.
6712 The migrate is done by:
6713 - change the disks into dual-master mode
6714 - wait until disks are fully synchronized again
6715 - migrate the instance
6716 - change disks on the new secondary node (the old primary) to secondary
6717 - wait until disks are fully synchronized
6718 - change disks into single-master mode
6721 instance = self.instance
6722 target_node = self.target_node
6723 source_node = self.source_node
6725 self.feedback_fn("* checking disk consistency between source and target")
6726 for dev in instance.disks:
6727 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6728 raise errors.OpExecError("Disk %s is degraded or not fully"
6729 " synchronized on target node,"
6730 " aborting migrate." % dev.iv_name)
6732 # First get the migration information from the remote node
6733 result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)
6741 self.migration_info = migration_info = result.payload
6743 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6744 # Then switch the disks to master/master mode
6745 self._EnsureSecondary(target_node)
6746 self._GoStandalone()
6747 self._GoReconnect(True)
6748 self._WaitUntilSync()
6750 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6751 result = self.rpc.call_accept_instance(target_node,
6754 self.nodes_ip[target_node])
6756 msg = result.fail_msg
6758 logging.error("Instance pre-migration failed, trying to revert"
6759 " disk status: %s", msg)
6760 self.feedback_fn("Pre-migration failed, aborting")
6761 self._AbortMigration()
6762 self._RevertDiskStatus()
6763 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6764 (instance.name, msg))
6766 self.feedback_fn("* migrating instance to %s" % target_node)
6767 result = self.rpc.call_instance_migrate(source_node, instance,
6768 self.nodes_ip[target_node],
6770 msg = result.fail_msg
6772 logging.error("Instance migration failed, trying to revert"
6773 " disk status: %s", msg)
6774 self.feedback_fn("Migration failed, aborting")
6775 self._AbortMigration()
6776 self._RevertDiskStatus()
6777 raise errors.OpExecError("Could not migrate instance %s: %s" %
6778 (instance.name, msg))
6780 instance.primary_node = target_node
6781 # distribute new instance config to the other nodes
6782 self.cfg.Update(instance, self.feedback_fn)
    result = self.rpc.call_finalize_migration(target_node, instance,
                                              migration_info, True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)
6795 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6796 self._EnsureSecondary(source_node)
6797 self._WaitUntilSync()
6798 self._GoStandalone()
6799 self._GoReconnect(False)
6800 self._WaitUntilSync()
6802 self.feedback_fn("* done")
6804 def _ExecFailover(self):
6805 """Failover an instance.
6807 The failover is done by shutting it down on its present node and
6808 starting it on the secondary.
6811 instance = self.instance
6812 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6814 source_node = instance.primary_node
6815 target_node = self.target_node
6817 if instance.admin_up:
6818 self.feedback_fn("* checking disk consistency between source and target")
6819 for dev in instance.disks:
6820 # for drbd, these are drbd over lvm
6821 if not _CheckDiskConsistency(self, dev, target_node, False):
        if not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")
6829 self.feedback_fn("* shutting down instance on source node")
6830 logging.info("Shutting down instance %s on node %s",
6831 instance.name, source_node)
6833 result = self.rpc.call_instance_shutdown(source_node, instance,
6834 self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))
6847 self.feedback_fn("* deactivating the instance's disks on source node")
6848 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6849 raise errors.OpExecError("Can't shut down the instance's disks.")
6851 instance.primary_node = target_node
6852 # distribute new instance config to the other nodes
6853 self.cfg.Update(instance, self.feedback_fn)
6855 # Only start the instance if it's marked as up
6856 if instance.admin_up:
6857 self.feedback_fn("* activating the instance's disks on target node")
6858 logging.info("Starting instance %s on node %s",
6859 instance.name, target_node)
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6875 def Exec(self, feedback_fn):
6876 """Perform the migration.
6879 self.feedback_fn = feedback_fn
6880 self.source_node = self.instance.primary_node
6882 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6883 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6884 self.target_node = self.instance.secondary_nodes[0]
6885 # Otherwise self.target_node has been populated either
6886 # directly, or through an iallocator.
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()
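    # The helper methods above rely on the attributes initialised here:
    # source_node/target_node for the direction of the move, all_nodes for the
    # DRBD RPCs, and nodes_ip for the inter-node DRBD endpoints (the nodes'
    # secondary IPs).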
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
6908 """Create a tree of block devices on a given node.
6910 If this device type has to be created on secondaries, create it and
6913 If not, just recurse to children keeping the same 'force' value.
6915 @param lu: the lu on whose behalf we execute
6916 @param node: the node on which to create the device
6917 @type instance: L{objects.Instance}
6918 @param instance: the instance which owns the device
6919 @type device: L{objects.Disk}
6920 @param device: the device to create
6921 @type force_create: boolean
6922 @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
6924 CreateOnSecondary() attribute
6925 @param info: the extra 'metadata' we should attach to the device
6926 (this will be represented as a LVM tag)
6927 @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
  if device.CreateOnSecondary():
    force_create = True
  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)
  if not force_create:
    return
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
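# Illustrative note: for a DRBD8 disk the recursion above first creates the LV
# children on each node; the DRBD device itself is then created everywhere,
# but force_open (and thus the device Open() call) is normally passed as True
# only on the primary node, as decided by the caller (see _CreateDisks below).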
6948 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6949 """Create a single block device on a given node.
6951 This will not recurse over children of the device, so they must be
6954 @param lu: the lu on whose behalf we execute
6955 @param node: the node on which to create the device
6956 @type instance: L{objects.Instance}
6957 @param instance: the instance which owns the device
6958 @type device: L{objects.Disk}
6959 @param device: the device to create
6960 @param info: the extra 'metadata' we should attach to the device
6961 (this will be represented as a LVM tag)
6962 @type force_open: boolean
6963 @param force_open: this parameter will be passes to the
6964 L{backend.BlockdevCreate} function where it specifies
6965 whether we run on primary or not, and it affects both
6966 the child assembly and the device own Open() execution
6969 lu.cfg.SetDiskID(device, node)
6970 result = lu.rpc.call_blockdev_create(node, device, device.size,
6971 instance.name, force_open, info)
6972 result.Raise("Can't create block device %s on"
6973 " node %s for instance %s" % (device, node, instance.name))
6974 if device.physical_id is None:
6975 device.physical_id = result.payload
6978 def _GenerateUniqueNames(lu, exts):
6979 """Generate a suitable LV name.
6981 This will generate a logical volume name for the given instance.
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
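# Illustrative example: _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns
# names of the form "<uuid>.disk0", "<uuid>.disk1", where each <uuid> comes
# from the cluster configuration's GenerateUniqueID.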
6991 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
6992 iv_name, p_minor, s_minor):
6993 """Generate a drbd8 device complete with its children.
6996 assert len(vgnames) == len(names) == 2
6997 port = lu.cfg.AllocatePort()
6998 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6999 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7000 logical_id=(vgnames[0], names[0]))
7001 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7002 logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
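# The 128 MB data size of dev_meta above is the per-disk DRBD metadata volume;
# the same 128 MB per disk shows up as overhead in _ComputeDiskSize and
# _ComputeDiskSizePerVG further down.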
7012 def _GenerateDiskTemplate(lu, template_name,
7013 instance_name, primary_node,
7014 secondary_nodes, disk_info,
7015 file_storage_dir, file_driver,
7016 base_index, feedback_fn):
7017 """Generate the entire disk layout for a given template type.
7020 #TODO: compute space requirements
7022 vgname = lu.cfg.GetVGName()
7023 disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
7027 elif template_name == constants.DT_PLAIN:
7028 if len(secondary_nodes) != 0:
7029 raise errors.ProgrammerError("Wrong template configuration")
7031 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7032 for i in range(disk_count)])
7033 for idx, disk in enumerate(disk_info):
7034 disk_index = idx + base_index
7035 vg = disk.get(constants.IDISK_VG, vgname)
7036 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7037 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7038 size=disk[constants.IDISK_SIZE],
7039 logical_id=(vg, names[idx]),
7040 iv_name="disk/%d" % disk_index,
7041 mode=disk[constants.IDISK_MODE])
7042 disks.append(disk_dev)
7043 elif template_name == constants.DT_DRBD8:
7044 if len(secondary_nodes) != 1:
7045 raise errors.ProgrammerError("Wrong template configuration")
7046 remote_node = secondary_nodes[0]
7047 minors = lu.cfg.AllocateDRBDMinor(
7048 [primary_node, remote_node] * len(disk_info), instance_name)
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7052 for i in range(disk_count)]):
7053 names.append(lv_prefix + "_data")
7054 names.append(lv_prefix + "_meta")
7055 for idx, disk in enumerate(disk_info):
7056 disk_index = idx + base_index
7057 data_vg = disk.get(constants.IDISK_VG, vgname)
7058 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7059 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7060 disk[constants.IDISK_SIZE],
7062 names[idx * 2:idx * 2 + 2],
7063 "disk/%d" % disk_index,
7064 minors[idx * 2], minors[idx * 2 + 1])
7065 disk_dev.mode = disk[constants.IDISK_MODE]
7066 disks.append(disk_dev)
7067 elif template_name == constants.DT_FILE:
7068 if len(secondary_nodes) != 0:
7069 raise errors.ProgrammerError("Wrong template configuration")
7071 opcodes.RequireFileStorage()
7073 for idx, disk in enumerate(disk_info):
7074 disk_index = idx + base_index
7075 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7076 size=disk[constants.IDISK_SIZE],
7077 iv_name="disk/%d" % disk_index,
7078 logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
7081 mode=disk[constants.IDISK_MODE])
7082 disks.append(disk_dev)
7083 elif template_name == constants.DT_SHARED_FILE:
7084 if len(secondary_nodes) != 0:
7085 raise errors.ProgrammerError("Wrong template configuration")
7087 opcodes.RequireSharedFileStorage()
7089 for idx, disk in enumerate(disk_info):
7090 disk_index = idx + base_index
7091 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7092 size=disk[constants.IDISK_SIZE],
7093 iv_name="disk/%d" % disk_index,
7094 logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
7097 mode=disk[constants.IDISK_MODE])
7098 disks.append(disk_dev)
7099 elif template_name == constants.DT_BLOCK:
7100 if len(secondary_nodes) != 0:
7101 raise errors.ProgrammerError("Wrong template configuration")
7103 for idx, disk in enumerate(disk_info):
7104 disk_index = idx + base_index
7105 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7106 size=disk[constants.IDISK_SIZE],
7107 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7108 disk[constants.IDISK_ADOPT]),
7109 iv_name="disk/%d" % disk_index,
7110 mode=disk[constants.IDISK_MODE])
7111 disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
7118 def _GetInstanceInfoText(instance):
  Compute the text that should be added to the disk's metadata.
7122 return "originstname+%s" % instance.name
7125 def _CalcEta(time_taken, written, total_size):
7126 """Calculates the ETA based on size written and total size.
7128 @param time_taken: The time taken so far
7129 @param written: amount written so far
7130 @param total_size: The total size of data to be written
7131 @return: The remaining time in seconds
7134 avg_time = time_taken / float(written)
7135 return (total_size - written) * avg_time
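# Worked example: if 1024 MiB out of 4096 MiB were written in 120 seconds,
# avg_time is 120/1024 ~= 0.117 s/MiB and the ETA is (4096 - 1024) * 0.117
# ~= 360 seconds.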
7138 def _WipeDisks(lu, instance):
7139 """Wipes instance disks.
7141 @type lu: L{LogicalUnit}
7142 @param lu: the logical unit on whose behalf we execute
7143 @type instance: L{objects.Instance}
7144 @param instance: the instance whose disks we should create
7145 @return: the success of the wipe
7148 node = instance.primary_node
7150 for device in instance.disks:
7151 lu.cfg.SetDiskID(device, node)
7153 logging.info("Pause sync of instance %s disks", instance.name)
7154 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7156 for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
7163 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7164 # MAX_WIPE_CHUNK at max
7165 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7166 constants.MIN_WIPE_CHUNK_PERCENT)
7167 # we _must_ make this an int, otherwise rounding errors will
7169 wipe_chunk_size = int(wipe_chunk_size)
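      # Illustrative example, assuming MIN_WIPE_CHUNK_PERCENT = 10 and
      # MAX_WIPE_CHUNK = 1024 MiB: a 20480 MiB disk is wiped in 1024 MiB
      # chunks (the cap applies), while a 5120 MiB disk is wiped in 512 MiB
      # chunks (10% of its size).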
7171 lu.LogInfo("* Wiping disk %d", idx)
7172 logging.info("Wiping disk %d for instance %s, node %s using"
7173 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()
7180 while offset < size:
7181 wipe_size = min(wipe_chunk_size, size - offset)
7182 logging.debug("Wiping disk %d, offset %s, chunk %s",
7183 idx, offset, wipe_size)
7184 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7185 result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
7189 if now - last_output >= 60:
7190 eta = _CalcEta(now - start_time, offset, size)
7191 lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)
7197 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7199 for idx, success in enumerate(result.payload):
    if not success:
      lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                    " look at the status and troubleshoot the issue.", idx)
      logging.warn("resume-sync of instance %s for disks %d failed",
                   instance.name, idx)
7207 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7208 """Create all disks for an instance.
7210 This abstracts away some work from AddInstance.
7212 @type lu: L{LogicalUnit}
7213 @param lu: the logical unit on whose behalf we execute
7214 @type instance: L{objects.Instance}
7215 @param instance: the instance whose disks we should create
7217 @param to_skip: list of indices to skip
7218 @type target_node: string
7219 @param target_node: if passed, overrides the target node for creation
7221 @return: the success of the creation
7224 info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]
7232 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7233 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7234 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7236 result.Raise("Failed to create directory '%s' on"
7237 " node %s" % (file_storage_dir, pnode))
7239 # Note: this needs to be kept in sync with adding of disks in
7240 # LUInstanceSetParams
7241 for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
7244 logging.info("Creating volume %s for instance %s",
7245 device.iv_name, instance.name)
7247 for node in all_nodes:
7248 f_create = node == pnode
7249 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
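    # f_create is True only on the primary node, so on secondary nodes the
    # recursion in _CreateBlockDev only materialises devices that declare
    # CreateOnSecondary() (e.g. the components of mirrored disk templates).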
7252 def _RemoveDisks(lu, instance, target_node=None):
7253 """Remove all disks for an instance.
7255 This abstracts away some work from `AddInstance()` and
7256 `RemoveInstance()`. Note that in case some of the devices couldn't
7257 be removed, the removal will continue with the other ones (compare
7258 with `_CreateDisks()`).
7260 @type lu: L{LogicalUnit}
7261 @param lu: the logical unit on whose behalf we execute
7262 @type instance: L{objects.Instance}
7263 @param instance: the instance whose disks we should remove
7264 @type target_node: string
7265 @param target_node: used to override the node on which to remove the disks
7267 @return: the success of the removal
7270 logging.info("Removing block devices for instance %s", instance.name)
  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
7301 def _ComputeDiskSizePerVG(disk_template, disks):
7302 """Compute disk size requirements in the volume group
7305 def _compute(disks, payload):
7306 """Universal algorithm.
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload
    return vgs
7316 # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }
7326 if disk_template not in req_size_dict:
7327 raise errors.ProgrammerError("Disk template '%s' size requirement"
7328 " is unknown" % disk_template)
7330 return req_size_dict[disk_template]
7333 def _ComputeDiskSize(disk_template, disks):
7334 """Compute disk size requirements in the volume group
7337 # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    }
7348 if disk_template not in req_size_dict:
7349 raise errors.ProgrammerError("Disk template '%s' size requirement"
7350 " is unknown" % disk_template)
7352 return req_size_dict[disk_template]
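# Illustrative example: for two disks of 10240 and 2048 MiB, DT_PLAIN requires
# 12288 MiB in the volume group while DT_DRBD8 requires 12288 + 2 * 128 =
# 12544 MiB because of the per-disk DRBD metadata.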
7355 def _FilterVmNodes(lu, nodenames):
7356 """Filters out non-vm_capable nodes from a list.
7358 @type lu: L{LogicalUnit}
7359 @param lu: the logical unit for which we check
7360 @type nodenames: list
7361 @param nodenames: the list of nodes on which we should check
7363 @return: the list of vm-capable nodes
7366 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7367 return [name for name in nodenames if name not in vm_nodes]
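# Note: despite its name, the local variable above holds the set of
# *non*-vm_capable nodes, which is then used as an exclusion list.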
7370 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7371 """Hypervisor parameter validation.
  This function abstracts the hypervisor parameter validation to be
7374 used in both instance create and instance modify.
7376 @type lu: L{LogicalUnit}
7377 @param lu: the logical unit for which we check
7378 @type nodenames: list
7379 @param nodenames: the list of nodes on which we should check
7380 @type hvname: string
7381 @param hvname: the name of the hypervisor we should use
7382 @type hvparams: dict
7383 @param hvparams: the parameters which we need to check
7384 @raise errors.OpPrereqError: if the parameters are not valid
7387 nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7398 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7399 """OS parameters validation.
7401 @type lu: L{LogicalUnit}
7402 @param lu: the logical unit for which we check
7403 @type required: boolean
7404 @param required: whether the validation should fail if the OS is not
7406 @type nodenames: list
7407 @param nodenames: the list of nodes on which we should check
7408 @type osname: string
  @param osname: the name of the OS we should use
7410 @type osparams: dict
7411 @param osparams: the parameters which we need to check
7412 @raise errors.OpPrereqError: if the parameters are not valid
7415 nodenames = _FilterVmNodes(lu, nodenames)
7416 result = lu.rpc.call_os_validate(required, nodenames, osname,
                                    [constants.OS_VALIDATE_PARAMETERS],
                                    osparams)
7419 for node, nres in result.items():
7420 # we don't check for offline cases since this should be run only
7421 # against the master node and/or an instance's nodes
7422 nres.Raise("OS Parameters validation failed on node %s" % node)
7423 if not nres.payload:
        lu.LogInfo("OS %s not found on node %s, validation skipped",
                   osname, node)
7428 class LUInstanceCreate(LogicalUnit):
7429 """Create an instance.
7432 HPATH = "instance-add"
7433 HTYPE = constants.HTYPE_INSTANCE
7436 def CheckArguments(self):
7440 # do not require name_check to ease forward/backward compatibility
7442 if self.op.no_install and self.op.start:
7443 self.LogInfo("No-installation mode selected, disabling startup")
7444 self.op.start = False
7445 # validate/normalize the instance name
7446 self.op.instance_name = \
7447 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7449 if self.op.ip_check and not self.op.name_check:
7450 # TODO: make the ip check more flexible and not depend on the name check
7451 raise errors.OpPrereqError("Cannot do ip check without a name check",
7454 # check nics' parameter names
7455 for nic in self.op.nics:
7456 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7458 # check disks. parameter names and consistent adopt/no-adopt strategy
7459 has_adopt = has_no_adopt = False
7460 for disk in self.op.disks:
7461 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
7466 if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)

    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7471 raise errors.OpPrereqError("Disk adoption is not supported for the"
7472 " '%s' disk template" %
7473 self.op.disk_template,
7475 if self.op.iallocator is not None:
7476 raise errors.OpPrereqError("Disk adoption not allowed with an"
7477 " iallocator script", errors.ECODE_INVAL)
7478 if self.op.mode == constants.INSTANCE_IMPORT:
7479 raise errors.OpPrereqError("Disk adoption not allowed for"
7480 " instance import", errors.ECODE_INVAL)
7482 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7483 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7484 " but no 'adopt' parameter given" %
7485 self.op.disk_template,
7488 self.adopt_disks = has_adopt
7490 # instance name verification
7491 if self.op.name_check:
7492 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7493 self.op.instance_name = self.hostname1.name
7494 # used in CheckPrereq for ip ping check
7495 self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None
7499 # file storage checks
7500 if (self.op.file_driver and
7501 not self.op.file_driver in constants.FILE_DRIVER):
7502 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7503 self.op.file_driver, errors.ECODE_INVAL)
7505 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7506 raise errors.OpPrereqError("File storage directory path not absolute",
7509 ### Node/iallocator related checks
7510 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7512 if self.op.pnode is not None:
7513 if self.op.disk_template in constants.DTS_INT_MIRROR:
7514 if self.op.snode is None:
7515 raise errors.OpPrereqError("The networked disk templates need"
7516 " a mirror node", errors.ECODE_INVAL)
7518 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7520 self.op.snode = None
7522 self._cds = _GetClusterDomainSecret()
7524 if self.op.mode == constants.INSTANCE_IMPORT:
7525 # On import force_variant must be True, because if we forced it at
7526 # initial install, our only chance when importing it back is that it
7528 self.op.force_variant = True
7530 if self.op.no_install:
7531 self.LogInfo("No-installation mode has no effect during import")
7533 elif self.op.mode == constants.INSTANCE_CREATE:
7534 if self.op.os_type is None:
7535 raise errors.OpPrereqError("No guest OS specified",
7537 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7538 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7539 " installation" % self.op.os_type,
7541 if self.op.disk_template is None:
7542 raise errors.OpPrereqError("No disk template specified",
7545 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7546 # Check handshake to ensure both clusters have the same domain secret
7547 src_handshake = self.op.source_handshake
7548 if not src_handshake:
7549 raise errors.OpPrereqError("Missing source handshake",
7552 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7555 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7558 # Load and check source CA
7559 self.source_x509_ca_pem = self.op.source_x509_ca
7560 if not self.source_x509_ca_pem:
7561 raise errors.OpPrereqError("Missing source X509 CA",
7565 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7567 except OpenSSL.crypto.Error, err:
7568 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7569 (err, ), errors.ECODE_INVAL)
7571 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7572 if errcode is not None:
7573 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7576 self.source_x509_ca = cert
7578 src_instance_name = self.op.source_instance_name
7579 if not src_instance_name:
7580 raise errors.OpPrereqError("Missing source instance name",
7583 self.source_instance_name = \
7584 netutils.GetHostname(name=src_instance_name).name
7587 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7588 self.op.mode, errors.ECODE_INVAL)
7590 def ExpandNames(self):
7591 """ExpandNames for CreateInstance.
7593 Figure out the right locks for instance creation.
7596 self.needed_locks = {}
7598 instance_name = self.op.instance_name
7599 # this is just a preventive check, but someone might still add this
7600 # instance in the meantime, and creation will fail at lock-add time
7601 if instance_name in self.cfg.GetInstanceList():
7602 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7603 instance_name, errors.ECODE_EXISTS)
7605 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7607 if self.op.iallocator:
7608 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7610 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7611 nodelist = [self.op.pnode]
7612 if self.op.snode is not None:
7613 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7614 nodelist.append(self.op.snode)
7615 self.needed_locks[locking.LEVEL_NODE] = nodelist
7617 # in case of import lock the source node too
7618 if self.op.mode == constants.INSTANCE_IMPORT:
7619 src_node = self.op.src_node
7620 src_path = self.op.src_path
7622 if src_path is None:
7623 self.op.src_path = src_path = self.op.instance_name
7625 if src_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7627 self.op.src_node = None
7628 if os.path.isabs(src_path):
7629 raise errors.OpPrereqError("Importing an instance from an absolute"
7630 " path requires a source node option.",
7633 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7634 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7635 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7636 if not os.path.isabs(src_path):
7637 self.op.src_path = src_path = \
7638 utils.PathJoin(constants.EXPORT_DIR, src_path)
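# Locking summary for instance creation: with an iallocator the target nodes
# are not known yet, so all node locks are acquired; with explicit nodes only
# the primary (and optional secondary) are locked; an import without an
# explicit source node also falls back to locking all nodes so that the
# export can be searched for.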
7640 def _RunAllocator(self):
7641 """Run the allocator based on input opcode.
7644 nics = [n.ToDict() for n in self.nics]
7645 ial = IAllocator(self.cfg, self.rpc,
7646 mode=constants.IALLOCATOR_MODE_ALLOC,
7647 name=self.op.instance_name,
7648 disk_template=self.op.disk_template,
7651 vcpus=self.be_full[constants.BE_VCPUS],
7652 mem_size=self.be_full[constants.BE_MEMORY],
7655 hypervisor=self.op.hypervisor,
7658 ial.Run(self.op.iallocator)
7661 raise errors.OpPrereqError("Can't compute nodes using"
7662 " iallocator '%s': %s" %
7663 (self.op.iallocator, ial.info),
7665 if len(ial.result) != ial.required_nodes:
7666 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7667 " of nodes (%s), required %s" %
7668 (self.op.iallocator, len(ial.result),
7669 ial.required_nodes), errors.ECODE_FAULT)
7670 self.op.pnode = ial.result[0]
7671 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7672 self.op.instance_name, self.op.iallocator,
7673 utils.CommaJoin(ial.result))
7674 if ial.required_nodes == 2:
7675 self.op.snode = ial.result[1]
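# For internally mirrored disk templates the allocator is expected to return
# two nodes: the first becomes the primary and the second the secondary.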
7677 def BuildHooksEnv(self):
7680 This runs on master, primary and secondary nodes of the instance.
7684 "ADD_MODE": self.op.mode,
7686 if self.op.mode == constants.INSTANCE_IMPORT:
7687 env["SRC_NODE"] = self.op.src_node
7688 env["SRC_PATH"] = self.op.src_path
7689 env["SRC_IMAGES"] = self.src_images
7691 env.update(_BuildInstanceHookEnv(
7692 name=self.op.instance_name,
7693 primary_node=self.op.pnode,
7694 secondary_nodes=self.secondaries,
7695 status=self.op.start,
7696 os_type=self.op.os_type,
7697 memory=self.be_full[constants.BE_MEMORY],
7698 vcpus=self.be_full[constants.BE_VCPUS],
7699 nics=_NICListToTuple(self, self.nics),
7700 disk_template=self.op.disk_template,
7701 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7702 for d in self.disks],
7705 hypervisor_name=self.op.hypervisor,
7710 def BuildHooksNodes(self):
7711 """Build hooks nodes.
7714 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7717 def _ReadExportInfo(self):
7718 """Reads the export information from disk.
7720 It will override the opcode source node and path with the actual
7721 information, if these two were not specified before.
7723 @return: the export information
7726 assert self.op.mode == constants.INSTANCE_IMPORT
7728 src_node = self.op.src_node
7729 src_path = self.op.src_path
7731 if src_node is None:
7732 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7733 exp_list = self.rpc.call_export_list(locked_nodes)
7735 for node in exp_list:
7736 if exp_list[node].fail_msg:
7738 if src_path in exp_list[node].payload:
7740 self.op.src_node = src_node = node
7741 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7745 raise errors.OpPrereqError("No export found for relative path %s" %
7746 src_path, errors.ECODE_INVAL)
7748 _CheckNodeOnline(self, src_node)
7749 result = self.rpc.call_export_info(src_node, src_path)
7750 result.Raise("No export or invalid export found in dir %s" % src_path)
7752 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7753 if not export_info.has_section(constants.INISECT_EXP):
7754 raise errors.ProgrammerError("Corrupted export config",
7755 errors.ECODE_ENVIRON)
7757 ei_version = export_info.get(constants.INISECT_EXP, "version")
7758 if (int(ei_version) != constants.EXPORT_VERSION):
7759 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7760 (ei_version, constants.EXPORT_VERSION),
7761 errors.ECODE_ENVIRON)
7764 def _ReadExportParams(self, einfo):
7765 """Use export parameters as defaults.
7767 In case the opcode doesn't specify (as in override) some instance
7768 parameters, then try to use them from the export information, if
7769 that declares them.
7772 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7774 if self.op.disk_template is None:
7775 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7776 self.op.disk_template = einfo.get(constants.INISECT_INS,
7779 raise errors.OpPrereqError("No disk template specified and the export"
7780 " is missing the disk_template information",
7783 if not self.op.disks:
7784 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7786 # TODO: import the disk iv_name too
7787 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7788 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7789 disks.append({constants.IDISK_SIZE: disk_sz})
7790 self.op.disks = disks
7792 raise errors.OpPrereqError("No disk info specified and the export"
7793 " is missing the disk information",
7796 if (not self.op.nics and
7797 einfo.has_option(constants.INISECT_INS, "nic_count")):
7799 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7801 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7802 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7807 if (self.op.hypervisor is None and
7808 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7809 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7810 if einfo.has_section(constants.INISECT_HYP):
7811 # use the export parameters but do not override the ones
7812 # specified by the user
7813 for name, value in einfo.items(constants.INISECT_HYP):
7814 if name not in self.op.hvparams:
7815 self.op.hvparams[name] = value
7817 if einfo.has_section(constants.INISECT_BEP):
7818 # use the parameters, without overriding
7819 for name, value in einfo.items(constants.INISECT_BEP):
7820 if name not in self.op.beparams:
7821 self.op.beparams[name] = value
7823 # try to read the parameters old style, from the main section
7824 for name in constants.BES_PARAMETERS:
7825 if (name not in self.op.beparams and
7826 einfo.has_option(constants.INISECT_INS, name)):
7827 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7829 if einfo.has_section(constants.INISECT_OSP):
7830 # use the parameters, without overriding
7831 for name, value in einfo.items(constants.INISECT_OSP):
7832 if name not in self.op.osparams:
7833 self.op.osparams[name] = value
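# Precedence when importing: values supplied in the opcode always win; the
# export file is only consulted for parameters that were left unset above.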
7835 def _RevertToDefaults(self, cluster):
7836 """Revert the instance parameters to the default values.
7840 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7841 for name in self.op.hvparams.keys():
7842 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7843 del self.op.hvparams[name]
7845 be_defs = cluster.SimpleFillBE({})
7846 for name in self.op.beparams.keys():
7847 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7848 del self.op.beparams[name]
7850 nic_defs = cluster.SimpleFillNIC({})
7851 for nic in self.op.nics:
7852 for name in constants.NICS_PARAMETERS:
7853 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7856 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7857 for name in self.op.osparams.keys():
7858 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7859 del self.op.osparams[name]
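# Dropping values identical to the current cluster defaults means the new
# instance keeps following the cluster-level defaults instead of pinning
# them; this is only done when the opcode requests it (identify_defaults,
# see CheckPrereq below).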
7861 def CheckPrereq(self):
7862 """Check prerequisites.
7865 if self.op.mode == constants.INSTANCE_IMPORT:
7866 export_info = self._ReadExportInfo()
7867 self._ReadExportParams(export_info)
7869 if (not self.cfg.GetVGName() and
7870 self.op.disk_template not in constants.DTS_NOT_LVM):
7871 raise errors.OpPrereqError("Cluster does not support lvm-based"
7872 " instances", errors.ECODE_STATE)
7874 if self.op.hypervisor is None:
7875 self.op.hypervisor = self.cfg.GetHypervisorType()
7877 cluster = self.cfg.GetClusterInfo()
7878 enabled_hvs = cluster.enabled_hypervisors
7879 if self.op.hypervisor not in enabled_hvs:
7880 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7881 " cluster (%s)" % (self.op.hypervisor,
7882 ",".join(enabled_hvs)),
7885 # check hypervisor parameter syntax (locally)
7886 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7887 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7889 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7890 hv_type.CheckParameterSyntax(filled_hvp)
7891 self.hv_full = filled_hvp
7892 # check that we don't specify global parameters on an instance
7893 _CheckGlobalHvParams(self.op.hvparams)
7895 # fill and remember the beparams dict
7896 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7897 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7899 # build os parameters
7900 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7902 # now that hvp/bep are in final format, let's reset to defaults,
7903 # if told to do so
7904 if self.op.identify_defaults:
7905 self._RevertToDefaults(cluster)
7909 for idx, nic in enumerate(self.op.nics):
7910 nic_mode_req = nic.get(constants.INIC_MODE, None)
7911 nic_mode = nic_mode_req
7912 if nic_mode is None:
7913 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7915 # in routed mode, for the first nic, the default ip is 'auto'
7916 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7917 default_ip_mode = constants.VALUE_AUTO
7919 default_ip_mode = constants.VALUE_NONE
7921 # ip validity checks
7922 ip = nic.get(constants.INIC_IP, default_ip_mode)
7923 if ip is None or ip.lower() == constants.VALUE_NONE:
7924 nic_ip = None
7925 elif ip.lower() == constants.VALUE_AUTO:
7926 if not self.op.name_check:
7927 raise errors.OpPrereqError("IP address set to auto but name checks"
7928 " have been skipped",
7929 errors.ECODE_INVAL)
7930 nic_ip = self.hostname1.ip
7931 else:
7932 if not netutils.IPAddress.IsValid(ip):
7933 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7934 errors.ECODE_INVAL)
7936 nic_ip = ip
7937 # TODO: check the ip address for uniqueness
7938 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7939 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7942 # MAC address verification
7943 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7944 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7945 mac = utils.NormalizeAndValidateMac(mac)
7948 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7949 except errors.ReservationError:
7950 raise errors.OpPrereqError("MAC address %s already in use"
7951 " in cluster" % mac,
7952 errors.ECODE_NOTUNIQUE)
7954 # Build nic parameters
7955 link = nic.get(constants.INIC_LINK, None)
7958 nicparams[constants.NIC_MODE] = nic_mode_req
7960 nicparams[constants.NIC_LINK] = link
7962 check_params = cluster.SimpleFillNIC(nicparams)
7963 objects.NIC.CheckParameterSyntax(check_params)
7964 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
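# For illustration, a single NIC specification in self.op.nics might look
# like the following (values are examples only):
#   {constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
#    constants.INIC_MAC: constants.VALUE_AUTO,
#    constants.INIC_IP: None,
#    constants.INIC_LINK: "xen-br0"}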
7966 # disk checks/pre-build
7967 default_vg = self.cfg.GetVGName()
7969 for disk in self.op.disks:
7970 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7971 if mode not in constants.DISK_ACCESS_SET:
7972 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7973 mode, errors.ECODE_INVAL)
7974 size = disk.get(constants.IDISK_SIZE, None)
7975 if size is None:
7976 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7977 try:
7978 size = int(size)
7979 except (TypeError, ValueError):
7980 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7981 errors.ECODE_INVAL)
7983 data_vg = disk.get(constants.IDISK_VG, default_vg)
7984 new_disk = {
7985 constants.IDISK_SIZE: size,
7986 constants.IDISK_MODE: mode,
7987 constants.IDISK_VG: data_vg,
7988 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
7989 }
7990 if constants.IDISK_ADOPT in disk:
7991 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7992 self.disks.append(new_disk)
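# For illustration, a fully-built entry in self.disks at this point might
# look like the following (sizes in MiB, values are examples only):
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg",
#    constants.IDISK_METAVG: "xenvg",
#    constants.IDISK_ADOPT: "existing-lv"}  # only present in adoption mode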
7994 if self.op.mode == constants.INSTANCE_IMPORT:
7996 # Check that the new instance doesn't have less disks than the export
7997 instance_disks = len(self.disks)
7998 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7999 if instance_disks < export_disks:
8000 raise errors.OpPrereqError("Not enough disks to import."
8001 " (instance: %d, export: %d)" %
8002 (instance_disks, export_disks),
8006 for idx in range(export_disks):
8007 option = 'disk%d_dump' % idx
8008 if export_info.has_option(constants.INISECT_INS, option):
8009 # FIXME: are the old os-es, disk sizes, etc. useful?
8010 export_name = export_info.get(constants.INISECT_INS, option)
8011 image = utils.PathJoin(self.op.src_path, export_name)
8012 disk_images.append(image)
8014 disk_images.append(False)
8016 self.src_images = disk_images
8018 old_name = export_info.get(constants.INISECT_INS, 'name')
8020 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8021 except (TypeError, ValueError), err:
8022 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8023 " an integer: %s" % str(err),
8025 if self.op.instance_name == old_name:
8026 for idx, nic in enumerate(self.nics):
8027 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8028 nic_mac_ini = 'nic%d_mac' % idx
8029 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8031 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8033 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8034 if self.op.ip_check:
8035 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8036 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8037 (self.check_ip, self.op.instance_name),
8038 errors.ECODE_NOTUNIQUE)
8040 #### mac address generation
8041 # By generating here the mac address both the allocator and the hooks get
8042 # the real final mac address rather than the 'auto' or 'generate' value.
8043 # There is a race condition between the generation and the instance object
8044 # creation, which means that we know the mac is valid now, but we're not
8045 # sure it will be when we actually add the instance. If things go bad
8046 # adding the instance will abort because of a duplicate mac, and the
8047 # creation job will fail.
8048 for nic in self.nics:
8049 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8050 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8054 if self.op.iallocator is not None:
8055 self._RunAllocator()
8057 #### node related checks
8059 # check primary node
8060 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8061 assert self.pnode is not None, \
8062 "Cannot retrieve locked node %s" % self.op.pnode
8064 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8065 pnode.name, errors.ECODE_STATE)
8067 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8068 pnode.name, errors.ECODE_STATE)
8069 if not pnode.vm_capable:
8070 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8071 " '%s'" % pnode.name, errors.ECODE_STATE)
8073 self.secondaries = []
8075 # mirror node verification
8076 if self.op.disk_template in constants.DTS_INT_MIRROR:
8077 if self.op.snode == pnode.name:
8078 raise errors.OpPrereqError("The secondary node cannot be the"
8079 " primary node.", errors.ECODE_INVAL)
8080 _CheckNodeOnline(self, self.op.snode)
8081 _CheckNodeNotDrained(self, self.op.snode)
8082 _CheckNodeVmCapable(self, self.op.snode)
8083 self.secondaries.append(self.op.snode)
8085 nodenames = [pnode.name] + self.secondaries
8087 if not self.adopt_disks:
8088 # Check lv size requirements, if not adopting
8089 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8090 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8092 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8093 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8094 disk[constants.IDISK_ADOPT])
8095 for disk in self.disks])
8096 if len(all_lvs) != len(self.disks):
8097 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8099 for lv_name in all_lvs:
8101 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8102 # to ReserveLV uses the same syntax
8103 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8104 except errors.ReservationError:
8105 raise errors.OpPrereqError("LV named %s used by another instance" %
8106 lv_name, errors.ECODE_NOTUNIQUE)
8108 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8109 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8111 node_lvs = self.rpc.call_lv_list([pnode.name],
8112 vg_names.payload.keys())[pnode.name]
8113 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8114 node_lvs = node_lvs.payload
8116 delta = all_lvs.difference(node_lvs.keys())
8118 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8119 utils.CommaJoin(delta),
8121 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8123 raise errors.OpPrereqError("Online logical volumes found, cannot"
8124 " adopt: %s" % utils.CommaJoin(online_lvs),
8126 # update the size of disk based on what is found
8127 for dsk in self.disks:
8128 dsk[constants.IDISK_SIZE] = \
8129 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8130 dsk[constants.IDISK_ADOPT])][0]))
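# As used above, the lv_list payload maps "vg/lv" names to tuples whose
# first element is the volume size (assumed to be in MiB) and whose third
# element is the "online" (in use) flag.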
8132 elif self.op.disk_template == constants.DT_BLOCK:
8133 # Normalize and de-duplicate device paths
8134 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8135 for disk in self.disks])
8136 if len(all_disks) != len(self.disks):
8137 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8139 baddisks = [d for d in all_disks
8140 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8142 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8143 " cannot be adopted" %
8144 (", ".join(baddisks),
8145 constants.ADOPTABLE_BLOCKDEV_ROOT),
8148 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8149 list(all_disks))[pnode.name]
8150 node_disks.Raise("Cannot get block device information from node %s" %
8152 node_disks = node_disks.payload
8153 delta = all_disks.difference(node_disks.keys())
8155 raise errors.OpPrereqError("Missing block device(s): %s" %
8156 utils.CommaJoin(delta),
8158 for dsk in self.disks:
8159 dsk[constants.IDISK_SIZE] = \
8160 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8162 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8164 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8165 # check OS parameters (remotely)
8166 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8168 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8170 # memory check on primary node
8172 _CheckNodeFreeMemory(self, self.pnode.name,
8173 "creating instance %s" % self.op.instance_name,
8174 self.be_full[constants.BE_MEMORY],
8177 self.dry_run_result = list(nodenames)
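# In dry-run mode the list of candidate nodes computed above becomes the
# LU's result, so callers can see where the instance would be placed.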
8179 def Exec(self, feedback_fn):
8180 """Create and add the instance to the cluster.
8183 instance = self.op.instance_name
8184 pnode_name = self.pnode.name
8186 ht_kind = self.op.hypervisor
8187 if ht_kind in constants.HTS_REQ_PORT:
8188 network_port = self.cfg.AllocatePort()
8192 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8193 # this is needed because os.path.join does not accept None arguments
8194 if self.op.file_storage_dir is None:
8195 string_file_storage_dir = ""
8197 string_file_storage_dir = self.op.file_storage_dir
8199 # build the full file storage dir path
8200 if self.op.disk_template == constants.DT_SHARED_FILE:
8201 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8203 get_fsd_fn = self.cfg.GetFileStorageDir
8205 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8206 string_file_storage_dir, instance)
8208 file_storage_dir = ""
8210 disks = _GenerateDiskTemplate(self,
8211 self.op.disk_template,
8212 instance, pnode_name,
8216 self.op.file_driver,
8220 iobj = objects.Instance(name=instance, os=self.op.os_type,
8221 primary_node=pnode_name,
8222 nics=self.nics, disks=disks,
8223 disk_template=self.op.disk_template,
8225 network_port=network_port,
8226 beparams=self.op.beparams,
8227 hvparams=self.op.hvparams,
8228 hypervisor=self.op.hypervisor,
8229 osparams=self.op.osparams,
8232 if self.adopt_disks:
8233 if self.op.disk_template == constants.DT_PLAIN:
8234 # rename LVs to the newly-generated names; we need to construct
8235 # 'fake' LV disks with the old data, plus the new unique_id
8236 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8238 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8239 rename_to.append(t_dsk.logical_id)
8240 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8241 self.cfg.SetDiskID(t_dsk, pnode_name)
8242 result = self.rpc.call_blockdev_rename(pnode_name,
8243 zip(tmp_disks, rename_to))
8244 result.Raise("Failed to rename adopted LVs")
8246 feedback_fn("* creating instance disks...")
8248 _CreateDisks(self, iobj)
8249 except errors.OpExecError:
8250 self.LogWarning("Device creation failed, reverting...")
8252 _RemoveDisks(self, iobj)
8254 self.cfg.ReleaseDRBDMinors(instance)
8257 feedback_fn("adding instance %s to cluster config" % instance)
8259 self.cfg.AddInstance(iobj, self.proc.GetECId())
8261 # Declare that we don't want to remove the instance lock anymore, as we've
8262 # added the instance to the config
8263 del self.remove_locks[locking.LEVEL_INSTANCE]
8264 # Unlock all the nodes
8265 if self.op.mode == constants.INSTANCE_IMPORT:
8266 nodes_keep = [self.op.src_node]
8267 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8268 if node != self.op.src_node]
8269 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8270 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8271 else:
8272 self.context.glm.release(locking.LEVEL_NODE)
8273 del self.acquired_locks[locking.LEVEL_NODE]
8276 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8277 feedback_fn("* wiping instance disks...")
8279 _WipeDisks(self, iobj)
8280 except errors.OpExecError, err:
8281 logging.exception("Wiping disks failed")
8282 self.LogWarning("Wiping instance disks failed (%s)", err)
8286 # Something is already wrong with the disks, don't do anything else
8288 elif self.op.wait_for_sync:
8289 disk_abort = not _WaitForSync(self, iobj)
8290 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8291 # make sure the disks are not degraded (still sync-ing is ok)
8293 feedback_fn("* checking mirrors status")
8294 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8299 _RemoveDisks(self, iobj)
8300 self.cfg.RemoveInstance(iobj.name)
8301 # Make sure the instance lock gets removed
8302 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8303 raise errors.OpExecError("There are some degraded disks for"
8306 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8307 if self.op.mode == constants.INSTANCE_CREATE:
8308 if not self.op.no_install:
8309 feedback_fn("* running the instance OS create scripts...")
8310 # FIXME: pass debug option from opcode to backend
8311 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8312 self.op.debug_level)
8313 result.Raise("Could not add os for instance %s"
8314 " on node %s" % (instance, pnode_name))
8316 elif self.op.mode == constants.INSTANCE_IMPORT:
8317 feedback_fn("* running the instance OS import scripts...")
8321 for idx, image in enumerate(self.src_images):
8325 # FIXME: pass debug option from opcode to backend
8326 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8327 constants.IEIO_FILE, (image, ),
8328 constants.IEIO_SCRIPT,
8329 (iobj.disks[idx], idx),
8331 transfers.append(dt)
8334 masterd.instance.TransferInstanceData(self, feedback_fn,
8335 self.op.src_node, pnode_name,
8336 self.pnode.secondary_ip,
8338 if not compat.all(import_result):
8339 self.LogWarning("Some disks for instance %s on node %s were not"
8340 " imported successfully" % (instance, pnode_name))
8342 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8343 feedback_fn("* preparing remote import...")
8344 # The source cluster will stop the instance before attempting to make a
8345 # connection. In some cases stopping an instance can take a long time,
8346 # hence the shutdown timeout is added to the connection timeout.
8347 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8348 self.op.source_shutdown_timeout)
8349 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8351 assert iobj.primary_node == self.pnode.name
8353 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8354 self.source_x509_ca,
8355 self._cds, timeouts)
8356 if not compat.all(disk_results):
8357 # TODO: Should the instance still be started, even if some disks
8358 # failed to import (valid for local imports, too)?
8359 self.LogWarning("Some disks for instance %s on node %s were not"
8360 " imported successfully" % (instance, pnode_name))
8362 # Run rename script on newly imported instance
8363 assert iobj.name == instance
8364 feedback_fn("Running rename script for %s" % instance)
8365 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8366 self.source_instance_name,
8367 self.op.debug_level)
8369 self.LogWarning("Failed to run rename script for %s on node"
8370 " %s: %s" % (instance, pnode_name, result.fail_msg))
8373 # also checked in the prereq part
8374 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8378 iobj.admin_up = True
8379 self.cfg.Update(iobj, feedback_fn)
8380 logging.info("Starting instance %s on node %s", instance, pnode_name)
8381 feedback_fn("* starting instance...")
8382 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8383 result.Raise("Could not start instance")
8385 return list(iobj.all_nodes)
8388 class LUInstanceConsole(NoHooksLU):
8389 """Connect to an instance's console.
8391 This is somewhat special in that it returns the command line that
8392 you need to run on the master node in order to connect to the
8393 console.
8398 def ExpandNames(self):
8399 self._ExpandAndLockInstance()
8401 def CheckPrereq(self):
8402 """Check prerequisites.
8404 This checks that the instance is in the cluster.
8407 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8408 assert self.instance is not None, \
8409 "Cannot retrieve locked instance %s" % self.op.instance_name
8410 _CheckNodeOnline(self, self.instance.primary_node)
8412 def Exec(self, feedback_fn):
8413 """Connect to the console of an instance
8416 instance = self.instance
8417 node = instance.primary_node
8419 node_insts = self.rpc.call_instance_list([node],
8420 [instance.hypervisor])[node]
8421 node_insts.Raise("Can't get node information from %s" % node)
8423 if instance.name not in node_insts.payload:
8424 if instance.admin_up:
8425 state = constants.INSTST_ERRORDOWN
8427 state = constants.INSTST_ADMINDOWN
8428 raise errors.OpExecError("Instance %s is not running (state %s)" %
8429 (instance.name, state))
8431 logging.debug("Connecting to console of %s on %s", instance.name, node)
8433 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8436 def _GetInstanceConsole(cluster, instance):
8437 """Returns console information for an instance.
8439 @type cluster: L{objects.Cluster}
8440 @type instance: L{objects.Instance}
8444 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8445 # beparams and hvparams are passed separately, to avoid editing the
8446 # instance and then saving the defaults in the instance itself.
8447 hvparams = cluster.FillHV(instance)
8448 beparams = cluster.FillBE(instance)
8449 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8451 assert console.instance == instance.name
8452 assert console.Validate()
8454 return console.ToDict()
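# The returned dictionary describes how to reach the instance's console
# (e.g. a command to run or a VNC endpoint, depending on the hypervisor);
# clients such as "gnt-instance console" consume it.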
8457 class LUInstanceReplaceDisks(LogicalUnit):
8458 """Replace the disks of an instance.
8461 HPATH = "mirrors-replace"
8462 HTYPE = constants.HTYPE_INSTANCE
8465 def CheckArguments(self):
8466 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8469 def ExpandNames(self):
8470 self._ExpandAndLockInstance()
8472 if self.op.iallocator is not None:
8473 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8475 elif self.op.remote_node is not None:
8476 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8477 self.op.remote_node = remote_node
8479 # Warning: do not remove the locking of the new secondary here
8480 # unless DRBD8.AddChildren is changed to work in parallel;
8481 # currently it doesn't since parallel invocations of
8482 # FindUnusedMinor will conflict
8483 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8484 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8487 self.needed_locks[locking.LEVEL_NODE] = []
8488 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8490 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8491 self.op.iallocator, self.op.remote_node,
8492 self.op.disks, False, self.op.early_release)
8494 self.tasklets = [self.replacer]
8496 def DeclareLocks(self, level):
8497 # If we're not already locking all nodes in the set we have to declare the
8498 # instance's primary/secondary nodes.
8499 if (level == locking.LEVEL_NODE and
8500 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8501 self._LockInstancesNodes()
8503 def BuildHooksEnv(self):
8506 This runs on the master, the primary and all the secondaries.
8509 instance = self.replacer.instance
8511 "MODE": self.op.mode,
8512 "NEW_SECONDARY": self.op.remote_node,
8513 "OLD_SECONDARY": instance.secondary_nodes[0],
8515 env.update(_BuildInstanceHookEnvByObject(self, instance))
8518 def BuildHooksNodes(self):
8519 """Build hooks nodes.
8522 instance = self.replacer.instance
8524 self.cfg.GetMasterNode(),
8525 instance.primary_node,
8527 if self.op.remote_node is not None:
8528 nl.append(self.op.remote_node)
8532 class TLReplaceDisks(Tasklet):
8533 """Replaces disks for an instance.
8535 Note: Locking is not within the scope of this class.
8538 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8539 disks, delay_iallocator, early_release):
8540 """Initializes this class.
8543 Tasklet.__init__(self, lu)
8546 self.instance_name = instance_name
8548 self.iallocator_name = iallocator_name
8549 self.remote_node = remote_node
8551 self.delay_iallocator = delay_iallocator
8552 self.early_release = early_release
8555 self.instance = None
8556 self.new_node = None
8557 self.target_node = None
8558 self.other_node = None
8559 self.remote_node_info = None
8560 self.node_secondary_ip = None
8563 def CheckArguments(mode, remote_node, iallocator):
8564 """Helper function for users of this class.
8567 # check for valid parameter combination
8568 if mode == constants.REPLACE_DISK_CHG:
8569 if remote_node is None and iallocator is None:
8570 raise errors.OpPrereqError("When changing the secondary either an"
8571 " iallocator script must be used or the"
8572 " new node given", errors.ECODE_INVAL)
8574 if remote_node is not None and iallocator is not None:
8575 raise errors.OpPrereqError("Give either the iallocator or the new"
8576 " secondary, not both", errors.ECODE_INVAL)
8578 elif remote_node is not None or iallocator is not None:
8579 # Not replacing the secondary
8580 raise errors.OpPrereqError("The iallocator and new node options can"
8581 " only be used when changing the"
8582 " secondary node", errors.ECODE_INVAL)
8585 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8586 """Compute a new secondary node using an IAllocator.
8589 ial = IAllocator(lu.cfg, lu.rpc,
8590 mode=constants.IALLOCATOR_MODE_RELOC,
8592 relocate_from=relocate_from)
8594 ial.Run(iallocator_name)
8597 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8598 " %s" % (iallocator_name, ial.info),
8601 if len(ial.result) != ial.required_nodes:
8602 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8603 " of nodes (%s), required %s" %
8605 len(ial.result), ial.required_nodes),
8608 remote_node_name = ial.result[0]
8610 lu.LogInfo("Selected new secondary for instance '%s': %s",
8611 instance_name, remote_node_name)
8613 return remote_node_name
8615 def _FindFaultyDisks(self, node_name):
8616 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8619 def _CheckDisksActivated(self, instance):
8620 """Checks if the instance disks are activated.
8622 @param instance: The instance to check disks
8623 @return: True if they are activated, False otherwise
8626 nodes = instance.all_nodes
8628 for idx, dev in enumerate(instance.disks):
8630 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8631 self.cfg.SetDiskID(dev, node)
8633 result = self.rpc.call_blockdev_find(node, dev)
8637 elif result.fail_msg or not result.payload:
8643 def CheckPrereq(self):
8644 """Check prerequisites.
8646 This checks that the instance is in the cluster.
8649 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8650 assert instance is not None, \
8651 "Cannot retrieve locked instance %s" % self.instance_name
8653 if instance.disk_template != constants.DT_DRBD8:
8654 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8655 " instances", errors.ECODE_INVAL)
8657 if len(instance.secondary_nodes) != 1:
8658 raise errors.OpPrereqError("The instance has a strange layout,"
8659 " expected one secondary but found %d" %
8660 len(instance.secondary_nodes),
8663 if not self.delay_iallocator:
8664 self._CheckPrereq2()
8666 def _CheckPrereq2(self):
8667 """Check prerequisites, second part.
8669 This function should always be part of CheckPrereq. It was separated and is
8670 now called from Exec because during node evacuation iallocator was only
8671 called with an unmodified cluster model, not taking planned changes into
8672 account.
8675 instance = self.instance
8676 secondary_node = instance.secondary_nodes[0]
8678 if self.iallocator_name is None:
8679 remote_node = self.remote_node
8681 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8682 instance.name, instance.secondary_nodes)
8684 if remote_node is not None:
8685 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8686 assert self.remote_node_info is not None, \
8687 "Cannot retrieve locked node %s" % remote_node
8689 self.remote_node_info = None
8691 if remote_node == self.instance.primary_node:
8692 raise errors.OpPrereqError("The specified node is the primary node of"
8693 " the instance.", errors.ECODE_INVAL)
8695 if remote_node == secondary_node:
8696 raise errors.OpPrereqError("The specified node is already the"
8697 " secondary node of the instance.",
8700 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8701 constants.REPLACE_DISK_CHG):
8702 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8705 if self.mode == constants.REPLACE_DISK_AUTO:
8706 if not self._CheckDisksActivated(instance):
8707 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8708 " first" % self.instance_name,
8710 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8711 faulty_secondary = self._FindFaultyDisks(secondary_node)
8713 if faulty_primary and faulty_secondary:
8714 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8715 " one node and can not be repaired"
8716 " automatically" % self.instance_name,
8720 self.disks = faulty_primary
8721 self.target_node = instance.primary_node
8722 self.other_node = secondary_node
8723 check_nodes = [self.target_node, self.other_node]
8724 elif faulty_secondary:
8725 self.disks = faulty_secondary
8726 self.target_node = secondary_node
8727 self.other_node = instance.primary_node
8728 check_nodes = [self.target_node, self.other_node]
8734 # Non-automatic modes
8735 if self.mode == constants.REPLACE_DISK_PRI:
8736 self.target_node = instance.primary_node
8737 self.other_node = secondary_node
8738 check_nodes = [self.target_node, self.other_node]
8740 elif self.mode == constants.REPLACE_DISK_SEC:
8741 self.target_node = secondary_node
8742 self.other_node = instance.primary_node
8743 check_nodes = [self.target_node, self.other_node]
8745 elif self.mode == constants.REPLACE_DISK_CHG:
8746 self.new_node = remote_node
8747 self.other_node = instance.primary_node
8748 self.target_node = secondary_node
8749 check_nodes = [self.new_node, self.other_node]
8751 _CheckNodeNotDrained(self.lu, remote_node)
8752 _CheckNodeVmCapable(self.lu, remote_node)
8754 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8755 assert old_node_info is not None
8756 if old_node_info.offline and not self.early_release:
8757 # doesn't make sense to delay the release
8758 self.early_release = True
8759 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8760 " early-release mode", secondary_node)
8763 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8766 # If not specified all disks should be replaced
8767 if not self.disks:
8768 self.disks = range(len(self.instance.disks))
8770 for node in check_nodes:
8771 _CheckNodeOnline(self.lu, node)
8773 # Check whether disks are valid
8774 for disk_idx in self.disks:
8775 instance.FindDisk(disk_idx)
8777 # Get secondary node IP addresses
8778 node_2nd_ip = {}
8780 for node_name in [self.target_node, self.other_node, self.new_node]:
8781 if node_name is not None:
8782 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8784 self.node_secondary_ip = node_2nd_ip
8786 def Exec(self, feedback_fn):
8787 """Execute disk replacement.
8789 This dispatches the disk replacement to the appropriate handler.
8792 if self.delay_iallocator:
8793 self._CheckPrereq2()
8795 if not self.disks:
8796 feedback_fn("No disks need replacement")
8797 return
8799 feedback_fn("Replacing disk(s) %s for %s" %
8800 (utils.CommaJoin(self.disks), self.instance.name))
8802 activate_disks = (not self.instance.admin_up)
8804 # Activate the instance disks if we're replacing them on a down instance
8805 if activate_disks:
8806 _StartInstanceDisks(self.lu, self.instance, True)
8808 try:
8809 # Should we replace the secondary node?
8810 if self.new_node is not None:
8811 fn = self._ExecDrbd8Secondary
8812 else:
8813 fn = self._ExecDrbd8DiskOnly
8815 return fn(feedback_fn)
8817 finally:
8818 # Deactivate the instance disks if we're replacing them on a
8819 # down instance
8820 if activate_disks:
8821 _SafeShutdownInstanceDisks(self.lu, self.instance)
8823 def _CheckVolumeGroup(self, nodes):
8824 self.lu.LogInfo("Checking volume groups")
8826 vgname = self.cfg.GetVGName()
8828 # Make sure volume group exists on all involved nodes
8829 results = self.rpc.call_vg_list(nodes)
8831 raise errors.OpExecError("Can't list volume groups on the nodes")
8835 res.Raise("Error checking node %s" % node)
8836 if vgname not in res.payload:
8837 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8840 def _CheckDisksExistence(self, nodes):
8841 # Check disk existence
8842 for idx, dev in enumerate(self.instance.disks):
8843 if idx not in self.disks:
8847 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8848 self.cfg.SetDiskID(dev, node)
8850 result = self.rpc.call_blockdev_find(node, dev)
8852 msg = result.fail_msg
8853 if msg or not result.payload:
8855 msg = "disk not found"
8856 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8859 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8860 for idx, dev in enumerate(self.instance.disks):
8861 if idx not in self.disks:
8864 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8867 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8869 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8870 " replace disks for instance %s" %
8871 (node_name, self.instance.name))
8873 def _CreateNewStorage(self, node_name):
8876 for idx, dev in enumerate(self.instance.disks):
8877 if idx not in self.disks:
8880 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8882 self.cfg.SetDiskID(dev, node_name)
8884 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8885 names = _GenerateUniqueNames(self.lu, lv_names)
8887 vg_data = dev.children[0].logical_id[0]
8888 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8889 logical_id=(vg_data, names[0]))
8890 vg_meta = dev.children[1].logical_id[0]
8891 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8892 logical_id=(vg_meta, names[1]))
8894 new_lvs = [lv_data, lv_meta]
8895 old_lvs = dev.children
8896 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8898 # we pass force_create=True to force the LVM creation
8899 for new_lv in new_lvs:
8900 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8901 _GetInstanceInfoText(self.instance), False)
8905 def _CheckDevices(self, node_name, iv_names):
8906 for name, (dev, _, _) in iv_names.iteritems():
8907 self.cfg.SetDiskID(dev, node_name)
8909 result = self.rpc.call_blockdev_find(node_name, dev)
8911 msg = result.fail_msg
8912 if msg or not result.payload:
8914 msg = "disk not found"
8915 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8918 if result.payload.is_degraded:
8919 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8921 def _RemoveOldStorage(self, node_name, iv_names):
8922 for name, (_, old_lvs, _) in iv_names.iteritems():
8923 self.lu.LogInfo("Remove logical volumes for %s" % name)
8926 self.cfg.SetDiskID(lv, node_name)
8928 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8930 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8931 hint="remove unused LVs manually")
8933 def _ReleaseNodeLock(self, node_name):
8934 """Releases the lock for a given node."""
8935 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8937 def _ExecDrbd8DiskOnly(self, feedback_fn):
8938 """Replace a disk on the primary or secondary for DRBD 8.
8940 The algorithm for replace is quite complicated:
8942 1. for each disk to be replaced:
8944 1. create new LVs on the target node with unique names
8945 1. detach old LVs from the drbd device
8946 1. rename old LVs to name_replaced.<time_t>
8947 1. rename new LVs to old LVs
8948 1. attach the new LVs (with the old names now) to the drbd device
8950 1. wait for sync across all devices
8952 1. for each modified disk:
8954 1. remove old LVs (which have the name name_replaced.<time_t>)
8956 Failures are not very well handled.
8961 # Step: check device activation
8962 self.lu.LogStep(1, steps_total, "Check device existence")
8963 self._CheckDisksExistence([self.other_node, self.target_node])
8964 self._CheckVolumeGroup([self.target_node, self.other_node])
8966 # Step: check other node consistency
8967 self.lu.LogStep(2, steps_total, "Check peer consistency")
8968 self._CheckDisksConsistency(self.other_node,
8969 self.other_node == self.instance.primary_node,
8972 # Step: create new storage
8973 self.lu.LogStep(3, steps_total, "Allocate new storage")
8974 iv_names = self._CreateNewStorage(self.target_node)
8976 # Step: for each lv, detach+rename*2+attach
8977 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8978 for dev, old_lvs, new_lvs in iv_names.itervalues():
8979 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8981 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8983 result.Raise("Can't detach drbd from local storage on node"
8984 " %s for device %s" % (self.target_node, dev.iv_name))
8986 #cfg.Update(instance)
8988 # ok, we created the new LVs, so now we know we have the needed
8989 # storage; as such, we proceed on the target node to rename
8990 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8991 # using the assumption that logical_id == physical_id (which in
8992 # turn is the unique_id on that node)
8994 # FIXME(iustin): use a better name for the replaced LVs
8995 temp_suffix = int(time.time())
8996 ren_fn = lambda d, suff: (d.physical_id[0],
8997 d.physical_id[1] + "_replaced-%s" % suff)
8999 # Build the rename list based on what LVs exist on the node
9000 rename_old_to_new = []
9001 for to_ren in old_lvs:
9002 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9003 if not result.fail_msg and result.payload:
9005 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9007 self.lu.LogInfo("Renaming the old LVs on the target node")
9008 result = self.rpc.call_blockdev_rename(self.target_node,
9010 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9012 # Now we rename the new LVs to the old LVs
9013 self.lu.LogInfo("Renaming the new LVs on the target node")
9014 rename_new_to_old = [(new, old.physical_id)
9015 for old, new in zip(old_lvs, new_lvs)]
9016 result = self.rpc.call_blockdev_rename(self.target_node,
9018 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9020 for old, new in zip(old_lvs, new_lvs):
9021 new.logical_id = old.logical_id
9022 self.cfg.SetDiskID(new, self.target_node)
9024 for disk in old_lvs:
9025 disk.logical_id = ren_fn(disk, temp_suffix)
9026 self.cfg.SetDiskID(disk, self.target_node)
9028 # Now that the new lvs have the old name, we can add them to the device
9029 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9030 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9032 msg = result.fail_msg
9034 for new_lv in new_lvs:
9035 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9038 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9039 hint=("cleanup manually the unused logical"
9041 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9043 dev.children = new_lvs
9045 self.cfg.Update(self.instance, feedback_fn)
9048 if self.early_release:
9049 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9051 self._RemoveOldStorage(self.target_node, iv_names)
9052 # WARNING: we release both node locks here, do not do other RPCs
9053 # than WaitForSync to the primary node
9054 self._ReleaseNodeLock([self.target_node, self.other_node])
9057 # This can fail as the old devices are degraded and _WaitForSync
9058 # does a combined result over all disks, so we don't check its return value
9059 self.lu.LogStep(cstep, steps_total, "Sync devices")
9061 _WaitForSync(self.lu, self.instance)
9063 # Check all devices manually
9064 self._CheckDevices(self.instance.primary_node, iv_names)
9066 # Step: remove old storage
9067 if not self.early_release:
9068 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9070 self._RemoveOldStorage(self.target_node, iv_names)
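# With early_release the old LVs are removed and the node locks dropped
# before the final resync, shortening the time locks are held; the trade-off
# is that the old copies are no longer available should the resync run into
# problems.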
9072 def _ExecDrbd8Secondary(self, feedback_fn):
9073 """Replace the secondary node for DRBD 8.
9075 The algorithm for replace is quite complicated:
9076 - for all disks of the instance:
9077 - create new LVs on the new node with same names
9078 - shutdown the drbd device on the old secondary
9079 - disconnect the drbd network on the primary
9080 - create the drbd device on the new secondary
9081 - network attach the drbd on the primary, using an artifice:
9082 the drbd code for Attach() will connect to the network if it
9083 finds a device which is connected to the good local disks but
9085 - wait for sync across all devices
9086 - remove all disks from the old secondary
9088 Failures are not very well handled.
9093 # Step: check device activation
9094 self.lu.LogStep(1, steps_total, "Check device existence")
9095 self._CheckDisksExistence([self.instance.primary_node])
9096 self._CheckVolumeGroup([self.instance.primary_node])
9098 # Step: check other node consistency
9099 self.lu.LogStep(2, steps_total, "Check peer consistency")
9100 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9102 # Step: create new storage
9103 self.lu.LogStep(3, steps_total, "Allocate new storage")
9104 for idx, dev in enumerate(self.instance.disks):
9105 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9106 (self.new_node, idx))
9107 # we pass force_create=True to force LVM creation
9108 for new_lv in dev.children:
9109 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9110 _GetInstanceInfoText(self.instance), False)
9112 # Step 4: drbd minors and drbd setup changes
9113 # after this, we must manually remove the drbd minors on both the
9114 # error and the success paths
9115 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9116 minors = self.cfg.AllocateDRBDMinor([self.new_node
9117 for dev in self.instance.disks],
9119 logging.debug("Allocated minors %r", minors)
9122 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9123 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9124 (self.new_node, idx))
9125 # create new devices on new_node; note that we create two IDs:
9126 # one without port, so the drbd will be activated without
9127 # networking information on the new node at this stage, and one
9128 # with network, for the latter activation in step 4
9129 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9130 if self.instance.primary_node == o_node1:
9131 p_minor = o_minor1
9132 else:
9133 assert self.instance.primary_node == o_node2, "Three-node instance?"
9134 p_minor = o_minor2
9136 new_alone_id = (self.instance.primary_node, self.new_node, None,
9137 p_minor, new_minor, o_secret)
9138 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9139 p_minor, new_minor, o_secret)
9141 iv_names[idx] = (dev, dev.children, new_net_id)
9142 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9144 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9145 logical_id=new_alone_id,
9146 children=dev.children,
9149 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9150 _GetInstanceInfoText(self.instance), False)
9151 except errors.GenericError:
9152 self.cfg.ReleaseDRBDMinors(self.instance.name)
9155 # We have new devices, shutdown the drbd on the old secondary
9156 for idx, dev in enumerate(self.instance.disks):
9157 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9158 self.cfg.SetDiskID(dev, self.target_node)
9159 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9160 if msg:
9161 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9162 " node: %s" % (idx, msg),
9163 hint=("Please cleanup this device manually as"
9164 " soon as possible"))
9166 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9167 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9168 self.node_secondary_ip,
9169 self.instance.disks)\
9170 [self.instance.primary_node]
9172 msg = result.fail_msg
9174 # detaches didn't succeed (unlikely)
9175 self.cfg.ReleaseDRBDMinors(self.instance.name)
9176 raise errors.OpExecError("Can't detach the disks from the network on"
9177 " old node: %s" % (msg,))
9179 # if we managed to detach at least one, we update all the disks of
9180 # the instance to point to the new secondary
9181 self.lu.LogInfo("Updating instance configuration")
9182 for dev, _, new_logical_id in iv_names.itervalues():
9183 dev.logical_id = new_logical_id
9184 self.cfg.SetDiskID(dev, self.instance.primary_node)
9186 self.cfg.Update(self.instance, feedback_fn)
9188 # and now perform the drbd attach
9189 self.lu.LogInfo("Attaching primary drbds to new secondary"
9190 " (standalone => connected)")
9191 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9193 self.node_secondary_ip,
9194 self.instance.disks,
9197 for to_node, to_result in result.items():
9198 msg = to_result.fail_msg
9200 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9202 hint=("please do a gnt-instance info to see the"
9203 " status of disks"))
9205 if self.early_release:
9206 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9208 self._RemoveOldStorage(self.target_node, iv_names)
9209 # WARNING: we release all node locks here, do not do other RPCs
9210 # than WaitForSync to the primary node
9211 self._ReleaseNodeLock([self.instance.primary_node,
9216 # This can fail as the old devices are degraded and _WaitForSync
9217 # does a combined result over all disks, so we don't check its return value
9218 self.lu.LogStep(cstep, steps_total, "Sync devices")
9220 _WaitForSync(self.lu, self.instance)
9222 # Check all devices manually
9223 self._CheckDevices(self.instance.primary_node, iv_names)
9225 # Step: remove old storage
9226 if not self.early_release:
9227 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9228 self._RemoveOldStorage(self.target_node, iv_names)
9231 class LURepairNodeStorage(NoHooksLU):
9232 """Repairs the volume group on a node.
9237 def CheckArguments(self):
9238 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9240 storage_type = self.op.storage_type
9242 if (constants.SO_FIX_CONSISTENCY not in
9243 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9244 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9245 " repaired" % storage_type,
9248 def ExpandNames(self):
9249 self.needed_locks = {
9250 locking.LEVEL_NODE: [self.op.node_name],
9253 def _CheckFaultyDisks(self, instance, node_name):
9254 """Ensure faulty disks abort the opcode or at least warn."""
9256 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9258 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9259 " node '%s'" % (instance.name, node_name),
9261 except errors.OpPrereqError, err:
9262 if self.op.ignore_consistency:
9263 self.proc.LogWarning(str(err.args[0]))
9267 def CheckPrereq(self):
9268 """Check prerequisites.
9271 # Check whether any instance on this node has faulty disks
9272 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9273 if not inst.admin_up:
9275 check_nodes = set(inst.all_nodes)
9276 check_nodes.discard(self.op.node_name)
9277 for inst_node_name in check_nodes:
9278 self._CheckFaultyDisks(inst, inst_node_name)
9280 def Exec(self, feedback_fn):
9281 feedback_fn("Repairing storage unit '%s' on %s ..." %
9282 (self.op.name, self.op.node_name))
9284 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9285 result = self.rpc.call_storage_execute(self.op.node_name,
9286 self.op.storage_type, st_args,
9288 constants.SO_FIX_CONSISTENCY)
9289 result.Raise("Failed to repair storage unit '%s' on %s" %
9290 (self.op.name, self.op.node_name))
9293 class LUNodeEvacStrategy(NoHooksLU):
9294 """Computes the node evacuation strategy.
9299 def CheckArguments(self):
9300 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9302 def ExpandNames(self):
9303 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9304 self.needed_locks = locks = {}
9305 if self.op.remote_node is None:
9306 locks[locking.LEVEL_NODE] = locking.ALL_SET
9308 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9309 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9311 def Exec(self, feedback_fn):
9312 if self.op.remote_node is not None:
9314 for node in self.op.nodes:
9315 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9318 if i.primary_node == self.op.remote_node:
9319 raise errors.OpPrereqError("Node %s is the primary node of"
9320 " instance %s, cannot use it as"
9322 (self.op.remote_node, i.name),
9324 result.append([i.name, self.op.remote_node])
9326 ial = IAllocator(self.cfg, self.rpc,
9327 mode=constants.IALLOCATOR_MODE_MEVAC,
9328 evac_nodes=self.op.nodes)
9329 ial.Run(self.op.iallocator, validate=True)
9331 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9337 class LUInstanceGrowDisk(LogicalUnit):
9338 """Grow a disk of an instance.
9342 HTYPE = constants.HTYPE_INSTANCE
9345 def ExpandNames(self):
9346 self._ExpandAndLockInstance()
9347 self.needed_locks[locking.LEVEL_NODE] = []
9348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9350 def DeclareLocks(self, level):
9351 if level == locking.LEVEL_NODE:
9352 self._LockInstancesNodes()
9354 def BuildHooksEnv(self):
9357 This runs on the master, the primary and all the secondaries.
9361 "DISK": self.op.disk,
9362 "AMOUNT": self.op.amount,
9364 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9367 def BuildHooksNodes(self):
9368 """Build hooks nodes.
9371 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9374 def CheckPrereq(self):
9375 """Check prerequisites.
9377 This checks that the instance is in the cluster.
9380 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9381 assert instance is not None, \
9382 "Cannot retrieve locked instance %s" % self.op.instance_name
9383 nodenames = list(instance.all_nodes)
9384 for node in nodenames:
9385 _CheckNodeOnline(self, node)
9387 self.instance = instance
9389 if instance.disk_template not in constants.DTS_GROWABLE:
9390 raise errors.OpPrereqError("Instance's disk layout does not support"
9391 " growing.", errors.ECODE_INVAL)
9393 self.disk = instance.FindDisk(self.op.disk)
9395 if instance.disk_template not in (constants.DT_FILE,
9396 constants.DT_SHARED_FILE):
9397 # TODO: check the free disk space for file, when that feature will be
9399 _CheckNodesFreeDiskPerVG(self, nodenames,
9400 self.disk.ComputeGrowth(self.op.amount))
9402 def Exec(self, feedback_fn):
9403 """Execute disk grow.
9406 instance = self.instance
9409 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9411 raise errors.OpExecError("Cannot activate block device to grow")
9413 for node in instance.all_nodes:
9414 self.cfg.SetDiskID(disk, node)
9415 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9416 result.Raise("Grow request failed to node %s" % node)
9418 # TODO: Rewrite code to work properly
9419 # DRBD goes into sync mode for a short amount of time after executing the
9420 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9421 # calling "resize" in sync mode fails. Sleeping for a short amount of
9422 # time is a work-around.
9425 disk.RecordGrow(self.op.amount)
9426 self.cfg.Update(instance, feedback_fn)
9427 if self.op.wait_for_sync:
9428 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9430 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9431 " status.\nPlease check the instance.")
9432 if not instance.admin_up:
9433 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9434 elif not instance.admin_up:
9435 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9436 " not supposed to be running because no wait for"
9437 " sync mode was requested.")
9440 class LUInstanceQueryData(NoHooksLU):
9441 """Query runtime instance data.
9446 def ExpandNames(self):
9447 self.needed_locks = {}
9449 # Use locking if requested or when non-static information is wanted
9450 if not (self.op.static or self.op.use_locking):
9451 self.LogWarning("Non-static data requested, locks need to be acquired")
9452 self.op.use_locking = True
9454 if self.op.instances or not self.op.use_locking:
9455 # Expand instance names right here
9456 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9458 # Will use acquired locks
9459 self.wanted_names = None
9461 if self.op.use_locking:
9462 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9464 if self.wanted_names is None:
9465 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9467 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9469 self.needed_locks[locking.LEVEL_NODE] = []
9470 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9473 def DeclareLocks(self, level):
9474 if self.op.use_locking and level == locking.LEVEL_NODE:
9475 self._LockInstancesNodes()
9477 def CheckPrereq(self):
9478 """Check prerequisites.
9480 This only checks the optional instance list against the existing names.
9483 if self.wanted_names is None:
9484 assert self.op.use_locking, "Locking was not used"
9485 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9487 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9488 for name in self.wanted_names]
9490 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9491 """Returns the status of a block device
9494 if self.op.static or not node:
9497 self.cfg.SetDiskID(dev, node)
9499 result = self.rpc.call_blockdev_find(node, dev)
9503 result.Raise("Can't compute disk status for %s" % instance_name)
9505 status = result.payload
9509 return (status.dev_path, status.major, status.minor,
9510 status.sync_percent, status.estimated_time,
9511 status.is_degraded, status.ldisk_status)
9513 def _ComputeDiskStatus(self, instance, snode, dev):
9514 """Compute block device status.
9517 if dev.dev_type in constants.LDS_DRBD:
9518 # we change the snode then (otherwise we use the one passed in)
9519 if dev.logical_id[0] == instance.primary_node:
9520 snode = dev.logical_id[1]
9522 snode = dev.logical_id[0]
9524 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9526 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9529 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9530 for child in dev.children]
9535 "iv_name": dev.iv_name,
9536 "dev_type": dev.dev_type,
9537 "logical_id": dev.logical_id,
9538 "physical_id": dev.physical_id,
9539 "pstatus": dev_pstatus,
9540 "sstatus": dev_sstatus,
9541 "children": dev_children,
9546 def Exec(self, feedback_fn):
9547 """Gather and return data"""
9550 cluster = self.cfg.GetClusterInfo()
9552 for instance in self.wanted_instances:
9553 if not self.op.static:
9554 remote_info = self.rpc.call_instance_info(instance.primary_node,
9556 instance.hypervisor)
9557 remote_info.Raise("Error checking node %s" % instance.primary_node)
9558 remote_info = remote_info.payload
9559 if remote_info and "state" in remote_info:
9562 remote_state = "down"
9565 if instance.admin_up:
9568 config_state = "down"
9570 disks = [self._ComputeDiskStatus(instance, None, device)
9571 for device in instance.disks]
9573 result[instance.name] = {
9574 "name": instance.name,
9575 "config_state": config_state,
9576 "run_state": remote_state,
9577 "pnode": instance.primary_node,
9578 "snodes": instance.secondary_nodes,
9580 # this happens to be the same format used for hooks
9581 "nics": _NICListToTuple(self, instance.nics),
9582 "disk_template": instance.disk_template,
9584 "hypervisor": instance.hypervisor,
9585 "network_port": instance.network_port,
9586 "hv_instance": instance.hvparams,
9587 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9588 "be_instance": instance.beparams,
9589 "be_actual": cluster.FillBE(instance),
9590 "os_instance": instance.osparams,
9591 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9592 "serial_no": instance.serial_no,
9593 "mtime": instance.mtime,
9594 "ctime": instance.ctime,
9595 "uuid": instance.uuid,
9601 class LUInstanceSetParams(LogicalUnit):
9602 """Modifies an instances's parameters.
9605 HPATH = "instance-modify"
9606 HTYPE = constants.HTYPE_INSTANCE
9609 def CheckArguments(self):
9610 if not (self.op.nics or self.op.disks or self.op.disk_template or
9611 self.op.hvparams or self.op.beparams or self.op.os_name):
9612 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9614 if self.op.hvparams:
9615 _CheckGlobalHvParams(self.op.hvparams)
9619 for disk_op, disk_dict in self.op.disks:
9620 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9621 if disk_op == constants.DDM_REMOVE:
9624 elif disk_op == constants.DDM_ADD:
9627 if not isinstance(disk_op, int):
9628 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9629 if not isinstance(disk_dict, dict):
9630 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9631 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9633 if disk_op == constants.DDM_ADD:
9634 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9635 if mode not in constants.DISK_ACCESS_SET:
9636 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9638 size = disk_dict.get(constants.IDISK_SIZE, None)
9640 raise errors.OpPrereqError("Required disk parameter size missing",
9644 except (TypeError, ValueError), err:
9645 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9646 str(err), errors.ECODE_INVAL)
9647 disk_dict[constants.IDISK_SIZE] = size
9649 # modification of disk
9650 if constants.IDISK_SIZE in disk_dict:
9651 raise errors.OpPrereqError("Disk size change not possible, use"
9652 " grow-disk", errors.ECODE_INVAL)
9654 if disk_addremove > 1:
9655 raise errors.OpPrereqError("Only one disk add or remove operation"
9656 " supported at a time", errors.ECODE_INVAL)
9658 if self.op.disks and self.op.disk_template is not None:
9659 raise errors.OpPrereqError("Disk template conversion and other disk"
9660 " changes not supported at the same time",
9663 if (self.op.disk_template and
9664 self.op.disk_template in constants.DTS_INT_MIRROR and
9665 self.op.remote_node is None):
9666 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9667 " one requires specifying a secondary node",
9672 for nic_op, nic_dict in self.op.nics:
9673 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9674 if nic_op == constants.DDM_REMOVE:
9677 elif nic_op == constants.DDM_ADD:
9680 if not isinstance(nic_op, int):
9681 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9682 if not isinstance(nic_dict, dict):
9683 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9684 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9686 # nic_dict should be a dict
9687 nic_ip = nic_dict.get(constants.INIC_IP, None)
9688 if nic_ip is not None:
9689 if nic_ip.lower() == constants.VALUE_NONE:
9690 nic_dict[constants.INIC_IP] = None
9692 if not netutils.IPAddress.IsValid(nic_ip):
9693 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9696 nic_bridge = nic_dict.get('bridge', None)
9697 nic_link = nic_dict.get(constants.INIC_LINK, None)
9698 if nic_bridge and nic_link:
9699 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9700 " at the same time", errors.ECODE_INVAL)
9701 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9702 nic_dict['bridge'] = None
9703 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9704 nic_dict[constants.INIC_LINK] = None
9706 if nic_op == constants.DDM_ADD:
9707 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9709 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9711 if constants.INIC_MAC in nic_dict:
9712 nic_mac = nic_dict[constants.INIC_MAC]
9713 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9714 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9716 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9717 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9718 " modifying an existing nic",
9721 if nic_addremove > 1:
9722 raise errors.OpPrereqError("Only one NIC add or remove operation"
9723 " supported at a time", errors.ECODE_INVAL)
9725 def ExpandNames(self):
9726 self._ExpandAndLockInstance()
9727 self.needed_locks[locking.LEVEL_NODE] = []
9728 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9730 def DeclareLocks(self, level):
9731 if level == locking.LEVEL_NODE:
9732 self._LockInstancesNodes()
9733 if self.op.disk_template and self.op.remote_node:
9734 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9735 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9737 def BuildHooksEnv(self):
9740 This runs on the master, primary and secondaries.
9744 if constants.BE_MEMORY in self.be_new:
9745 args['memory'] = self.be_new[constants.BE_MEMORY]
9746 if constants.BE_VCPUS in self.be_new:
9747 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9748 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9749 # information at all.
9752 nic_override = dict(self.op.nics)
9753 for idx, nic in enumerate(self.instance.nics):
9754 if idx in nic_override:
9755 this_nic_override = nic_override[idx]
9757 this_nic_override = {}
9758 if constants.INIC_IP in this_nic_override:
9759 ip = this_nic_override[constants.INIC_IP]
9762 if constants.INIC_MAC in this_nic_override:
9763 mac = this_nic_override[constants.INIC_MAC]
9766 if idx in self.nic_pnew:
9767 nicparams = self.nic_pnew[idx]
9769 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9770 mode = nicparams[constants.NIC_MODE]
9771 link = nicparams[constants.NIC_LINK]
9772 args['nics'].append((ip, mac, mode, link))
9773 if constants.DDM_ADD in nic_override:
9774 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9775 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9776 nicparams = self.nic_pnew[constants.DDM_ADD]
9777 mode = nicparams[constants.NIC_MODE]
9778 link = nicparams[constants.NIC_LINK]
9779 args['nics'].append((ip, mac, mode, link))
9780 elif constants.DDM_REMOVE in nic_override:
9781 del args['nics'][-1]
9783 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9784 if self.op.disk_template:
9785 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9789 def BuildHooksNodes(self):
9790 """Build hooks nodes.
9793 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9796 def CheckPrereq(self):
9797 """Check prerequisites.
9799 This only checks the instance list against the existing names.
9802 # checking the new params on the primary/secondary nodes
9804 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9805 cluster = self.cluster = self.cfg.GetClusterInfo()
9806 assert self.instance is not None, \
9807 "Cannot retrieve locked instance %s" % self.op.instance_name
9808 pnode = instance.primary_node
9809 nodelist = list(instance.all_nodes)
9812 if self.op.os_name and not self.op.force:
9813 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9814 self.op.force_variant)
9815 instance_os = self.op.os_name
9817 instance_os = instance.os
9819 if self.op.disk_template:
9820 if instance.disk_template == self.op.disk_template:
9821 raise errors.OpPrereqError("Instance already has disk template %s" %
9822 instance.disk_template, errors.ECODE_INVAL)
9824 if (instance.disk_template,
9825 self.op.disk_template) not in self._DISK_CONVERSIONS:
9826 raise errors.OpPrereqError("Unsupported disk template conversion from"
9827 " %s to %s" % (instance.disk_template,
9828 self.op.disk_template),
9830 _CheckInstanceDown(self, instance, "cannot change disk template")
9831 if self.op.disk_template in constants.DTS_INT_MIRROR:
9832 if self.op.remote_node == pnode:
9833 raise errors.OpPrereqError("Given new secondary node %s is the same"
9834 " as the primary node of the instance" %
9835 self.op.remote_node, errors.ECODE_STATE)
9836 _CheckNodeOnline(self, self.op.remote_node)
9837 _CheckNodeNotDrained(self, self.op.remote_node)
9838 # FIXME: here we assume that the old instance type is DT_PLAIN
9839 assert instance.disk_template == constants.DT_PLAIN
9840 disks = [{constants.IDISK_SIZE: d.size,
9841 constants.IDISK_VG: d.logical_id[0]}
9842 for d in instance.disks]
9843 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9844 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9846 # hvparams processing
9847 if self.op.hvparams:
9848 hv_type = instance.hypervisor
9849 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9850 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9851 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9854 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9855 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9856 self.hv_new = hv_new # the new actual values
9857 self.hv_inst = i_hvdict # the new dict (without defaults)
9859 self.hv_new = self.hv_inst = {}
9861 # beparams processing
9862 if self.op.beparams:
9863 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9865 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9866 be_new = cluster.SimpleFillBE(i_bedict)
9867 self.be_new = be_new # the new actual values
9868 self.be_inst = i_bedict # the new dict (without defaults)
9870 self.be_new = self.be_inst = {}
9872 # osparams processing
9873 if self.op.osparams:
9874 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9875 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9876 self.os_inst = i_osdict # the new dict (without defaults)
9882 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9883 mem_check_list = [pnode]
9884 if be_new[constants.BE_AUTO_BALANCE]:
9885 # either we changed auto_balance to yes or it was from before
9886 mem_check_list.extend(instance.secondary_nodes)
9887 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9888 instance.hypervisor)
9889 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9890 instance.hypervisor)
9891 pninfo = nodeinfo[pnode]
9892 msg = pninfo.fail_msg
9894 # Assume the primary node is unreachable and go ahead
9895 self.warn.append("Can't get info from primary node %s: %s" %
9897 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9898 self.warn.append("Node data from primary node %s doesn't contain"
9899 " free memory information" % pnode)
9900 elif instance_info.fail_msg:
9901 self.warn.append("Can't get instance runtime information: %s" %
9902 instance_info.fail_msg)
9904 if instance_info.payload:
9905 current_mem = int(instance_info.payload['memory'])
9907 # Assume instance not running
9908 # (there is a slight race condition here, but it's not very probable,
9909 # and we have no other way to check)
9911 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9912 pninfo.payload['memory_free'])
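# Added comment: miss_mem is the shortfall on the primary node, i.e. the
# requested memory minus what the instance already uses minus what the node
# has free; a positive value means the instance could no longer be started.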
9914 raise errors.OpPrereqError("This change will prevent the instance"
9915 " from starting, due to %d MB of memory"
9916 " missing on its primary node" % miss_mem,
9919 if be_new[constants.BE_AUTO_BALANCE]:
9920 for node, nres in nodeinfo.items():
9921 if node not in instance.secondary_nodes:
9925 self.warn.append("Can't get info from secondary node %s: %s" %
9927 elif not isinstance(nres.payload.get('memory_free', None), int):
9928 self.warn.append("Secondary node %s didn't return free"
9929 " memory information" % node)
9930 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9931 self.warn.append("Not enough memory to failover instance to"
9932 " secondary node %s" % node)
9937 for nic_op, nic_dict in self.op.nics:
9938 if nic_op == constants.DDM_REMOVE:
9939 if not instance.nics:
9940 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9943 if nic_op != constants.DDM_ADD:
9945 if not instance.nics:
9946 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9947 " no NICs" % nic_op,
9949 if nic_op < 0 or nic_op >= len(instance.nics):
9950 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9952 (nic_op, len(instance.nics) - 1),
9954 old_nic_params = instance.nics[nic_op].nicparams
9955 old_nic_ip = instance.nics[nic_op].ip
9960 update_params_dict = dict([(key, nic_dict[key])
9961 for key in constants.NICS_PARAMETERS
9962 if key in nic_dict])
9964 if 'bridge' in nic_dict:
9965 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9967 new_nic_params = _GetUpdatedParams(old_nic_params,
9969 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9970 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9971 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9972 self.nic_pinst[nic_op] = new_nic_params
9973 self.nic_pnew[nic_op] = new_filled_nic_params
9974 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9976 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9977 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9978 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9980 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9982 self.warn.append(msg)
9984 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9985 if new_nic_mode == constants.NIC_MODE_ROUTED:
9986 if constants.INIC_IP in nic_dict:
9987 nic_ip = nic_dict[constants.INIC_IP]
9991 raise errors.OpPrereqError('Cannot set the nic ip to None'
9992 ' on a routed nic', errors.ECODE_INVAL)
9993 if constants.INIC_MAC in nic_dict:
9994 nic_mac = nic_dict[constants.INIC_MAC]
9996 raise errors.OpPrereqError('Cannot set the nic mac to None',
9998 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9999 # otherwise generate the mac
10000 nic_dict[constants.INIC_MAC] = \
10001 self.cfg.GenerateMAC(self.proc.GetECId())
10003 # or validate/reserve the current one
10005 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10006 except errors.ReservationError:
10007 raise errors.OpPrereqError("MAC address %s already in use"
10008 " in cluster" % nic_mac,
10009 errors.ECODE_NOTUNIQUE)
10012 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10013 raise errors.OpPrereqError("Disk operations not supported for"
10014 " diskless instances",
10015 errors.ECODE_INVAL)
10016 for disk_op, _ in self.op.disks:
10017 if disk_op == constants.DDM_REMOVE:
10018 if len(instance.disks) == 1:
10019 raise errors.OpPrereqError("Cannot remove the last disk of"
10020 " an instance", errors.ECODE_INVAL)
10021 _CheckInstanceDown(self, instance, "cannot remove disks")
10023 if (disk_op == constants.DDM_ADD and
10024 len(instance.disks) >= constants.MAX_DISKS):
10025 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10026 " add more" % constants.MAX_DISKS,
10027 errors.ECODE_STATE)
10028 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10030 if disk_op < 0 or disk_op >= len(instance.disks):
10031 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10033 (disk_op, len(instance.disks)),
10034 errors.ECODE_INVAL)
10038 def _ConvertPlainToDrbd(self, feedback_fn):
10039 """Converts an instance from plain to drbd.
10042 feedback_fn("Converting template to drbd")
10043 instance = self.instance
10044 pnode = instance.primary_node
10045 snode = self.op.remote_node
10047 # create a fake disk info for _GenerateDiskTemplate
10048 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10049 constants.IDISK_VG: d.logical_id[0]}
10050 for d in instance.disks]
10051 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10052 instance.name, pnode, [snode],
10053 disk_info, None, None, 0, feedback_fn)
10054 info = _GetInstanceInfoText(instance)
10055 feedback_fn("Creating aditional volumes...")
10056 # first, create the missing data and meta devices
10057 for disk in new_disks:
10058 # unfortunately this is... not too nice
10059 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10061 for child in disk.children:
10062 _CreateSingleBlockDev(self, snode, instance, child, info, True)
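# Added comment: the net effect of the loop above is that the primary node
# only gains the new metadata LV (its existing data LV is reused and renamed
# below), while the new secondary gets both the data and the metadata LV
# created from scratch.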
10063 # at this stage, all new LVs have been created, we can rename the
10065 feedback_fn("Renaming original volumes...")
10066 rename_list = [(o, n.children[0].logical_id)
10067 for (o, n) in zip(instance.disks, new_disks)]
10068 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10069 result.Raise("Failed to rename original LVs")
10071 feedback_fn("Initializing DRBD devices...")
10072 # all child devices are in place, we can now create the DRBD devices
10073 for disk in new_disks:
10074 for node in [pnode, snode]:
10075 f_create = node == pnode
10076 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10078 # at this point, the instance has been modified
10079 instance.disk_template = constants.DT_DRBD8
10080 instance.disks = new_disks
10081 self.cfg.Update(instance, feedback_fn)
10083 # disks are created, waiting for sync
10084 disk_abort = not _WaitForSync(self, instance)
10086 raise errors.OpExecError("There are some degraded disks for"
10087 " this instance, please cleanup manually")
10089 def _ConvertDrbdToPlain(self, feedback_fn):
10090 """Converts an instance from drbd to plain.
10093 instance = self.instance
10094 assert len(instance.secondary_nodes) == 1
10095 pnode = instance.primary_node
10096 snode = instance.secondary_nodes[0]
10097 feedback_fn("Converting template to plain")
10099 old_disks = instance.disks
10100 new_disks = [d.children[0] for d in old_disks]
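# Added comment: for the DRBD8 disks built by _ConvertPlainToDrbd, children[0]
# is the data LV and children[1] the metadata LV, so keeping children[0] turns
# each disk back into a plain LV.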
10102 # copy over size and mode
10103 for parent, child in zip(old_disks, new_disks):
10104 child.size = parent.size
10105 child.mode = parent.mode
10107 # update instance structure
10108 instance.disks = new_disks
10109 instance.disk_template = constants.DT_PLAIN
10110 self.cfg.Update(instance, feedback_fn)
10112 feedback_fn("Removing volumes on the secondary node...")
10113 for disk in old_disks:
10114 self.cfg.SetDiskID(disk, snode)
10115 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10117 self.LogWarning("Could not remove block device %s on node %s,"
10118 " continuing anyway: %s", disk.iv_name, snode, msg)
10120 feedback_fn("Removing unneeded volumes on the primary node...")
10121 for idx, disk in enumerate(old_disks):
10122 meta = disk.children[1]
10123 self.cfg.SetDiskID(meta, pnode)
10124 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10126 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10127 " continuing anyway: %s", idx, pnode, msg)
10129 def Exec(self, feedback_fn):
10130 """Modifies an instance.
10132 All parameters take effect only at the next restart of the instance.
10135 # Process here the warnings from CheckPrereq, as we don't have a
10136 # feedback_fn there.
10137 for warn in self.warn:
10138 feedback_fn("WARNING: %s" % warn)
10141 instance = self.instance
10143 for disk_op, disk_dict in self.op.disks:
10144 if disk_op == constants.DDM_REMOVE:
10145 # remove the last disk
10146 device = instance.disks.pop()
10147 device_idx = len(instance.disks)
10148 for node, disk in device.ComputeNodeTree(instance.primary_node):
10149 self.cfg.SetDiskID(disk, node)
10150 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10152 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10153 " continuing anyway", device_idx, node, msg)
10154 result.append(("disk/%d" % device_idx, "remove"))
10155 elif disk_op == constants.DDM_ADD:
10157 if instance.disk_template in (constants.DT_FILE,
10158 constants.DT_SHARED_FILE):
10159 file_driver, file_path = instance.disks[0].logical_id
10160 file_path = os.path.dirname(file_path)
10162 file_driver = file_path = None
10163 disk_idx_base = len(instance.disks)
10164 new_disk = _GenerateDiskTemplate(self,
10165 instance.disk_template,
10166 instance.name, instance.primary_node,
10167 instance.secondary_nodes,
10171 disk_idx_base, feedback_fn)[0]
10172 instance.disks.append(new_disk)
10173 info = _GetInstanceInfoText(instance)
10175 logging.info("Creating volume %s for instance %s",
10176 new_disk.iv_name, instance.name)
10177 # Note: this needs to be kept in sync with _CreateDisks
10179 for node in instance.all_nodes:
10180 f_create = node == instance.primary_node
10182 _CreateBlockDev(self, node, instance, new_disk,
10183 f_create, info, f_create)
10184 except errors.OpExecError, err:
10185 self.LogWarning("Failed to create volume %s (%s) on"
10187 new_disk.iv_name, new_disk, node, err)
10188 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10189 (new_disk.size, new_disk.mode)))
10191 # change a given disk
10192 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10193 result.append(("disk.mode/%d" % disk_op,
10194 disk_dict[constants.IDISK_MODE]))
10196 if self.op.disk_template:
10197 r_shut = _ShutdownInstanceDisks(self, instance)
10199 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10200 " proceed with disk template conversion")
10201 mode = (instance.disk_template, self.op.disk_template)
10203 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10205 self.cfg.ReleaseDRBDMinors(instance.name)
10207 result.append(("disk_template", self.op.disk_template))
10210 for nic_op, nic_dict in self.op.nics:
10211 if nic_op == constants.DDM_REMOVE:
10212 # remove the last nic
10213 del instance.nics[-1]
10214 result.append(("nic.%d" % len(instance.nics), "remove"))
10215 elif nic_op == constants.DDM_ADD:
10216 # mac and bridge should be set by now
10217 mac = nic_dict[constants.INIC_MAC]
10218 ip = nic_dict.get(constants.INIC_IP, None)
10219 nicparams = self.nic_pinst[constants.DDM_ADD]
10220 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10221 instance.nics.append(new_nic)
10222 result.append(("nic.%d" % (len(instance.nics) - 1),
10223 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10224 (new_nic.mac, new_nic.ip,
10225 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10226 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10229 for key in (constants.INIC_MAC, constants.INIC_IP):
10230 if key in nic_dict:
10231 setattr(instance.nics[nic_op], key, nic_dict[key])
10232 if nic_op in self.nic_pinst:
10233 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10234 for key, val in nic_dict.iteritems():
10235 result.append(("nic.%s/%d" % (key, nic_op), val))
10238 if self.op.hvparams:
10239 instance.hvparams = self.hv_inst
10240 for key, val in self.op.hvparams.iteritems():
10241 result.append(("hv/%s" % key, val))
10244 if self.op.beparams:
10245 instance.beparams = self.be_inst
10246 for key, val in self.op.beparams.iteritems():
10247 result.append(("be/%s" % key, val))
10250 if self.op.os_name:
10251 instance.os = self.op.os_name
10254 if self.op.osparams:
10255 instance.osparams = self.os_inst
10256 for key, val in self.op.osparams.iteritems():
10257 result.append(("os/%s" % key, val))
10259 self.cfg.Update(instance, feedback_fn)
10263 _DISK_CONVERSIONS = {
10264 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10265 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
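# Added comment: keys are (old_template, new_template) pairs and values are
# the conversion methods; Exec above looks the handler up as
# self._DISK_CONVERSIONS[mode] and calls it with feedback_fn, so only the
# plain<->drbd conversions listed here are supported.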
10269 class LUBackupQuery(NoHooksLU):
10270 """Query the exports list
10275 def ExpandNames(self):
10276 self.needed_locks = {}
10277 self.share_locks[locking.LEVEL_NODE] = 1
10278 if not self.op.nodes:
10279 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10281 self.needed_locks[locking.LEVEL_NODE] = \
10282 _GetWantedNodes(self, self.op.nodes)
10284 def Exec(self, feedback_fn):
10285 """Compute the list of all the exported system images.
10288 @return: a dictionary with the structure node->(export-list)
10289 where export-list is a list of the instances exported on
10293 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10294 rpcresult = self.rpc.call_export_list(self.nodes)
10296 for node in rpcresult:
10297 if rpcresult[node].fail_msg:
10298 result[node] = False
10300 result[node] = rpcresult[node].payload
10305 class LUBackupPrepare(NoHooksLU):
10306 """Prepares an instance for an export and returns useful information.
10311 def ExpandNames(self):
10312 self._ExpandAndLockInstance()
10314 def CheckPrereq(self):
10315 """Check prerequisites.
10318 instance_name = self.op.instance_name
10320 self.instance = self.cfg.GetInstanceInfo(instance_name)
10321 assert self.instance is not None, \
10322 "Cannot retrieve locked instance %s" % self.op.instance_name
10323 _CheckNodeOnline(self, self.instance.primary_node)
10325 self._cds = _GetClusterDomainSecret()
10327 def Exec(self, feedback_fn):
10328 """Prepares an instance for an export.
10331 instance = self.instance
10333 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10334 salt = utils.GenerateSecret(8)
10336 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10337 result = self.rpc.call_x509_cert_create(instance.primary_node,
10338 constants.RIE_CERT_VALIDITY)
10339 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10341 (name, cert_pem) = result.payload
10343 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10347 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10348 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10350 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10356 class LUBackupExport(LogicalUnit):
10357 """Export an instance to an image in the cluster.
10360 HPATH = "instance-export"
10361 HTYPE = constants.HTYPE_INSTANCE
10364 def CheckArguments(self):
10365 """Check the arguments.
10368 self.x509_key_name = self.op.x509_key_name
10369 self.dest_x509_ca_pem = self.op.destination_x509_ca
10371 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10372 if not self.x509_key_name:
10373 raise errors.OpPrereqError("Missing X509 key name for encryption",
10374 errors.ECODE_INVAL)
10376 if not self.dest_x509_ca_pem:
10377 raise errors.OpPrereqError("Missing destination X509 CA",
10378 errors.ECODE_INVAL)
10380 def ExpandNames(self):
10381 self._ExpandAndLockInstance()
10383 # Lock all nodes for local exports
10384 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10385 # FIXME: lock only instance primary and destination node
10387 # Sad but true, for now we have to lock all nodes, as we don't know where
10388 # the previous export might be, and in this LU we search for it and
10389 # remove it from its current node. In the future we could fix this by:
10390 # - making a tasklet to search (share-lock all), then create the
10391 # new one, then one to remove, after
10392 # - removing the removal operation altogether
10393 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10395 def DeclareLocks(self, level):
10396 """Last minute lock declaration."""
10397 # All nodes are locked anyway, so nothing to do here.
10399 def BuildHooksEnv(self):
10400 """Build hooks env.
10402 This will run on the master, primary node and target node.
10406 "EXPORT_MODE": self.op.mode,
10407 "EXPORT_NODE": self.op.target_node,
10408 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10409 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10410 # TODO: Generic function for boolean env variables
10411 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10414 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10418 def BuildHooksNodes(self):
10419 """Build hooks nodes.
10422 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10424 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10425 nl.append(self.op.target_node)
10429 def CheckPrereq(self):
10430 """Check prerequisites.
10432 This checks that the instance and node names are valid.
10435 instance_name = self.op.instance_name
10437 self.instance = self.cfg.GetInstanceInfo(instance_name)
10438 assert self.instance is not None, \
10439 "Cannot retrieve locked instance %s" % self.op.instance_name
10440 _CheckNodeOnline(self, self.instance.primary_node)
10442 if (self.op.remove_instance and self.instance.admin_up and
10443 not self.op.shutdown):
10444 raise errors.OpPrereqError("Can not remove instance without shutting it"
10447 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10448 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10449 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10450 assert self.dst_node is not None
10452 _CheckNodeOnline(self, self.dst_node.name)
10453 _CheckNodeNotDrained(self, self.dst_node.name)
10456 self.dest_disk_info = None
10457 self.dest_x509_ca = None
10459 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10460 self.dst_node = None
10462 if len(self.op.target_node) != len(self.instance.disks):
10463 raise errors.OpPrereqError(("Received destination information for %s"
10464 " disks, but instance %s has %s disks") %
10465 (len(self.op.target_node), instance_name,
10466 len(self.instance.disks)),
10467 errors.ECODE_INVAL)
10469 cds = _GetClusterDomainSecret()
10471 # Check X509 key name
10473 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10474 except (TypeError, ValueError), err:
10475 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10477 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10478 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10479 errors.ECODE_INVAL)
10481 # Load and verify CA
10483 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10484 except OpenSSL.crypto.Error, err:
10485 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10486 (err, ), errors.ECODE_INVAL)
10488 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10489 if errcode is not None:
10490 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10491 (msg, ), errors.ECODE_INVAL)
10493 self.dest_x509_ca = cert
10495 # Verify target information
10497 for idx, disk_data in enumerate(self.op.target_node):
10499 (host, port, magic) = \
10500 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10501 except errors.GenericError, err:
10502 raise errors.OpPrereqError("Target info for disk %s: %s" %
10503 (idx, err), errors.ECODE_INVAL)
10505 disk_info.append((host, port, magic))
10507 assert len(disk_info) == len(self.op.target_node)
10508 self.dest_disk_info = disk_info
10511 raise errors.ProgrammerError("Unhandled export mode %r" %
10514 # instance disk type verification
10515 # TODO: Implement export support for file-based disks
10516 for disk in self.instance.disks:
10517 if disk.dev_type == constants.LD_FILE:
10518 raise errors.OpPrereqError("Export not supported for instances with"
10519 " file-based disks", errors.ECODE_INVAL)
10521 def _CleanupExports(self, feedback_fn):
10522 """Removes exports of current instance from all other nodes.
10524 If an instance in a cluster with nodes A..D was exported to node C, its
10525 exports will be removed from the nodes A, B and D.
10528 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10530 nodelist = self.cfg.GetNodeList()
10531 nodelist.remove(self.dst_node.name)
10533 # on one-node clusters nodelist will be empty after the removal
10534 # if we proceed the backup would be removed because OpBackupQuery
10535 # substitutes an empty list with the full cluster node list.
10536 iname = self.instance.name
10538 feedback_fn("Removing old exports for instance %s" % iname)
10539 exportlist = self.rpc.call_export_list(nodelist)
10540 for node in exportlist:
10541 if exportlist[node].fail_msg:
10543 if iname in exportlist[node].payload:
10544 msg = self.rpc.call_export_remove(node, iname).fail_msg
10546 self.LogWarning("Could not remove older export for instance %s"
10547 " on node %s: %s", iname, node, msg)
10549 def Exec(self, feedback_fn):
10550 """Export an instance to an image in the cluster.
10553 assert self.op.mode in constants.EXPORT_MODES
10555 instance = self.instance
10556 src_node = instance.primary_node
10558 if self.op.shutdown:
10559 # shutdown the instance, but not the disks
10560 feedback_fn("Shutting down instance %s" % instance.name)
10561 result = self.rpc.call_instance_shutdown(src_node, instance,
10562 self.op.shutdown_timeout)
10563 # TODO: Maybe ignore failures if ignore_remove_failures is set
10564 result.Raise("Could not shutdown instance %s on"
10565 " node %s" % (instance.name, src_node))
10567 # set the disks ID correctly since call_instance_start needs the
10568 # correct drbd minor to create the symlinks
10569 for disk in instance.disks:
10570 self.cfg.SetDiskID(disk, src_node)
10572 activate_disks = (not instance.admin_up)
10575 # Activate the instance disks if we're exporting a stopped instance
10576 feedback_fn("Activating disks for %s" % instance.name)
10577 _StartInstanceDisks(self, instance, None)
10580 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10583 helper.CreateSnapshots()
10585 if (self.op.shutdown and instance.admin_up and
10586 not self.op.remove_instance):
10587 assert not activate_disks
10588 feedback_fn("Starting instance %s" % instance.name)
10589 result = self.rpc.call_instance_start(src_node, instance, None, None)
10590 msg = result.fail_msg
10592 feedback_fn("Failed to start instance: %s" % msg)
10593 _ShutdownInstanceDisks(self, instance)
10594 raise errors.OpExecError("Could not start instance: %s" % msg)
10596 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10597 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10598 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10599 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10600 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10602 (key_name, _, _) = self.x509_key_name
10605 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10608 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10609 key_name, dest_ca_pem,
10614 # Check for backwards compatibility
10615 assert len(dresults) == len(instance.disks)
10616 assert compat.all(isinstance(i, bool) for i in dresults), \
10617 "Not all results are boolean: %r" % dresults
10621 feedback_fn("Deactivating disks for %s" % instance.name)
10622 _ShutdownInstanceDisks(self, instance)
10624 if not (compat.all(dresults) and fin_resu):
10627 failures.append("export finalization")
10628 if not compat.all(dresults):
10629 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10631 failures.append("disk export: disk(s) %s" % fdsk)
10633 raise errors.OpExecError("Export failed, errors in %s" %
10634 utils.CommaJoin(failures))
10636 # At this point, the export was successful, we can cleanup/finish
10638 # Remove instance if requested
10639 if self.op.remove_instance:
10640 feedback_fn("Removing instance %s" % instance.name)
10641 _RemoveInstance(self, feedback_fn, instance,
10642 self.op.ignore_remove_failures)
10644 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10645 self._CleanupExports(feedback_fn)
10647 return fin_resu, dresults
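# Illustrative usage sketch (not in the original source): assuming the usual
# Op<->LU name mapping, a local export to another node would be submitted
# roughly as follows; instance and node names are made up for the example.
#   op = opcodes.OpBackupExport(instance_name="inst1.example.com",
#                               target_node="node2.example.com",
#                               mode=constants.EXPORT_MODE_LOCAL,
#                               shutdown=True)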
10650 class LUBackupRemove(NoHooksLU):
10651 """Remove exports related to the named instance.
10656 def ExpandNames(self):
10657 self.needed_locks = {}
10658 # We need all nodes to be locked in order for RemoveExport to work, but we
10659 # don't need to lock the instance itself, as nothing will happen to it (and
10660 # we can remove exports also for a removed instance)
10661 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10663 def Exec(self, feedback_fn):
10664 """Remove any export.
10667 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10668 # If the instance was not found we'll try with the name that was passed in.
10669 # This will only work if it was an FQDN, though.
10671 if not instance_name:
10673 instance_name = self.op.instance_name
10675 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10676 exportlist = self.rpc.call_export_list(locked_nodes)
10678 for node in exportlist:
10679 msg = exportlist[node].fail_msg
10681 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10683 if instance_name in exportlist[node].payload:
10685 result = self.rpc.call_export_remove(node, instance_name)
10686 msg = result.fail_msg
10688 logging.error("Could not remove export for instance %s"
10689 " on node %s: %s", instance_name, node, msg)
10691 if fqdn_warn and not found:
10692 feedback_fn("Export not found. If trying to remove an export belonging"
10693 " to a deleted instance please use its Fully Qualified"
10697 class LUGroupAdd(LogicalUnit):
10698 """Logical unit for creating node groups.
10701 HPATH = "group-add"
10702 HTYPE = constants.HTYPE_GROUP
10705 def ExpandNames(self):
10706 # We need the new group's UUID here so that we can create and acquire the
10707 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10708 # that it should not check whether the UUID exists in the configuration.
10709 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10710 self.needed_locks = {}
10711 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10713 def CheckPrereq(self):
10714 """Check prerequisites.
10716 This checks that the given group name is not an existing node group
10721 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10722 except errors.OpPrereqError:
10725 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10726 " node group (UUID: %s)" %
10727 (self.op.group_name, existing_uuid),
10728 errors.ECODE_EXISTS)
10730 if self.op.ndparams:
10731 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10733 def BuildHooksEnv(self):
10734 """Build hooks env.
10738 "GROUP_NAME": self.op.group_name,
10741 def BuildHooksNodes(self):
10742 """Build hooks nodes.
10745 mn = self.cfg.GetMasterNode()
10746 return ([mn], [mn])
10748 def Exec(self, feedback_fn):
10749 """Add the node group to the cluster.
10752 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10753 uuid=self.group_uuid,
10754 alloc_policy=self.op.alloc_policy,
10755 ndparams=self.op.ndparams)
10757 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10758 del self.remove_locks[locking.LEVEL_NODEGROUP]
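# Illustrative usage sketch (not in the original source): assuming the usual
# Op<->LU name mapping, adding a group would be submitted roughly as follows
# (the group name is made up for the example).
#   op = opcodes.OpGroupAdd(group_name="rack1",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)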
10761 class LUGroupAssignNodes(NoHooksLU):
10762 """Logical unit for assigning nodes to groups.
10767 def ExpandNames(self):
10768 # These raise errors.OpPrereqError on their own:
10769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10770 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10772 # We want to lock all the affected nodes and groups. We have readily
10773 # available the list of nodes, and the *destination* group. To gather the
10774 # list of "source" groups, we need to fetch node information.
10775 self.node_data = self.cfg.GetAllNodesInfo()
10776 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10777 affected_groups.add(self.group_uuid)
10779 self.needed_locks = {
10780 locking.LEVEL_NODEGROUP: list(affected_groups),
10781 locking.LEVEL_NODE: self.op.nodes,
10784 def CheckPrereq(self):
10785 """Check prerequisites.
10788 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10789 instance_data = self.cfg.GetAllInstancesInfo()
10791 if self.group is None:
10792 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10793 (self.op.group_name, self.group_uuid))
10795 (new_splits, previous_splits) = \
10796 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10797 for node in self.op.nodes],
10798 self.node_data, instance_data)
10801 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10803 if not self.op.force:
10804 raise errors.OpExecError("The following instances get split by this"
10805 " change and --force was not given: %s" %
10808 self.LogWarning("This operation will split the following instances: %s",
10811 if previous_splits:
10812 self.LogWarning("In addition, these already-split instances continue"
10813 " to be split across groups: %s",
10814 utils.CommaJoin(utils.NiceSort(previous_splits)))
10816 def Exec(self, feedback_fn):
10817 """Assign nodes to a new group.
10820 for node in self.op.nodes:
10821 self.node_data[node].group = self.group_uuid
10823 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10826 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10827 """Check for split instances after a node assignment.
10829 This method considers a series of node assignments as an atomic operation,
10830 and returns information about split instances after applying the set of
10833 In particular, it returns information about newly split instances, and
10834 instances that were already split, and remain so after the change.
10836 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10839 @type changes: list of (node_name, new_group_uuid) pairs.
10840 @param changes: list of node assignments to consider.
10841 @param node_data: a dict with data for all nodes
10842 @param instance_data: a dict with all instances to consider
10843 @rtype: a two-tuple
10844 @return: a list of instances that were previously okay and become split as a
10845 consequence of this change, and a list of instances that were previously
10846 split and that this change does not fix.
10849 changed_nodes = dict((node, group) for node, group in changes
10850 if node_data[node].group != group)
10852 all_split_instances = set()
10853 previously_split_instances = set()
10855 def InstanceNodes(instance):
10856 return [instance.primary_node] + list(instance.secondary_nodes)
10858 for inst in instance_data.values():
10859 if inst.disk_template not in constants.DTS_INT_MIRROR:
10862 instance_nodes = InstanceNodes(inst)
10864 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10865 previously_split_instances.add(inst.name)
10867 if len(set(changed_nodes.get(node, node_data[node].group)
10868 for node in instance_nodes)) > 1:
10869 all_split_instances.add(inst.name)
10871 return (list(all_split_instances - previously_split_instances),
10872 list(previously_split_instances & all_split_instances))
10875 class _GroupQuery(_QueryBase):
10876 FIELDS = query.GROUP_FIELDS
10878 def ExpandNames(self, lu):
10879 lu.needed_locks = {}
10881 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10882 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10885 self.wanted = [name_to_uuid[name]
10886 for name in utils.NiceSort(name_to_uuid.keys())]
10888 # Accept names to be either names or UUIDs.
10891 all_uuid = frozenset(self._all_groups.keys())
10893 for name in self.names:
10894 if name in all_uuid:
10895 self.wanted.append(name)
10896 elif name in name_to_uuid:
10897 self.wanted.append(name_to_uuid[name])
10899 missing.append(name)
10902 raise errors.OpPrereqError("Some groups do not exist: %s" %
10903 utils.CommaJoin(missing),
10904 errors.ECODE_NOENT)
10906 def DeclareLocks(self, lu, level):
10909 def _GetQueryData(self, lu):
10910 """Computes the list of node groups and their attributes.
10913 do_nodes = query.GQ_NODE in self.requested_data
10914 do_instances = query.GQ_INST in self.requested_data
10916 group_to_nodes = None
10917 group_to_instances = None
10919 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10920 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10921 # latter GetAllInstancesInfo() is not enough, for we have to go through
10922 # instance->node. Hence, we will need to process nodes even if we only need
10923 # instance information.
10924 if do_nodes or do_instances:
10925 all_nodes = lu.cfg.GetAllNodesInfo()
10926 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10929 for node in all_nodes.values():
10930 if node.group in group_to_nodes:
10931 group_to_nodes[node.group].append(node.name)
10932 node_to_group[node.name] = node.group
10935 all_instances = lu.cfg.GetAllInstancesInfo()
10936 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10938 for instance in all_instances.values():
10939 node = instance.primary_node
10940 if node in node_to_group:
10941 group_to_instances[node_to_group[node]].append(instance.name)
10944 # Do not pass on node information if it was not requested.
10945 group_to_nodes = None
10947 return query.GroupQueryData([self._all_groups[uuid]
10948 for uuid in self.wanted],
10949 group_to_nodes, group_to_instances)
10952 class LUGroupQuery(NoHooksLU):
10953 """Logical unit for querying node groups.
10958 def CheckArguments(self):
10959 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10960 self.op.output_fields, False)
10962 def ExpandNames(self):
10963 self.gq.ExpandNames(self)
10965 def Exec(self, feedback_fn):
10966 return self.gq.OldStyleQuery(self)
10969 class LUGroupSetParams(LogicalUnit):
10970 """Modifies the parameters of a node group.
10973 HPATH = "group-modify"
10974 HTYPE = constants.HTYPE_GROUP
10977 def CheckArguments(self):
10980 self.op.alloc_policy,
10983 if all_changes.count(None) == len(all_changes):
10984 raise errors.OpPrereqError("Please pass at least one modification",
10985 errors.ECODE_INVAL)
10987 def ExpandNames(self):
10988 # This raises errors.OpPrereqError on its own:
10989 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10991 self.needed_locks = {
10992 locking.LEVEL_NODEGROUP: [self.group_uuid],
10995 def CheckPrereq(self):
10996 """Check prerequisites.
10999 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11001 if self.group is None:
11002 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11003 (self.op.group_name, self.group_uuid))
11005 if self.op.ndparams:
11006 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11007 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11008 self.new_ndparams = new_ndparams
11010 def BuildHooksEnv(self):
11011 """Build hooks env.
11015 "GROUP_NAME": self.op.group_name,
11016 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11019 def BuildHooksNodes(self):
11020 """Build hooks nodes.
11023 mn = self.cfg.GetMasterNode()
11024 return ([mn], [mn])
11026 def Exec(self, feedback_fn):
11027 """Modifies the node group.
11032 if self.op.ndparams:
11033 self.group.ndparams = self.new_ndparams
11034 result.append(("ndparams", str(self.group.ndparams)))
11036 if self.op.alloc_policy:
11037 self.group.alloc_policy = self.op.alloc_policy
11039 self.cfg.Update(self.group, feedback_fn)
11044 class LUGroupRemove(LogicalUnit):
11045 HPATH = "group-remove"
11046 HTYPE = constants.HTYPE_GROUP
11049 def ExpandNames(self):
11050 # This raises errors.OpPrereqError on its own:
11051 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11052 self.needed_locks = {
11053 locking.LEVEL_NODEGROUP: [self.group_uuid],
11056 def CheckPrereq(self):
11057 """Check prerequisites.
11059 This checks that the given group name exists as a node group, that it is
11060 empty (i.e., contains no nodes), and that it is not the last group of the
11064 # Verify that the group is empty.
11065 group_nodes = [node.name
11066 for node in self.cfg.GetAllNodesInfo().values()
11067 if node.group == self.group_uuid]
11070 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11072 (self.op.group_name,
11073 utils.CommaJoin(utils.NiceSort(group_nodes))),
11074 errors.ECODE_STATE)
11076 # Verify the cluster would not be left group-less.
11077 if len(self.cfg.GetNodeGroupList()) == 1:
11078 raise errors.OpPrereqError("Group '%s' is the only group,"
11079 " cannot be removed" %
11080 self.op.group_name,
11081 errors.ECODE_STATE)
11083 def BuildHooksEnv(self):
11084 """Build hooks env.
11088 "GROUP_NAME": self.op.group_name,
11091 def BuildHooksNodes(self):
11092 """Build hooks nodes.
11095 mn = self.cfg.GetMasterNode()
11096 return ([mn], [mn])
11098 def Exec(self, feedback_fn):
11099 """Remove the node group.
11103 self.cfg.RemoveNodeGroup(self.group_uuid)
11104 except errors.ConfigurationError:
11105 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11106 (self.op.group_name, self.group_uuid))
11108 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11111 class LUGroupRename(LogicalUnit):
11112 HPATH = "group-rename"
11113 HTYPE = constants.HTYPE_GROUP
11116 def ExpandNames(self):
11117 # This raises errors.OpPrereqError on its own:
11118 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11120 self.needed_locks = {
11121 locking.LEVEL_NODEGROUP: [self.group_uuid],
11124 def CheckPrereq(self):
11125 """Check prerequisites.
11127 Ensures requested new name is not yet used.
11131 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11132 except errors.OpPrereqError:
11135 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11136 " node group (UUID: %s)" %
11137 (self.op.new_name, new_name_uuid),
11138 errors.ECODE_EXISTS)
11140 def BuildHooksEnv(self):
11141 """Build hooks env.
11145 "OLD_NAME": self.op.group_name,
11146 "NEW_NAME": self.op.new_name,
11149 def BuildHooksNodes(self):
11150 """Build hooks nodes.
11153 mn = self.cfg.GetMasterNode()
11155 all_nodes = self.cfg.GetAllNodesInfo()
11156 all_nodes.pop(mn, None)
11159 run_nodes.extend(node.name for node in all_nodes.values()
11160 if node.group == self.group_uuid)
11162 return (run_nodes, run_nodes)
11164 def Exec(self, feedback_fn):
11165 """Rename the node group.
11168 group = self.cfg.GetNodeGroup(self.group_uuid)
11171 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11172 (self.op.group_name, self.group_uuid))
11174 group.name = self.op.new_name
11175 self.cfg.Update(group, feedback_fn)
11177 return self.op.new_name
11180 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11181 """Generic tags LU.
11183 This is an abstract class which is the parent of all the other tags LUs.
11186 def ExpandNames(self):
11187 self.group_uuid = None
11188 self.needed_locks = {}
11189 if self.op.kind == constants.TAG_NODE:
11190 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11191 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11192 elif self.op.kind == constants.TAG_INSTANCE:
11193 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11194 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11195 elif self.op.kind == constants.TAG_NODEGROUP:
11196 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11198 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11199 # not possible to acquire the BGL based on opcode parameters)
11201 def CheckPrereq(self):
11202 """Check prerequisites.
11205 if self.op.kind == constants.TAG_CLUSTER:
11206 self.target = self.cfg.GetClusterInfo()
11207 elif self.op.kind == constants.TAG_NODE:
11208 self.target = self.cfg.GetNodeInfo(self.op.name)
11209 elif self.op.kind == constants.TAG_INSTANCE:
11210 self.target = self.cfg.GetInstanceInfo(self.op.name)
11211 elif self.op.kind == constants.TAG_NODEGROUP:
11212 self.target = self.cfg.GetNodeGroup(self.group_uuid)
11214 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11215 str(self.op.kind), errors.ECODE_INVAL)
11218 class LUTagsGet(TagsLU):
11219 """Returns the tags of a given object.
11224 def ExpandNames(self):
11225 TagsLU.ExpandNames(self)
11227 # Share locks as this is only a read operation
11228 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11230 def Exec(self, feedback_fn):
11231 """Returns the tag list.
11234 return list(self.target.GetTags())
11237 class LUTagsSearch(NoHooksLU):
11238 """Searches the tags for a given pattern.
11243 def ExpandNames(self):
11244 self.needed_locks = {}
11246 def CheckPrereq(self):
11247 """Check prerequisites.
11249 This checks that the given pattern is valid by compiling it.
11253 self.re = re.compile(self.op.pattern)
11254 except re.error, err:
11255 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11256 (self.op.pattern, err), errors.ECODE_INVAL)
11258 def Exec(self, feedback_fn):
11259 """Returns the tag list.
11263 tgts = [("/cluster", cfg.GetClusterInfo())]
11264 ilist = cfg.GetAllInstancesInfo().values()
11265 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11266 nlist = cfg.GetAllNodesInfo().values()
11267 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11268 tgts.extend(("/nodegroup/%s" % n.name, n)
11269 for n in cfg.GetAllNodeGroupsInfo().values())
11271 for path, target in tgts:
11272 for tag in target.GetTags():
11273 if self.re.search(tag):
11274 results.append((path, tag))
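# Illustrative sketch (not part of the LU): the accumulated result is a list
# of (path, tag) pairs; e.g. a pattern such as "^web" might yield (names made
# up):
#   [("/instances/inst1.example.com", "webserver"),
#    ("/nodes/node1.example.com", "web-rack")]
# using the path prefixes built above: /cluster, /instances/<name>,
# /nodes/<name> and /nodegroup/<name>.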
11278 class LUTagsSet(TagsLU):
11279 """Sets a tag on a given object.
11284 def CheckPrereq(self):
11285 """Check prerequisites.
11287 This checks the type and length of the tag name and value.
11290 TagsLU.CheckPrereq(self)
11291 for tag in self.op.tags:
11292 objects.TaggableObject.ValidateTag(tag)
11294 def Exec(self, feedback_fn):
11299 for tag in self.op.tags:
11300 self.target.AddTag(tag)
11301 except errors.TagError, err:
11302 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11303 self.cfg.Update(self.target, feedback_fn)
11306 class LUTagsDel(TagsLU):
11307 """Delete a list of tags from a given object.
11312 def CheckPrereq(self):
11313 """Check prerequisites.
11315 This checks that we have the given tag.
11318 TagsLU.CheckPrereq(self)
11319 for tag in self.op.tags:
11320 objects.TaggableObject.ValidateTag(tag)
11321 del_tags = frozenset(self.op.tags)
11322 cur_tags = self.target.GetTags()
11324 diff_tags = del_tags - cur_tags
11326 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11327 raise errors.OpPrereqError("Tag(s) %s not found" %
11328 (utils.CommaJoin(diff_names), ),
11329 errors.ECODE_NOENT)
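# Illustrative sketch of the check above: plain set arithmetic decides whether
# all requested tags are present, e.g. (values made up):
#   del_tags = frozenset(["a", "b"]); cur_tags = set(["a"])
#   del_tags - cur_tags  ->  frozenset(["b"])   # reported as "Tag(s) 'b' not found"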
11331 def Exec(self, feedback_fn):
11332 """Remove the tag from the object.
11335 for tag in self.op.tags:
11336 self.target.RemoveTag(tag)
11337 self.cfg.Update(self.target, feedback_fn)
11340 class LUTestDelay(NoHooksLU):
11341 """Sleep for a specified amount of time.
11343 This LU sleeps on the master and/or nodes for a specified amount of time.
11349 def ExpandNames(self):
11350 """Expand names and set required locks.
11352 This expands the node list, if any.
11355 self.needed_locks = {}
11356 if self.op.on_nodes:
11357 # _GetWantedNodes can be used here, but is not always appropriate to use
11358 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11359 # more information.
11360 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11361 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11363 def _TestDelay(self):
11364 """Do the actual sleep.
11367 if self.op.on_master:
11368 if not utils.TestDelay(self.op.duration):
11369 raise errors.OpExecError("Error during master delay test")
11370 if self.op.on_nodes:
11371 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11372 for node, node_result in result.items():
11373 node_result.Raise("Failure during rpc call to node %s" % node)
11375 def Exec(self, feedback_fn):
11376 """Execute the test delay opcode, with the wanted repetitions.
11379 if self.op.repeat == 0:
11382 top_value = self.op.repeat - 1
11383 for i in range(self.op.repeat):
11384 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11388 class LUTestJqueue(NoHooksLU):
11389 """Utility LU to test some aspects of the job queue.
11394 # Must be lower than default timeout for WaitForJobChange to see whether it
11395 # notices changed jobs
11396 _CLIENT_CONNECT_TIMEOUT = 20.0
11397 _CLIENT_CONFIRM_TIMEOUT = 60.0
11400 def _NotifyUsingSocket(cls, cb, errcls):
11401 """Opens a Unix socket and waits for another program to connect.
11404 @param cb: Callback to send socket name to client
11405 @type errcls: class
11406 @param errcls: Exception class to use for errors
11409 # Using a temporary directory as there's no easy way to create temporary
11410 # sockets without writing a custom loop around tempfile.mktemp and
11412 tmpdir = tempfile.mkdtemp()
11414 tmpsock = utils.PathJoin(tmpdir, "sock")
11416 logging.debug("Creating temporary socket at %s", tmpsock)
11417 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11422 # Send details to client
11425 # Wait for client to connect before continuing
11426 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11428 (conn, _) = sock.accept()
11429 except socket.error, err:
11430 raise errcls("Client didn't connect in time (%s)" % err)
11434 # Remove as soon as client is connected
11435 shutil.rmtree(tmpdir)
11437 # Wait for client to close
11440 # pylint: disable-msg=E1101
11441 # Instance of '_socketobject' has no ... member
11442 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11444 except socket.error, err:
11445 raise errcls("Client failed to confirm notification (%s)" % err)
11449 def _SendNotification(self, test, arg, sockname):
11450 """Sends a notification to the client.
11453 @param test: Test name
11454 @param arg: Test argument (depends on test)
11455 @type sockname: string
11456 @param sockname: Socket path
11459 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
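# Illustrative client-side sketch (not part of this module; assumes the test
# harness has extracted the socket path from the ELOG_JQUEUE_TEST message):
#   import socket
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(sockname)   # unblocks the accept() in _NotifyUsingSocket
#   sock.close()             # closing the connection confirms the notification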
11461 def _Notify(self, prereq, test, arg):
11462 """Notifies the client of a test.
11465 @param prereq: Whether this is a prereq-phase test
11467 @param test: Test name
11468 @param arg: Test argument (depends on test)
11472 errcls = errors.OpPrereqError
11474 errcls = errors.OpExecError
11476 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11480 def CheckArguments(self):
11481 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11482 self.expandnames_calls = 0
11484 def ExpandNames(self):
11485 checkargs_calls = getattr(self, "checkargs_calls", 0)
11486 if checkargs_calls < 1:
11487 raise errors.ProgrammerError("CheckArguments was not called")
11489 self.expandnames_calls += 1
11491 if self.op.notify_waitlock:
11492 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11494 self.LogInfo("Expanding names")
11496 # Get lock on master node (just to get a lock, not for a particular reason)
11497 self.needed_locks = {
11498 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11501 def Exec(self, feedback_fn):
11502 if self.expandnames_calls < 1:
11503 raise errors.ProgrammerError("ExpandNames was not called")
11505 if self.op.notify_exec:
11506 self._Notify(False, constants.JQT_EXEC, None)
11508 self.LogInfo("Executing")
11510 if self.op.log_messages:
11511 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11512 for idx, msg in enumerate(self.op.log_messages):
11513 self.LogInfo("Sending log message %s", idx + 1)
11514 feedback_fn(constants.JQT_MSGPREFIX + msg)
11515 # Report how many test messages have been sent
11516 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11519 raise errors.OpExecError("Opcode failure was requested")
11524 class IAllocator(object):
11525 """IAllocator framework.
11527 An IAllocator instance has four sets of attributes:
11528 - cfg that is needed to query the cluster
11529 - input data (all members of the _KEYS class attribute are required)
11530 - four buffer attributes (in|out_data|text), that represent the
11531 input (to the external script) in text and data structure format,
11532 and the output from it, again in two formats
11533 - the result variables from the script (success, info, result) for easy usage
11537 # pylint: disable-msg=R0902
11538 # lots of instance attributes
11540 "name", "mem_size", "disks", "disk_template",
11541 "os", "tags", "nics", "vcpus", "hypervisor",
11544 "name", "relocate_from",
11550 def __init__(self, cfg, rpc, mode, **kwargs):
11553 # init buffer variables
11554 self.in_text = self.out_text = self.in_data = self.out_data = None
11555 # init all input fields so that pylint is happy
11557 self.mem_size = self.disks = self.disk_template = None
11558 self.os = self.tags = self.nics = self.vcpus = None
11559 self.hypervisor = None
11560 self.relocate_from = None
11562 self.evac_nodes = None
11564 self.required_nodes = None
11565 # init result fields
11566 self.success = self.info = self.result = None
11567 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11568 keyset = self._ALLO_KEYS
11569 fn = self._AddNewInstance
11570 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11571 keyset = self._RELO_KEYS
11572 fn = self._AddRelocateInstance
11573 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11574 keyset = self._EVAC_KEYS
11575 fn = self._AddEvacuateNodes
11577 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11578 " IAllocator" % self.mode)
11580 if key not in keyset:
11581 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11582 " IAllocator" % key)
11583 setattr(self, key, kwargs[key])
11586 if key not in kwargs:
11587 raise errors.ProgrammerError("Missing input parameter '%s' to"
11588 " IAllocator" % key)
11589 self._BuildInputData(fn)
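# Illustrative construction sketch from within an LU (mirrors
# LUTestAllocator.Exec further down; all values are made up):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", mem_size=512, vcpus=1,
#                    os="debian-image", tags=[], nics=[{}],
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8, hypervisor=None)
#   ial.Run("hail")   # "hail" being one possible iallocator script name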
11591 def _ComputeClusterData(self):
11592 """Compute the generic allocator input data.
11594 This is the data that is independent of the actual operation.
11598 cluster_info = cfg.GetClusterInfo()
11601 "version": constants.IALLOCATOR_VERSION,
11602 "cluster_name": cfg.GetClusterName(),
11603 "cluster_tags": list(cluster_info.GetTags()),
11604 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11605 # we don't have job IDs
11607 ninfo = cfg.GetAllNodesInfo()
11608 iinfo = cfg.GetAllInstancesInfo().values()
11609 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11612 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11614 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11615 hypervisor_name = self.hypervisor
11616 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11617 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11618 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11619 hypervisor_name = cluster_info.enabled_hypervisors[0]
11621 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11624 self.rpc.call_all_instances_info(node_list,
11625 cluster_info.enabled_hypervisors)
11627 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11629 config_ndata = self._ComputeBasicNodeData(ninfo)
11630 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11631 i_list, config_ndata)
11632 assert len(data["nodes"]) == len(ninfo), \
11633 "Incomplete node data computed"
11635 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11637 self.in_data = data
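# Illustrative sketch of the structure built above (abridged):
#   self.in_data = {
#     "version": ..., "cluster_name": ..., "cluster_tags": [...],
#     "enabled_hypervisors": [...],
#     "nodegroups": {group_uuid: {...}},
#     "nodes": {node_name: {...}},
#     "instances": {instance_name: {...}},
#   }
# The mode-specific "request" section is added later by _BuildInputData.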
11640 def _ComputeNodeGroupData(cfg):
11641 """Compute node groups data.
11645 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11647 "name": gdata.name,
11648 "alloc_policy": gdata.alloc_policy,
11653 def _ComputeBasicNodeData(node_cfg):
11654 """Compute global node data.
11657 @returns: a dict of name: (node dict, node config)
11661 for ninfo in node_cfg.values():
11662 # fill in static (config-based) values
11664 "tags": list(ninfo.GetTags()),
11665 "primary_ip": ninfo.primary_ip,
11666 "secondary_ip": ninfo.secondary_ip,
11667 "offline": ninfo.offline,
11668 "drained": ninfo.drained,
11669 "master_candidate": ninfo.master_candidate,
11670 "group": ninfo.group,
11671 "master_capable": ninfo.master_capable,
11672 "vm_capable": ninfo.vm_capable,
11675 node_results[ninfo.name] = pnr
11677 return node_results
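# Illustrative sketch of one entry of the returned dict (static, config-only
# fields; values are made up):
#   node_results["node1.example.com"] = {
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "192.0.2.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group uuid>", "master_capable": True, "vm_capable": True,
#   }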
11680 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11682 """Compute global node data.
11684 @param node_results: the basic node structures as filled from the config
11687 # make a copy of the current dict
11688 node_results = dict(node_results)
11689 for nname, nresult in node_data.items():
11690 assert nname in node_results, "Missing basic data for node %s" % nname
11691 ninfo = node_cfg[nname]
11693 if not (ninfo.offline or ninfo.drained):
11694 nresult.Raise("Can't get data for node %s" % nname)
11695 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11697 remote_info = nresult.payload
11699 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11700 'vg_size', 'vg_free', 'cpu_total']:
11701 if attr not in remote_info:
11702 raise errors.OpExecError("Node '%s' didn't return attribute"
11703 " '%s'" % (nname, attr))
11704 if not isinstance(remote_info[attr], int):
11705 raise errors.OpExecError("Node '%s' returned invalid value"
11707 (nname, attr, remote_info[attr]))
11708 # compute memory used by primary instances
11709 i_p_mem = i_p_up_mem = 0
11710 for iinfo, beinfo in i_list:
11711 if iinfo.primary_node == nname:
11712 i_p_mem += beinfo[constants.BE_MEMORY]
11713 if iinfo.name not in node_iinfo[nname].payload:
11716 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11717 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11718 remote_info['memory_free'] -= max(0, i_mem_diff)
11721 i_p_up_mem += beinfo[constants.BE_MEMORY]
11723 # compute memory used by instances
11725 "total_memory": remote_info['memory_total'],
11726 "reserved_memory": remote_info['memory_dom0'],
11727 "free_memory": remote_info['memory_free'],
11728 "total_disk": remote_info['vg_size'],
11729 "free_disk": remote_info['vg_free'],
11730 "total_cpus": remote_info['cpu_total'],
11731 "i_pri_memory": i_p_mem,
11732 "i_pri_up_memory": i_p_up_mem,
11734 pnr_dyn.update(node_results[nname])
11735 node_results[nname] = pnr_dyn
11737 return node_results
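# Illustrative sketch of the runtime-derived fields merged into each reachable
# node's entry above (memory and disk figures as reported by the node,
# typically in MiB):
#   {"total_memory": ..., "reserved_memory": ..., "free_memory": ...,
#    "total_disk": ..., "free_disk": ..., "total_cpus": ...,
#    "i_pri_memory": ..., "i_pri_up_memory": ...}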
11740 def _ComputeInstanceData(cluster_info, i_list):
11741 """Compute global instance data.
11745 for iinfo, beinfo in i_list:
11747 for nic in iinfo.nics:
11748 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11749 nic_dict = {"mac": nic.mac,
11751 "mode": filled_params[constants.NIC_MODE],
11752 "link": filled_params[constants.NIC_LINK],
11754 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11755 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11756 nic_data.append(nic_dict)
11758 "tags": list(iinfo.GetTags()),
11759 "admin_up": iinfo.admin_up,
11760 "vcpus": beinfo[constants.BE_VCPUS],
11761 "memory": beinfo[constants.BE_MEMORY],
11763 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11765 "disks": [{constants.IDISK_SIZE: dsk.size,
11766 constants.IDISK_MODE: dsk.mode}
11767 for dsk in iinfo.disks],
11768 "disk_template": iinfo.disk_template,
11769 "hypervisor": iinfo.hypervisor,
11771 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11773 instance_data[iinfo.name] = pir
11775 return instance_data
11777 def _AddNewInstance(self):
11778 """Add new instance data to allocator structure.
11780 This, in combination with _ComputeClusterData, will create the
11781 correct structure needed as input for the allocator.
11783 The checks for the completeness of the opcode must have already been done.
11787 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11789 if self.disk_template in constants.DTS_INT_MIRROR:
11790 self.required_nodes = 2
11792 self.required_nodes = 1
11795 "disk_template": self.disk_template,
11798 "vcpus": self.vcpus,
11799 "memory": self.mem_size,
11800 "disks": self.disks,
11801 "disk_space_total": disk_space,
11803 "required_nodes": self.required_nodes,
11807 def _AddRelocateInstance(self):
11808 """Add relocate instance data to allocator structure.
11810 This, in combination with _ComputeClusterData, will create the
11811 correct structure needed as input for the allocator.
11813 The checks for the completeness of the opcode must have already been done.
11817 instance = self.cfg.GetInstanceInfo(self.name)
11818 if instance is None:
11819 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11820 " IAllocator" % self.name)
11822 if instance.disk_template not in constants.DTS_MIRRORED:
11823 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11824 errors.ECODE_INVAL)
11826 if instance.disk_template in constants.DTS_INT_MIRROR and \
11827 len(instance.secondary_nodes) != 1:
11828 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11829 errors.ECODE_STATE)
11831 self.required_nodes = 1
11832 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11833 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11837 "disk_space_total": disk_space,
11838 "required_nodes": self.required_nodes,
11839 "relocate_from": self.relocate_from,
11843 def _AddEvacuateNodes(self):
11844 """Add evacuate nodes data to allocator structure.
11848 "evac_nodes": self.evac_nodes
11852 def _BuildInputData(self, fn):
11853 """Build input data structures.
11856 self._ComputeClusterData()
11859 request["type"] = self.mode
11860 self.in_data["request"] = request
11862 self.in_text = serializer.Dump(self.in_data)
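# Illustrative sketch: for an allocation request the serialized input ends
# with a "request" section roughly like (cf. _AddNewInstance; keys abridged):
#   {"type": <self.mode>, "name": ..., "disk_template": ..., "vcpus": ...,
#    "memory": ..., "disks": [...], "disk_space_total": ...,
#    "required_nodes": 1 or 2, ...}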
11864 def Run(self, name, validate=True, call_fn=None):
11865 """Run an instance allocator and return the results.
11868 if call_fn is None:
11869 call_fn = self.rpc.call_iallocator_runner
11871 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11872 result.Raise("Failure while running the iallocator script")
11874 self.out_text = result.payload
11876 self._ValidateResult()
11878 def _ValidateResult(self):
11879 """Process the allocator results.
11881 This will process and if successful save the result in
11882 self.out_data and the other parameters.
11886 rdict = serializer.Load(self.out_text)
11887 except Exception, err:
11888 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11890 if not isinstance(rdict, dict):
11891 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11893 # TODO: remove backwards compatibility in later versions
11894 if "nodes" in rdict and "result" not in rdict:
11895 rdict["result"] = rdict["nodes"]
11898 for key in "success", "info", "result":
11899 if key not in rdict:
11900 raise errors.OpExecError("Can't parse iallocator results:"
11901 " missing key '%s'" % key)
11902 setattr(self, key, rdict[key])
11904 if not isinstance(rdict["result"], list):
11905 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11908 if self.mode == constants.IALLOCATOR_MODE_RELOC:
11909 assert self.relocate_from is not None
11910 assert self.required_nodes == 1
11912 node2group = dict((name, ndata["group"])
11913 for (name, ndata) in self.in_data["nodes"].items())
11915 fn = compat.partial(self._NodesToGroups, node2group,
11916 self.in_data["nodegroups"])
11918 request_groups = fn(self.relocate_from)
11919 result_groups = fn(rdict["result"])
11921 if result_groups != request_groups:
11922 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11923 " differ from original groups (%s)" %
11924 (utils.CommaJoin(result_groups),
11925 utils.CommaJoin(request_groups)))
11927 self.out_data = rdict
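# Illustrative sketch of a well-formed allocator reply, after deserialization
# (node names made up):
#   {"success": True, "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}
# "success", "info" and "result" are mandatory and "result" must be a list.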
11930 def _NodesToGroups(node2group, groups, nodes):
11931 """Returns a list of unique group names for a list of nodes.
11933 @type node2group: dict
11934 @param node2group: Map from node name to group UUID
11936 @param groups: Group information
11938 @param nodes: Node names
11945 group_uuid = node2group[node]
11947 # Ignore unknown node
11951 group = groups[group_uuid]
11953 # Can't find group, let's use UUID
11954 group_name = group_uuid
11956 group_name = group["name"]
11958 result.add(group_name)
11960 return sorted(result)
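# Illustrative sketch of _NodesToGroups (all names made up):
#   node2group = {"n1": "uuid-a", "n2": "uuid-b", "n3": "uuid-a"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   _NodesToGroups(node2group, groups, ["n1", "n2", "n3", "unknown"])
#   -> ["default", "rack2"]   # sorted and unique; unknown nodes are ignored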
11963 class LUTestAllocator(NoHooksLU):
11964 """Run allocator tests.
11966 This LU runs the allocator tests
11969 def CheckPrereq(self):
11970 """Check prerequisites.
11972 This checks the opcode parameters depending on the requested direction and mode.
11975 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11976 for attr in ["mem_size", "disks", "disk_template",
11977 "os", "tags", "nics", "vcpus"]:
11978 if not hasattr(self.op, attr):
11979 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11980 attr, errors.ECODE_INVAL)
11981 iname = self.cfg.ExpandInstanceName(self.op.name)
11982 if iname is not None:
11983 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11984 iname, errors.ECODE_EXISTS)
11985 if not isinstance(self.op.nics, list):
11986 raise errors.OpPrereqError("Invalid parameter 'nics'",
11987 errors.ECODE_INVAL)
11988 if not isinstance(self.op.disks, list):
11989 raise errors.OpPrereqError("Invalid parameter 'disks'",
11990 errors.ECODE_INVAL)
11991 for row in self.op.disks:
11992 if (not isinstance(row, dict) or
11993 "size" not in row or
11994 not isinstance(row["size"], int) or
11995 "mode" not in row or
11996 row["mode"] not in ['r', 'w']):
11997 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11998 " parameter", errors.ECODE_INVAL)
11999 if self.op.hypervisor is None:
12000 self.op.hypervisor = self.cfg.GetHypervisorType()
12001 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12002 fname = _ExpandInstanceName(self.cfg, self.op.name)
12003 self.op.name = fname
12004 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12005 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12006 if not hasattr(self.op, "evac_nodes"):
12007 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12008 " opcode input", errors.ECODE_INVAL)
12010 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12011 self.op.mode, errors.ECODE_INVAL)
12013 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12014 if self.op.allocator is None:
12015 raise errors.OpPrereqError("Missing allocator name",
12016 errors.ECODE_INVAL)
12017 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12018 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12019 self.op.direction, errors.ECODE_INVAL)
12021 def Exec(self, feedback_fn):
12022 """Run the allocator test.
12025 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12026 ial = IAllocator(self.cfg, self.rpc,
12029 mem_size=self.op.mem_size,
12030 disks=self.op.disks,
12031 disk_template=self.op.disk_template,
12035 vcpus=self.op.vcpus,
12036 hypervisor=self.op.hypervisor,
12038 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12039 ial = IAllocator(self.cfg, self.rpc,
12042 relocate_from=list(self.relocate_from),
12044 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12045 ial = IAllocator(self.cfg, self.rpc,
12047 evac_nodes=self.op.evac_nodes)
12049 raise errors.ProgrammerError("Uncatched mode %s in"
12050 " LUTestAllocator.Exec", self.op.mode)
12052 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12053 result = ial.in_text
12055 ial.Run(self.op.allocator, validate=False)
12056 result = ial.out_text
12060 #: Query type implementations
_QUERY_IMPL = {
12062 constants.QR_INSTANCE: _InstanceQuery,
12063 constants.QR_NODE: _NodeQuery,
12064 constants.QR_GROUP: _GroupQuery,
12065 constants.QR_OS: _OsQuery,
}
12068 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12071 def _GetQueryImplementation(name):
12072 """Returns the implemtnation for a query type.
12074 @param name: Query type, must be one of L{constants.QR_VIA_OP}
12078 return _QUERY_IMPL[name]
12080 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12081 errors.ECODE_INVAL)
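# Illustrative usage sketch (not part of the module):
#   _GetQueryImplementation(constants.QR_NODE)    # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")   # raises OpPrereqError (ECODE_INVAL)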