4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 import ganeti.masterd.instance # pylint: disable-msg=W0611
64 def _SupportsOob(cfg, node):
65 """Tells if node supports OOB.
67 @type cfg: L{config.ConfigWriter}
68 @param cfg: The cluster configuration
69 @type node: L{objects.Node}
71 @return: The OOB script if supported or an empty string otherwise
74 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
78 """Data container for LU results with jobs.
80 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
81 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
82 contained in the C{jobs} attribute and include the job IDs in the opcode
86 def __init__(self, jobs, **kwargs):
87 """Initializes this class.
89 Additional return values can be specified as keyword arguments.
91 @type jobs: list of lists of L{opcodes.OpCode}
92 @param jobs: A list of lists of opcode objects
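# Illustrative sketch (not part of this module's code): how an LU's Exec
# method might hand follow-up jobs back to the processor. The opcode used
# below is only a placeholder; any list of lists of opcodes works.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]]
#     # "other" becomes an extra key in the result, next to the job IDs
#     return ResultWithJobs(jobs, other="additional return value")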
99 class LogicalUnit(object):
100 """Logical Unit base class.
102 Subclasses must follow these rules:
103 - implement ExpandNames
104 - implement CheckPrereq (except when tasklets are used)
105 - implement Exec (except when tasklets are used)
106 - implement BuildHooksEnv
107 - implement BuildHooksNodes
108 - redefine HPATH and HTYPE
109 - optionally redefine their run requirements:
110 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112 Note that all commands require root permissions.
114 @ivar dry_run_result: the value (if any) that will be returned to the caller
115 in dry-run mode (signalled by opcode dry_run parameter)
122 def __init__(self, processor, op, context, rpc):
123 """Constructor for LogicalUnit.
125 This needs to be overridden in derived classes in order to check op
129 self.proc = processor
131 self.cfg = context.cfg
132 self.context = context
134 # Dicts used to declare locking needs to mcpu
135 self.needed_locks = None
136 self.acquired_locks = {}
137 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139 self.remove_locks = {}
140 # Used to force good behavior when calling helper functions
141 self.recalculate_locks = {}
144 self.Log = processor.Log # pylint: disable-msg=C0103
145 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148 # support for dry-run
149 self.dry_run_result = None
150 # support for generic debug attribute
151 if (not hasattr(self.op, "debug_level") or
152 not isinstance(self.op.debug_level, int)):
153 self.op.debug_level = 0
158 # Validate opcode parameters and set defaults
159 self.op.Validate(True)
161 self.CheckArguments()
164 """Returns the SshRunner object
168 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
171 ssh = property(fget=__GetSSH)
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that level
205 - don't put anything for the BGL level
206 - if you want all locks at a level use locking.ALL_SET as a value
208 If you need to share locks (rather than acquire them exclusively) at one
209 level you can modify self.share_locks, setting a true value (usually 1) for
210 that level. By default locks are not shared.
212 This function can also define a list of tasklets, which then will be
213 executed in order instead of the usual LU-level CheckPrereq and Exec
214 functions, if those are not defined by the LU.
218 # Acquire all nodes and one instance
219 self.needed_locks = {
220 locking.LEVEL_NODE: locking.ALL_SET,
221 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 # Acquire just two nodes
224 self.needed_locks = {
225 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
228 self.needed_locks = {} # No, you can't leave it to the default value None
231 # The implementation of this method is mandatory only if the new LU is
232 # concurrent, so that old LUs don't need to be changed all at the same
235 self.needed_locks = {} # Exclusive LUs don't need locks.
237 raise NotImplementedError
239 def DeclareLocks(self, level):
240 """Declare LU locking needs for a level
242 While most LUs can just declare their locking needs at ExpandNames time,
243 sometimes there's the need to calculate some locks after having acquired
244 the ones before. This function is called just before acquiring locks at a
245 particular level, but after acquiring the ones at lower levels, and permits
246 such calculations. It can be used to modify self.needed_locks, and by
247 default it does nothing.
249 This function is only called if you have something already set in
250 self.needed_locks for the level.
252 @param level: Locking level which is going to be locked
253 @type level: member of ganeti.locking.LEVELS
257 def CheckPrereq(self):
258 """Check prerequisites for this LU.
260 This method should check that the prerequisites for the execution
261 of this LU are fulfilled. It can do internode communication, but
262 it should be idempotent - no cluster or system changes are
265 The method should raise errors.OpPrereqError in case something is
266 not fulfilled. Its return value is ignored.
268 This method should also update all the parameters of the opcode to
269 their canonical form if it hasn't been done by ExpandNames before.
272 if self.tasklets is not None:
273 for (idx, tl) in enumerate(self.tasklets):
274 logging.debug("Checking prerequisites for tasklet %s/%s",
275 idx + 1, len(self.tasklets))
280 def Exec(self, feedback_fn):
283 This method should implement the actual work. It should raise
284 errors.OpExecError for failures that are somewhat dealt with in
288 if self.tasklets is not None:
289 for (idx, tl) in enumerate(self.tasklets):
290 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
293 raise NotImplementedError
295 def BuildHooksEnv(self):
296 """Build hooks environment for this LU.
299 @return: Dictionary containing the environment that will be used for
300 running the hooks for this LU. The keys of the dict must not be prefixed
301 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
302 will extend the environment with additional variables. If no environment
303 should be defined, an empty dictionary should be returned (not C{None}).
304 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
308 raise NotImplementedError
310 def BuildHooksNodes(self):
311 """Build list of nodes to run LU's hooks.
313 @rtype: tuple; (list, list)
314 @return: Tuple containing a list of node names on which the hook
315 should run before the execution and a list of node names on which the
316 hook should run after the execution. No nodes should be returned as an
317 empty list (and not None).
318 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
322 raise NotImplementedError
324 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
325 """Notify the LU about the results of its hooks.
327 This method is called every time a hooks phase is executed, and notifies
328 the Logical Unit about the hooks' result. The LU can then use it to alter
329 its result based on the hooks. By default the method does nothing and the
330 previous result is passed back unchanged but any LU can define it if it
331 wants to use the local cluster hook-scripts somehow.
333 @param phase: one of L{constants.HOOKS_PHASE_POST} or
334 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
335 @param hook_results: the results of the multi-node hooks rpc call
336 @param feedback_fn: function used to send feedback back to the caller
337 @param lu_result: the previous Exec result this LU had, or None
339 @return: the new Exec result, based on the previous result
343 # API must be kept, thus we ignore the unused-argument and
344 # could-be-a-function warnings
345 # pylint: disable-msg=W0613,R0201
348 def _ExpandAndLockInstance(self):
349 """Helper function to expand and lock an instance.
351 Many LUs that work on an instance take its name in self.op.instance_name
352 and need to expand it and then declare the expanded name for locking. This
353 function does it, and then updates self.op.instance_name to the expanded
354 name. It also initializes needed_locks as a dict, if this hasn't been done
358 if self.needed_locks is None:
359 self.needed_locks = {}
361 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
362 "_ExpandAndLockInstance called with instance-level locks set"
363 self.op.instance_name = _ExpandInstanceName(self.cfg,
364 self.op.instance_name)
365 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367 def _LockInstancesNodes(self, primary_only=False):
368 """Helper function to declare instances' nodes for locking.
370 This function should be called after locking one or more instances to lock
371 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
372 with all primary or secondary nodes for instances already locked and
373 present in self.needed_locks[locking.LEVEL_INSTANCE].
375 It should be called from DeclareLocks, and for safety only works if
376 self.recalculate_locks[locking.LEVEL_NODE] is set.
378 In the future it may grow parameters to just lock some instance's nodes, or
379 to just lock primaries or secondary nodes, if needed.
381 It should be called in DeclareLocks in a way similar to::
383 if level == locking.LEVEL_NODE:
384 self._LockInstancesNodes()
386 @type primary_only: boolean
387 @param primary_only: only lock primary nodes of locked instances
390 assert locking.LEVEL_NODE in self.recalculate_locks, \
391 "_LockInstancesNodes helper function called with no nodes to recalculate"
393 # TODO: check if we've really been called with the instance locks held
395 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
396 # future we might want to have different behaviors depending on the value
397 # of self.recalculate_locks[locking.LEVEL_NODE]
399 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
400 instance = self.context.cfg.GetInstanceInfo(instance_name)
401 wanted_nodes.append(instance.primary_node)
403 wanted_nodes.extend(instance.secondary_nodes)
405 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
406 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
407 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
408 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
410 del self.recalculate_locks[locking.LEVEL_NODE]
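# Illustrative sketch, not taken from a specific LU: the typical pattern for
# using the helper above is to request instance locks in ExpandNames, mark
# node locks for recalculation, and then call _LockInstancesNodes from
# DeclareLocks once the instance locks are held.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()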
413 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
414 """Simple LU which runs no hooks.
416 This LU is intended as a parent for other LogicalUnits which will
417 run no hooks, in order to reduce duplicate code.
423 def BuildHooksEnv(self):
424 """Empty BuildHooksEnv for NoHooksLu.
426 This just raises an error.
429 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
431 def BuildHooksNodes(self):
432 """Empty BuildHooksNodes for NoHooksLU.
435 raise AssertionError("BuildHooksNodes called for NoHooksLU")
439 """Tasklet base class.
441 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
442 they can mix legacy code with tasklets. Locking needs to be done in the LU,
443 tasklets know nothing about locks.
445 Subclasses must follow these rules:
446 - Implement CheckPrereq
450 def __init__(self, lu):
457 def CheckPrereq(self):
458 """Check prerequisites for this tasklet.
460 This method should check whether the prerequisites for the execution of
461 this tasklet are fulfilled. It can do internode communication, but it
462 should be idempotent - no cluster or system changes are allowed.
464 The method should raise errors.OpPrereqError in case something is not
465 fulfilled. Its return value is ignored.
467 This method should also update all parameters to their canonical form if it
468 hasn't been done before.
473 def Exec(self, feedback_fn):
474 """Execute the tasklet.
476 This method should implement the actual work. It should raise
477 errors.OpExecError for failures that are somewhat dealt with in code, or
481 raise NotImplementedError
485 """Base for query utility classes.
488 #: Attribute holding field definitions
491 def __init__(self, filter_, fields, use_locking):
492 """Initializes this class.
495 self.use_locking = use_locking
497 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
499 self.requested_data = self.query.RequestedData()
500 self.names = self.query.RequestedNames()
502 # Sort only if no names were requested
503 self.sort_by_name = not self.names
505 self.do_locking = None
508 def _GetNames(self, lu, all_names, lock_level):
509 """Helper function to determine names asked for in the query.
513 names = lu.acquired_locks[lock_level]
517 if self.wanted == locking.ALL_SET:
518 assert not self.names
519 # caller didn't specify names, so ordering is not important
520 return utils.NiceSort(names)
522 # caller specified names and we must keep the same order
524 assert not self.do_locking or lu.acquired_locks[lock_level]
526 missing = set(self.wanted).difference(names)
528 raise errors.OpExecError("Some items were removed before retrieving"
529 " their data: %s" % missing)
531 # Return expanded names
534 def ExpandNames(self, lu):
535 """Expand names for this query.
537 See L{LogicalUnit.ExpandNames}.
540 raise NotImplementedError()
542 def DeclareLocks(self, lu, level):
543 """Declare locks for this query.
545 See L{LogicalUnit.DeclareLocks}.
548 raise NotImplementedError()
550 def _GetQueryData(self, lu):
551 """Collects all data for this query.
553 @return: Query data object
556 raise NotImplementedError()
558 def NewStyleQuery(self, lu):
559 """Collect data and execute query.
562 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
563 sort_by_name=self.sort_by_name)
565 def OldStyleQuery(self, lu):
566 """Collect data and execute query.
569 return self.query.OldStyleQuery(self._GetQueryData(lu),
570 sort_by_name=self.sort_by_name)
573 def _GetWantedNodes(lu, nodes):
574 """Returns list of checked and expanded node names.
576 @type lu: L{LogicalUnit}
577 @param lu: the logical unit on whose behalf we execute
579 @param nodes: list of node names or None for all nodes
581 @return: the list of nodes, sorted
582 @raise errors.ProgrammerError: if the nodes parameter is wrong type
586 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
588 return utils.NiceSort(lu.cfg.GetNodeList())
591 def _GetWantedInstances(lu, instances):
592 """Returns list of checked and expanded instance names.
594 @type lu: L{LogicalUnit}
595 @param lu: the logical unit on whose behalf we execute
596 @type instances: list
597 @param instances: list of instance names or None for all instances
599 @return: the list of instances, sorted
600 @raise errors.OpPrereqError: if the instances parameter is wrong type
601 @raise errors.OpPrereqError: if any of the passed instances is not found
605 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
607 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
611 def _GetUpdatedParams(old_params, update_dict,
612 use_default=True, use_none=False):
613 """Return the new version of a parameter dictionary.
615 @type old_params: dict
616 @param old_params: old parameters
617 @type update_dict: dict
618 @param update_dict: dict containing new parameter values, or
619 constants.VALUE_DEFAULT to reset the parameter to its default
621 @type use_default: boolean
622 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
623 values as 'to be deleted' values
624 @type use_none: boolean
625 @param use_none: whether to recognise C{None} values as 'to be
628 @return: the new parameter dictionary
631 params_copy = copy.deepcopy(old_params)
632 for key, val in update_dict.iteritems():
633 if ((use_default and val == constants.VALUE_DEFAULT) or
634 (use_none and val is None)):
640 params_copy[key] = val
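# Worked example of the merge semantics above, with made-up parameter names:
#
#   old = {"mem": 128, "vcpus": 1, "kernel": "/boot/vmlinuz"}
#   _GetUpdatedParams(old, {"mem": 256, "kernel": constants.VALUE_DEFAULT})
#     -> {"mem": 256, "vcpus": 1}                  # "kernel" falls back to default
#   _GetUpdatedParams(old, {"mem": None}, use_none=True)
#     -> {"vcpus": 1, "kernel": "/boot/vmlinuz"}   # "mem" removed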
644 def _RunPostHook(lu, node_name):
645 """Runs the post-hook for an opcode on a single node.
648 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
650 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
652 # pylint: disable-msg=W0702
653 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
656 def _CheckOutputFields(static, dynamic, selected):
657 """Checks whether all selected fields are valid.
659 @type static: L{utils.FieldSet}
660 @param static: static fields set
661 @type dynamic: L{utils.FieldSet}
662 @param dynamic: dynamic fields set
669 delta = f.NonMatching(selected)
671 raise errors.OpPrereqError("Unknown output fields selected: %s"
672 % ",".join(delta), errors.ECODE_INVAL)
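# Illustrative sketch with made-up field names: a query-style LU would
# typically validate the user-selected output fields like this, raising
# OpPrereqError for any field matching neither set.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dtotal", "dfree"),
#                      selected=self.op.output_fields)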
675 def _CheckGlobalHvParams(params):
676 """Validates that given hypervisor params are not global ones.
678 This will ensure that instances don't get customised versions of
682 used_globals = constants.HVC_GLOBALS.intersection(params)
684 msg = ("The following hypervisor parameters are global and cannot"
685 " be customized at instance level, please modify them at"
686 " cluster level: %s" % utils.CommaJoin(used_globals))
687 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
690 def _CheckNodeOnline(lu, node, msg=None):
691 """Ensure that a given node is online.
693 @param lu: the LU on behalf of which we make the check
694 @param node: the node to check
695 @param msg: if passed, should be a message to replace the default one
696 @raise errors.OpPrereqError: if the node is offline
700 msg = "Can't use offline node"
701 if lu.cfg.GetNodeInfo(node).offline:
702 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
705 def _CheckNodeNotDrained(lu, node):
706 """Ensure that a given node is not drained.
708 @param lu: the LU on behalf of which we make the check
709 @param node: the node to check
710 @raise errors.OpPrereqError: if the node is drained
713 if lu.cfg.GetNodeInfo(node).drained:
714 raise errors.OpPrereqError("Can't use drained node %s" % node,
718 def _CheckNodeVmCapable(lu, node):
719 """Ensure that a given node is vm capable.
721 @param lu: the LU on behalf of which we make the check
722 @param node: the node to check
723 @raise errors.OpPrereqError: if the node is not vm capable
726 if not lu.cfg.GetNodeInfo(node).vm_capable:
727 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
731 def _CheckNodeHasOS(lu, node, os_name, force_variant):
732 """Ensure that a node supports a given OS.
734 @param lu: the LU on behalf of which we make the check
735 @param node: the node to check
736 @param os_name: the OS to query about
737 @param force_variant: whether to ignore variant errors
738 @raise errors.OpPrereqError: if the node is not supporting the OS
741 result = lu.rpc.call_os_get(node, os_name)
742 result.Raise("OS '%s' not in supported OS list for node %s" %
744 prereq=True, ecode=errors.ECODE_INVAL)
745 if not force_variant:
746 _CheckOSVariant(result.payload, os_name)
749 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
750 """Ensure that a node has the given secondary ip.
752 @type lu: L{LogicalUnit}
753 @param lu: the LU on behalf of which we make the check
755 @param node: the node to check
756 @type secondary_ip: string
757 @param secondary_ip: the ip to check
758 @type prereq: boolean
759 @param prereq: whether to throw a prerequisite or an execute error
760 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
761 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
764 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
765 result.Raise("Failure checking secondary ip on node %s" % node,
766 prereq=prereq, ecode=errors.ECODE_ENVIRON)
767 if not result.payload:
768 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
769 " please fix and re-run this command" % secondary_ip)
771 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
773 raise errors.OpExecError(msg)
776 def _GetClusterDomainSecret():
777 """Reads the cluster domain secret.
780 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
784 def _CheckInstanceDown(lu, instance, reason):
785 """Ensure that an instance is not running."""
786 if instance.admin_up:
787 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
788 (instance.name, reason), errors.ECODE_STATE)
790 pnode = instance.primary_node
791 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
792 ins_l.Raise("Can't contact node %s for instance information" % pnode,
793 prereq=True, ecode=errors.ECODE_ENVIRON)
795 if instance.name in ins_l.payload:
796 raise errors.OpPrereqError("Instance %s is running, %s" %
797 (instance.name, reason), errors.ECODE_STATE)
800 def _ExpandItemName(fn, name, kind):
801 """Expand an item name.
803 @param fn: the function to use for expansion
804 @param name: requested item name
805 @param kind: text description ('Node' or 'Instance')
806 @return: the resolved (full) name
807 @raise errors.OpPrereqError: if the item is not found
811 if full_name is None:
812 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
817 def _ExpandNodeName(cfg, name):
818 """Wrapper over L{_ExpandItemName} for nodes."""
819 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
822 def _ExpandInstanceName(cfg, name):
823 """Wrapper over L{_ExpandItemName} for instance."""
824 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
827 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
828 memory, vcpus, nics, disk_template, disks,
829 bep, hvp, hypervisor_name):
830 """Builds instance related env variables for hooks
832 This builds the hook environment from individual variables.
835 @param name: the name of the instance
836 @type primary_node: string
837 @param primary_node: the name of the instance's primary node
838 @type secondary_nodes: list
839 @param secondary_nodes: list of secondary nodes as strings
840 @type os_type: string
841 @param os_type: the name of the instance's OS
842 @type status: boolean
843 @param status: the should_run status of the instance
845 @param memory: the memory size of the instance
847 @param vcpus: the count of VCPUs the instance has
849 @param nics: list of tuples (ip, mac, mode, link) representing
850 the NICs the instance has
851 @type disk_template: string
852 @param disk_template: the disk template of the instance
854 @param disks: the list of (size, mode) pairs
856 @param bep: the backend parameters for the instance
858 @param hvp: the hypervisor parameters for the instance
859 @type hypervisor_name: string
860 @param hypervisor_name: the hypervisor for the instance
862 @return: the hook environment for this instance
871 "INSTANCE_NAME": name,
872 "INSTANCE_PRIMARY": primary_node,
873 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
874 "INSTANCE_OS_TYPE": os_type,
875 "INSTANCE_STATUS": str_status,
876 "INSTANCE_MEMORY": memory,
877 "INSTANCE_VCPUS": vcpus,
878 "INSTANCE_DISK_TEMPLATE": disk_template,
879 "INSTANCE_HYPERVISOR": hypervisor_name,
883 nic_count = len(nics)
884 for idx, (ip, mac, mode, link) in enumerate(nics):
887 env["INSTANCE_NIC%d_IP" % idx] = ip
888 env["INSTANCE_NIC%d_MAC" % idx] = mac
889 env["INSTANCE_NIC%d_MODE" % idx] = mode
890 env["INSTANCE_NIC%d_LINK" % idx] = link
891 if mode == constants.NIC_MODE_BRIDGED:
892 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
896 env["INSTANCE_NIC_COUNT"] = nic_count
899 disk_count = len(disks)
900 for idx, (size, mode) in enumerate(disks):
901 env["INSTANCE_DISK%d_SIZE" % idx] = size
902 env["INSTANCE_DISK%d_MODE" % idx] = mode
906 env["INSTANCE_DISK_COUNT"] = disk_count
908 for source, kind in [(bep, "BE"), (hvp, "HV")]:
909 for key, value in source.items():
910 env["INSTANCE_%s_%s" % (kind, key)] = value
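# Illustrative sketch of the resulting environment for a hypothetical
# single-NIC, single-disk instance (all values are examples only):
#
#   {
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#     ...
#   }
#
# The hooks runner later prefixes every key with "GANETI_".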
915 def _NICListToTuple(lu, nics):
916 """Build a list of nic information tuples.
918 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
919 value in LUInstanceQueryData.
921 @type lu: L{LogicalUnit}
922 @param lu: the logical unit on whose behalf we execute
923 @type nics: list of L{objects.NIC}
924 @param nics: list of nics to convert to hooks tuples
928 cluster = lu.cfg.GetClusterInfo()
932 filled_params = cluster.SimpleFillNIC(nic.nicparams)
933 mode = filled_params[constants.NIC_MODE]
934 link = filled_params[constants.NIC_LINK]
935 hooks_nics.append((ip, mac, mode, link))
939 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
940 """Builds instance related env variables for hooks from an object.
942 @type lu: L{LogicalUnit}
943 @param lu: the logical unit on whose behalf we execute
944 @type instance: L{objects.Instance}
945 @param instance: the instance for which we should build the
948 @param override: dictionary with key/values that will override
951 @return: the hook environment dictionary
954 cluster = lu.cfg.GetClusterInfo()
955 bep = cluster.FillBE(instance)
956 hvp = cluster.FillHV(instance)
958 'name': instance.name,
959 'primary_node': instance.primary_node,
960 'secondary_nodes': instance.secondary_nodes,
961 'os_type': instance.os,
962 'status': instance.admin_up,
963 'memory': bep[constants.BE_MEMORY],
964 'vcpus': bep[constants.BE_VCPUS],
965 'nics': _NICListToTuple(lu, instance.nics),
966 'disk_template': instance.disk_template,
967 'disks': [(disk.size, disk.mode) for disk in instance.disks],
970 'hypervisor_name': instance.hypervisor,
973 args.update(override)
974 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
977 def _AdjustCandidatePool(lu, exceptions):
978 """Adjust the candidate pool after node operations.
981 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
983 lu.LogInfo("Promoted nodes to master candidate role: %s",
984 utils.CommaJoin(node.name for node in mod_list))
985 for name in mod_list:
986 lu.context.ReaddNode(name)
987 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
989 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
993 def _DecideSelfPromotion(lu, exceptions=None):
994 """Decide whether I should promote myself as a master candidate.
997 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
998 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
999 # the new node will increase mc_max by one, so:
1000 mc_should = min(mc_should + 1, cp_size)
1001 return mc_now < mc_should
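# Worked example with hypothetical numbers: with candidate_pool_size = 10,
# three current candidates (mc_now = 3) and three desired (mc_should = 3),
# adding this node raises the desired count to min(3 + 1, 10) = 4, so
# 3 < 4 and the new node promotes itself; with a pool size of 3 the desired
# count stays capped at 3 and no promotion happens.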
1004 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1005 """Check that the bridges needed by a list of nics exist.
1008 cluster = lu.cfg.GetClusterInfo()
1009 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1010 brlist = [params[constants.NIC_LINK] for params in paramslist
1011 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1013 result = lu.rpc.call_bridges_exist(target_node, brlist)
1014 result.Raise("Error checking bridges on destination node '%s'" %
1015 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1018 def _CheckInstanceBridgesExist(lu, instance, node=None):
1019 """Check that the bridges needed by an instance exist.
1023 node = instance.primary_node
1024 _CheckNicsBridgesExist(lu, instance.nics, node)
1027 def _CheckOSVariant(os_obj, name):
1028 """Check whether an OS name conforms to the os variants specification.
1030 @type os_obj: L{objects.OS}
1031 @param os_obj: OS object to check
1033 @param name: OS name passed by the user, to check for validity
1036 if not os_obj.supported_variants:
1038 variant = objects.OS.GetVariant(name)
1040 raise errors.OpPrereqError("OS name must include a variant",
1043 if variant not in os_obj.supported_variants:
1044 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1047 def _GetNodeInstancesInner(cfg, fn):
1048 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1051 def _GetNodeInstances(cfg, node_name):
1052 """Returns a list of all primary and secondary instances on a node.
1056 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1059 def _GetNodePrimaryInstances(cfg, node_name):
1060 """Returns primary instances on a node.
1063 return _GetNodeInstancesInner(cfg,
1064 lambda inst: node_name == inst.primary_node)
1067 def _GetNodeSecondaryInstances(cfg, node_name):
1068 """Returns secondary instances on a node.
1071 return _GetNodeInstancesInner(cfg,
1072 lambda inst: node_name in inst.secondary_nodes)
1075 def _GetStorageTypeArgs(cfg, storage_type):
1076 """Returns the arguments for a storage type.
1079 # Special case for file storage
1080 if storage_type == constants.ST_FILE:
1081 # storage.FileStorage wants a list of storage directories
1082 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1087 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1090 for dev in instance.disks:
1091 cfg.SetDiskID(dev, node_name)
1093 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1094 result.Raise("Failed to get disk status from node %s" % node_name,
1095 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1097 for idx, bdev_status in enumerate(result.payload):
1098 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1104 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1105 """Check the sanity of iallocator and node arguments and use the
1106 cluster-wide iallocator if appropriate.
1108 Check that at most one of (iallocator, node) is specified. If none is
1109 specified, then the LU's opcode's iallocator slot is filled with the
1110 cluster-wide default iallocator.
1112 @type iallocator_slot: string
1113 @param iallocator_slot: the name of the opcode iallocator slot
1114 @type node_slot: string
1115 @param node_slot: the name of the opcode target node slot
1118 node = getattr(lu.op, node_slot, None)
1119 iallocator = getattr(lu.op, iallocator_slot, None)
1121 if node is not None and iallocator is not None:
1122 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1124 elif node is None and iallocator is None:
1125 default_iallocator = lu.cfg.GetDefaultIAllocator()
1126 if default_iallocator:
1127 setattr(lu.op, iallocator_slot, default_iallocator)
1129 raise errors.OpPrereqError("No iallocator or node given and no"
1130 " cluster-wide default iallocator found."
1131 " Please specify either an iallocator or a"
1132 " node, or set a cluster-wide default"
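# Illustrative sketch (slot names are examples only): an LU whose opcode has
# "iallocator" and "pnode" slots would typically call this helper from
# CheckArguments, leaving self.op.iallocator set to the cluster default when
# the user specified neither.
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "pnode")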
1136 class LUClusterPostInit(LogicalUnit):
1137 """Logical unit for running hooks after cluster initialization.
1140 HPATH = "cluster-init"
1141 HTYPE = constants.HTYPE_CLUSTER
1143 def BuildHooksEnv(self):
1148 "OP_TARGET": self.cfg.GetClusterName(),
1151 def BuildHooksNodes(self):
1152 """Build hooks nodes.
1155 return ([], [self.cfg.GetMasterNode()])
1157 def Exec(self, feedback_fn):
1164 class LUClusterDestroy(LogicalUnit):
1165 """Logical unit for destroying the cluster.
1168 HPATH = "cluster-destroy"
1169 HTYPE = constants.HTYPE_CLUSTER
1171 def BuildHooksEnv(self):
1176 "OP_TARGET": self.cfg.GetClusterName(),
1179 def BuildHooksNodes(self):
1180 """Build hooks nodes.
1185 def CheckPrereq(self):
1186 """Check prerequisites.
1188 This checks whether the cluster is empty.
1190 Any errors are signaled by raising errors.OpPrereqError.
1193 master = self.cfg.GetMasterNode()
1195 nodelist = self.cfg.GetNodeList()
1196 if len(nodelist) != 1 or nodelist[0] != master:
1197 raise errors.OpPrereqError("There are still %d node(s) in"
1198 " this cluster." % (len(nodelist) - 1),
1200 instancelist = self.cfg.GetInstanceList()
1202 raise errors.OpPrereqError("There are still %d instance(s) in"
1203 " this cluster." % len(instancelist),
1206 def Exec(self, feedback_fn):
1207 """Destroys the cluster.
1210 master = self.cfg.GetMasterNode()
1212 # Run post hooks on master node before it's removed
1213 _RunPostHook(self, master)
1215 result = self.rpc.call_node_stop_master(master, False)
1216 result.Raise("Could not disable the master role")
1221 def _VerifyCertificate(filename):
1222 """Verifies a certificate for LUClusterVerify.
1224 @type filename: string
1225 @param filename: Path to PEM file
1229 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1230 utils.ReadFile(filename))
1231 except Exception, err: # pylint: disable-msg=W0703
1232 return (LUClusterVerify.ETYPE_ERROR,
1233 "Failed to load X509 certificate %s: %s" % (filename, err))
1236 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1237 constants.SSL_CERT_EXPIRATION_ERROR)
1240 fnamemsg = "While verifying %s: %s" % (filename, msg)
1245 return (None, fnamemsg)
1246 elif errcode == utils.CERT_WARNING:
1247 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1248 elif errcode == utils.CERT_ERROR:
1249 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1251 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1254 class LUClusterVerify(LogicalUnit):
1255 """Verifies the cluster status.
1258 HPATH = "cluster-verify"
1259 HTYPE = constants.HTYPE_CLUSTER
1262 TCLUSTER = "cluster"
1264 TINSTANCE = "instance"
1266 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1267 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1268 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1269 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1270 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1271 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1272 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1273 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1274 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1275 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1276 ENODEDRBD = (TNODE, "ENODEDRBD")
1277 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1278 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1279 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1280 ENODEHV = (TNODE, "ENODEHV")
1281 ENODELVM = (TNODE, "ENODELVM")
1282 ENODEN1 = (TNODE, "ENODEN1")
1283 ENODENET = (TNODE, "ENODENET")
1284 ENODEOS = (TNODE, "ENODEOS")
1285 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1286 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1287 ENODERPC = (TNODE, "ENODERPC")
1288 ENODESSH = (TNODE, "ENODESSH")
1289 ENODEVERSION = (TNODE, "ENODEVERSION")
1290 ENODESETUP = (TNODE, "ENODESETUP")
1291 ENODETIME = (TNODE, "ENODETIME")
1292 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1294 ETYPE_FIELD = "code"
1295 ETYPE_ERROR = "ERROR"
1296 ETYPE_WARNING = "WARNING"
1298 _HOOKS_INDENT_RE = re.compile("^", re.M)
1300 class NodeImage(object):
1301 """A class representing the logical and physical status of a node.
1304 @ivar name: the node name to which this object refers
1305 @ivar volumes: a structure as returned from
1306 L{ganeti.backend.GetVolumeList} (runtime)
1307 @ivar instances: a list of running instances (runtime)
1308 @ivar pinst: list of configured primary instances (config)
1309 @ivar sinst: list of configured secondary instances (config)
1310 @ivar sbp: dictionary of {primary-node: list of instances} for all
1311 instances for which this node is secondary (config)
1312 @ivar mfree: free memory, as reported by hypervisor (runtime)
1313 @ivar dfree: free disk, as reported by the node (runtime)
1314 @ivar offline: the offline status (config)
1315 @type rpc_fail: boolean
1316 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1317 not whether the individual keys were correct) (runtime)
1318 @type lvm_fail: boolean
1319 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1320 @type hyp_fail: boolean
1321 @ivar hyp_fail: whether the RPC call didn't return the instance list
1322 @type ghost: boolean
1323 @ivar ghost: whether this is a known node or not (config)
1324 @type os_fail: boolean
1325 @ivar os_fail: whether the RPC call didn't return valid OS data
1327 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1328 @type vm_capable: boolean
1329 @ivar vm_capable: whether the node can host instances
1332 def __init__(self, offline=False, name=None, vm_capable=True):
1341 self.offline = offline
1342 self.vm_capable = vm_capable
1343 self.rpc_fail = False
1344 self.lvm_fail = False
1345 self.hyp_fail = False
1347 self.os_fail = False
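# Illustrative sketch with made-up values: Exec() builds one NodeImage per
# node and the _Verify*/_Update* helpers below fill in and consume its
# fields, roughly like this.
#
#   nimg = self.NodeImage(offline=node.offline, name=node.name,
#                         vm_capable=node.vm_capable)
#   nimg.pinst = ["inst1.example.com"]      # from the cluster configuration
#   nimg.instances = ["inst1.example.com"]  # from the node's RPC reply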
1350 def ExpandNames(self):
1351 self.needed_locks = {
1352 locking.LEVEL_NODE: locking.ALL_SET,
1353 locking.LEVEL_INSTANCE: locking.ALL_SET,
1355 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1357 def _Error(self, ecode, item, msg, *args, **kwargs):
1358 """Format an error message.
1360 Based on the opcode's error_codes parameter, either format a
1361 parseable error code, or a simpler error string.
1363 This must be called only from Exec and functions called from Exec.
1366 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1368 # first complete the msg
1371 # then format the whole message
1372 if self.op.error_codes:
1373 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1379 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1380 # and finally report it via the feedback_fn
1381 self._feedback_fn(" - %s" % msg)
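# Illustrative sketch of the two formats above (made-up values): with
# op.error_codes set, the message is machine-parseable, e.g.
#   "ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
# while the default format is meant for humans, e.g.
#   "ERROR: node node1.example.com: unable to check volume groups"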
1383 def _ErrorIf(self, cond, *args, **kwargs):
1384 """Log an error message if the passed condition is True.
1387 cond = bool(cond) or self.op.debug_simulate_errors
1389 self._Error(*args, **kwargs)
1390 # do not mark the operation as failed for WARN cases only
1391 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1392 self.bad = self.bad or cond
1394 def _VerifyNode(self, ninfo, nresult):
1395 """Perform some basic validation on data returned from a node.
1397 - check the result data structure is well formed and has all the
1399 - check ganeti version
1401 @type ninfo: L{objects.Node}
1402 @param ninfo: the node to check
1403 @param nresult: the results from the node
1405 @return: whether overall this call was successful (and we can expect
1406 reasonable values in the response)
1410 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1412 # main result, nresult should be a non-empty dict
1413 test = not nresult or not isinstance(nresult, dict)
1414 _ErrorIf(test, self.ENODERPC, node,
1415 "unable to verify node: no data returned")
1419 # compares ganeti version
1420 local_version = constants.PROTOCOL_VERSION
1421 remote_version = nresult.get("version", None)
1422 test = not (remote_version and
1423 isinstance(remote_version, (list, tuple)) and
1424 len(remote_version) == 2)
1425 _ErrorIf(test, self.ENODERPC, node,
1426 "connection to node returned invalid data")
1430 test = local_version != remote_version[0]
1431 _ErrorIf(test, self.ENODEVERSION, node,
1432 "incompatible protocol versions: master %s,"
1433 " node %s", local_version, remote_version[0])
1437 # node seems compatible, we can actually try to look into its results
1439 # full package version
1440 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1441 self.ENODEVERSION, node,
1442 "software version mismatch: master %s, node %s",
1443 constants.RELEASE_VERSION, remote_version[1],
1444 code=self.ETYPE_WARNING)
1446 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1447 if ninfo.vm_capable and isinstance(hyp_result, dict):
1448 for hv_name, hv_result in hyp_result.iteritems():
1449 test = hv_result is not None
1450 _ErrorIf(test, self.ENODEHV, node,
1451 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1453 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1454 if ninfo.vm_capable and isinstance(hvp_result, list):
1455 for item, hv_name, hv_result in hvp_result:
1456 _ErrorIf(True, self.ENODEHV, node,
1457 "hypervisor %s parameter verify failure (source %s): %s",
1458 hv_name, item, hv_result)
1460 test = nresult.get(constants.NV_NODESETUP,
1461 ["Missing NODESETUP results"])
1462 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1467 def _VerifyNodeTime(self, ninfo, nresult,
1468 nvinfo_starttime, nvinfo_endtime):
1469 """Check the node time.
1471 @type ninfo: L{objects.Node}
1472 @param ninfo: the node to check
1473 @param nresult: the remote results for the node
1474 @param nvinfo_starttime: the start time of the RPC call
1475 @param nvinfo_endtime: the end time of the RPC call
1479 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1481 ntime = nresult.get(constants.NV_TIME, None)
1483 ntime_merged = utils.MergeTime(ntime)
1484 except (ValueError, TypeError):
1485 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1488 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1489 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1490 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1491 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1495 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1496 "Node time diverges by at least %s from master node time",
1499 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1500 """Check the node LVM results.
1502 @type ninfo: L{objects.Node}
1503 @param ninfo: the node to check
1504 @param nresult: the remote results for the node
1505 @param vg_name: the configured VG name
1512 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514 # checks vg existence and size > 20G
1515 vglist = nresult.get(constants.NV_VGLIST, None)
1517 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1519 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1520 constants.MIN_VG_SIZE)
1521 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1524 pvlist = nresult.get(constants.NV_PVLIST, None)
1525 test = pvlist is None
1526 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1528 # check that ':' is not present in PV names, since it's a
1529 # special character for lvcreate (denotes the range of PEs to
1531 for _, pvname, owner_vg in pvlist:
1532 test = ":" in pvname
1533 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1534 " '%s' of VG '%s'", pvname, owner_vg)
1536 def _VerifyNodeNetwork(self, ninfo, nresult):
1537 """Check the node network connectivity.
1539 @type ninfo: L{objects.Node}
1540 @param ninfo: the node to check
1541 @param nresult: the remote results for the node
1545 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1547 test = constants.NV_NODELIST not in nresult
1548 _ErrorIf(test, self.ENODESSH, node,
1549 "node hasn't returned node ssh connectivity data")
1551 if nresult[constants.NV_NODELIST]:
1552 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1553 _ErrorIf(True, self.ENODESSH, node,
1554 "ssh communication with node '%s': %s", a_node, a_msg)
1556 test = constants.NV_NODENETTEST not in nresult
1557 _ErrorIf(test, self.ENODENET, node,
1558 "node hasn't returned node tcp connectivity data")
1560 if nresult[constants.NV_NODENETTEST]:
1561 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1563 _ErrorIf(True, self.ENODENET, node,
1564 "tcp communication with node '%s': %s",
1565 anode, nresult[constants.NV_NODENETTEST][anode])
1567 test = constants.NV_MASTERIP not in nresult
1568 _ErrorIf(test, self.ENODENET, node,
1569 "node hasn't returned node master IP reachability data")
1571 if not nresult[constants.NV_MASTERIP]:
1572 if node == self.master_node:
1573 msg = "the master node cannot reach the master IP (not configured?)"
1575 msg = "cannot reach the master IP"
1576 _ErrorIf(True, self.ENODENET, node, msg)
1578 def _VerifyInstance(self, instance, instanceconfig, node_image,
1580 """Verify an instance.
1582 This function checks to see if the required block devices are
1583 available on the instance's node.
1586 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1587 node_current = instanceconfig.primary_node
1589 node_vol_should = {}
1590 instanceconfig.MapLVsByNode(node_vol_should)
1592 for node in node_vol_should:
1593 n_img = node_image[node]
1594 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1595 # ignore missing volumes on offline or broken nodes
1597 for volume in node_vol_should[node]:
1598 test = volume not in n_img.volumes
1599 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1600 "volume %s missing on node %s", volume, node)
1602 if instanceconfig.admin_up:
1603 pri_img = node_image[node_current]
1604 test = instance not in pri_img.instances and not pri_img.offline
1605 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1606 "instance not running on its primary node %s",
1609 for node, n_img in node_image.items():
1610 if node != node_current:
1611 test = instance in n_img.instances
1612 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1613 "instance should not run on node %s", node)
1615 diskdata = [(nname, success, status, idx)
1616 for (nname, disks) in diskstatus.items()
1617 for idx, (success, status) in enumerate(disks)]
1619 for nname, success, bdev_status, idx in diskdata:
1620 # the 'ghost node' construction in Exec() ensures that we have a
1622 snode = node_image[nname]
1623 bad_snode = snode.ghost or snode.offline
1624 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1625 self.EINSTANCEFAULTYDISK, instance,
1626 "couldn't retrieve status for disk/%s on %s: %s",
1627 idx, nname, bdev_status)
1628 _ErrorIf((instanceconfig.admin_up and success and
1629 bdev_status.ldisk_status == constants.LDS_FAULTY),
1630 self.EINSTANCEFAULTYDISK, instance,
1631 "disk/%s on %s is faulty", idx, nname)
1633 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1634 """Verify if there are any unknown volumes in the cluster.
1636 The .os, .swap and backup volumes are ignored. All other volumes are
1637 reported as unknown.
1639 @type reserved: L{ganeti.utils.FieldSet}
1640 @param reserved: a FieldSet of reserved volume names
1643 for node, n_img in node_image.items():
1644 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1645 # skip non-healthy nodes
1647 for volume in n_img.volumes:
1648 test = ((node not in node_vol_should or
1649 volume not in node_vol_should[node]) and
1650 not reserved.Matches(volume))
1651 self._ErrorIf(test, self.ENODEORPHANLV, node,
1652 "volume %s is unknown", volume)
1654 def _VerifyOrphanInstances(self, instancelist, node_image):
1655 """Verify the list of running instances.
1657 This checks what instances are running but unknown to the cluster.
1660 for node, n_img in node_image.items():
1661 for o_inst in n_img.instances:
1662 test = o_inst not in instancelist
1663 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1664 "instance %s on node %s should not exist", o_inst, node)
1666 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1667 """Verify N+1 Memory Resilience.
1669 Check that if one single node dies we can still start all the
1670 instances it was primary for.
1673 cluster_info = self.cfg.GetClusterInfo()
1674 for node, n_img in node_image.items():
1675 # This code checks that every node which is now listed as
1676 # secondary has enough memory to host all instances it is
1677 # supposed to, should a single other node in the cluster fail.
1678 # FIXME: not ready for failover to an arbitrary node
1679 # FIXME: does not support file-backed instances
1680 # WARNING: we currently take into account down instances as well
1681 # as up ones, considering that even if they're down someone
1682 # might want to start them even in the event of a node failure.
1684 # we're skipping offline nodes from the N+1 warning, since
1685 # most likely we don't have good memory information from them;
1686 # we already list instances living on such nodes, and that's
1689 for prinode, instances in n_img.sbp.items():
1691 for instance in instances:
1692 bep = cluster_info.FillBE(instance_cfg[instance])
1693 if bep[constants.BE_AUTO_BALANCE]:
1694 needed_mem += bep[constants.BE_MEMORY]
1695 test = n_img.mfree < needed_mem
1696 self._ErrorIf(test, self.ENODEN1, node,
1697 "not enough memory to accommodate instance failovers"
1698 " should node %s fail (%dMiB needed, %dMiB available)",
1699 prinode, needed_mem, n_img.mfree)
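# Worked example with hypothetical numbers: if this node is secondary for
# two auto-balanced instances whose primary is node B, needing 2048 and
# 1024 MiB of memory, then needed_mem is 3072 MiB; with mfree = 2500 MiB
# the ENODEN1 error above is reported for prinode B.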
1702 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1703 (files_all, files_all_opt, files_mc, files_vm)):
1704 """Verifies file checksums collected from all nodes.
1706 @param errorif: Callback for reporting errors
1707 @param nodeinfo: List of L{objects.Node} objects
1708 @param master_node: Name of master node
1709 @param all_nvinfo: RPC results
1712 node_names = frozenset(node.name for node in nodeinfo)
1714 assert master_node in node_names
1715 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1716 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1717 "Found file listed in more than one file list"
1719 # Define functions determining which nodes to consider for a file
1720 file2nodefn = dict([(filename, fn)
1721 for (files, fn) in [(files_all, None),
1722 (files_all_opt, None),
1723 (files_mc, lambda node: (node.master_candidate or
1724 node.name == master_node)),
1725 (files_vm, lambda node: node.vm_capable)]
1726 for filename in files])
1728 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1730 for node in nodeinfo:
1731 nresult = all_nvinfo[node.name]
1733 if nresult.fail_msg or not nresult.payload:
1736 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1738 test = not (node_files and isinstance(node_files, dict))
1739 errorif(test, cls.ENODEFILECHECK, node.name,
1740 "Node did not return file checksum data")
1744 for (filename, checksum) in node_files.items():
1745 # Check if the file should be considered for a node
1746 fn = file2nodefn[filename]
1747 if fn is None or fn(node):
1748 fileinfo[filename].setdefault(checksum, set()).add(node.name)
1750 for (filename, checksums) in fileinfo.items():
1751 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1753 # Nodes having the file
1754 with_file = frozenset(node_name
1755 for nodes in fileinfo[filename].values()
1756 for node_name in nodes)
1758 # Nodes missing file
1759 missing_file = node_names - with_file
1761 if filename in files_all_opt:
1763 errorif(missing_file and missing_file != node_names,
1764 cls.ECLUSTERFILECHECK, None,
1765 "File %s is optional, but it must exist on all or no nodes (not"
1767 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1769 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1770 "File %s is missing from node(s) %s", filename,
1771 utils.CommaJoin(utils.NiceSort(missing_file)))
1773 # See if there are multiple versions of the file
1774 test = len(checksums) > 1
1776 variants = ["variant %s on %s" %
1777 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1778 for (idx, (checksum, nodes)) in
1779 enumerate(sorted(checksums.items()))]
1783 errorif(test, cls.ECLUSTERFILECHECK, None,
1784 "File %s found with %s different checksums (%s)",
1785 filename, len(checksums), "; ".join(variants))
1787 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1789 """Verifies the node DRBD status.
1791 @type ninfo: L{objects.Node}
1792 @param ninfo: the node to check
1793 @param nresult: the remote results for the node
1794 @param instanceinfo: the dict of instances
1795 @param drbd_helper: the configured DRBD usermode helper
1796 @param drbd_map: the DRBD map as returned by
1797 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1801 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1804 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1805 test = (helper_result is None)
1806 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1807 "no drbd usermode helper returned")
1809 status, payload = helper_result
1811 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1812 "drbd usermode helper check unsuccessful: %s", payload)
1813 test = status and (payload != drbd_helper)
1814 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1815 "wrong drbd usermode helper: %s", payload)
1817 # compute the DRBD minors
1819 for minor, instance in drbd_map[node].items():
1820 test = instance not in instanceinfo
1821 _ErrorIf(test, self.ECLUSTERCFG, None,
1822 "ghost instance '%s' in temporary DRBD map", instance)
1823 # ghost instance should not be running, but otherwise we
1824 # don't give double warnings (both ghost instance and
1825 # unallocated minor in use)
1827 node_drbd[minor] = (instance, False)
1829 instance = instanceinfo[instance]
1830 node_drbd[minor] = (instance.name, instance.admin_up)
1832 # and now check them
1833 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1834 test = not isinstance(used_minors, (tuple, list))
1835 _ErrorIf(test, self.ENODEDRBD, node,
1836 "cannot parse drbd status file: %s", str(used_minors))
1838 # we cannot check drbd status
1841 for minor, (iname, must_exist) in node_drbd.items():
1842 test = minor not in used_minors and must_exist
1843 _ErrorIf(test, self.ENODEDRBD, node,
1844 "drbd minor %d of instance %s is not active", minor, iname)
1845 for minor in used_minors:
1846 test = minor not in node_drbd
1847 _ErrorIf(test, self.ENODEDRBD, node,
1848 "unallocated drbd minor %d is in use", minor)
1850 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1851 """Builds the node OS structures.
1853 @type ninfo: L{objects.Node}
1854 @param ninfo: the node to check
1855 @param nresult: the remote results for the node
1856 @param nimg: the node image object
1860 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1862 remote_os = nresult.get(constants.NV_OSLIST, None)
1863 test = (not isinstance(remote_os, list) or
1864 not compat.all(isinstance(v, list) and len(v) == 7
1865 for v in remote_os))
1867 _ErrorIf(test, self.ENODEOS, node,
1868 "node hasn't returned valid OS data")
1877 for (name, os_path, status, diagnose,
1878 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1880 if name not in os_dict:
1883 # parameters is a list of lists instead of list of tuples due to
1884 # JSON lacking a real tuple type, fix it:
1885 parameters = [tuple(v) for v in parameters]
1886 os_dict[name].append((os_path, status, diagnose,
1887 set(variants), set(parameters), set(api_ver)))
1889 nimg.oslist = os_dict
1891 def _VerifyNodeOS(self, ninfo, nimg, base):
1892 """Verifies the node OS list.
1894 @type ninfo: L{objects.Node}
1895 @param ninfo: the node to check
1896 @param nimg: the node image object
1897 @param base: the 'template' node we match against (e.g. from the master)
1901 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1903 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1905 for os_name, os_data in nimg.oslist.items():
1906 assert os_data, "Empty OS status for OS %s?!" % os_name
1907 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1908 _ErrorIf(not f_status, self.ENODEOS, node,
1909 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1910 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1911 "OS '%s' has multiple entries (first one shadows the rest): %s",
1912 os_name, utils.CommaJoin([v[0] for v in os_data]))
1913 # this will be caught in the backend too
1914 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1915 and not f_var, self.ENODEOS, node,
1916 "OS %s with API at least %d does not declare any variant",
1917 os_name, constants.OS_API_V15)
1918 # comparisons with the 'base' image
1919 test = os_name not in base.oslist
1920 _ErrorIf(test, self.ENODEOS, node,
1921 "Extra OS %s not present on reference node (%s)",
1925 assert base.oslist[os_name], "Base node has empty OS status?"
1926 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1928 # base OS is invalid, skipping
1930 for kind, a, b in [("API version", f_api, b_api),
1931 ("variants list", f_var, b_var),
1932 ("parameters", f_param, b_param)]:
1933 _ErrorIf(a != b, self.ENODEOS, node,
1934 "OS %s %s differs from reference node %s: %s vs. %s",
1935 kind, os_name, base.name,
1936 utils.CommaJoin(a), utils.CommaJoin(b))
1938 # check any missing OSes
1939 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1940 _ErrorIf(missing, self.ENODEOS, node,
1941 "OSes present on reference node %s but missing on this node: %s",
1942 base.name, utils.CommaJoin(missing))
1944 def _VerifyOob(self, ninfo, nresult):
1945 """Verifies out of band functionality of a node.
1947 @type ninfo: L{objects.Node}
1948 @param ninfo: the node to check
1949 @param nresult: the remote results for the node
1953 # We just have to verify the paths on master and/or master candidates
1954 # as the oob helper is invoked on the master
1955 if ((ninfo.master_candidate or ninfo.master_capable) and
1956 constants.NV_OOB_PATHS in nresult):
1957 for path_result in nresult[constants.NV_OOB_PATHS]:
1958 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1960 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1961 """Verifies and updates the node volume data.
1963 This function will update a L{NodeImage}'s internal structures
1964 with data from the remote call.
1966 @type ninfo: L{objects.Node}
1967 @param ninfo: the node to check
1968 @param nresult: the remote results for the node
1969 @param nimg: the node image object
1970 @param vg_name: the configured VG name
1974 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1976 nimg.lvm_fail = True
1977 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1980 elif isinstance(lvdata, basestring):
1981 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1982 utils.SafeEncode(lvdata))
1983 elif not isinstance(lvdata, dict):
1984 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1986 nimg.volumes = lvdata
1987 nimg.lvm_fail = False
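# nimg.volumes now holds the LV listing returned by the node; roughly (the
# exact value format comes from the backend LV listing, names below are
# hypothetical):
#   nimg.volumes = {"xenvg/disk0.data": (size, inactive_flag, online_flag)}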
1989 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1990 """Verifies and updates the node instance list.
1992 If the listing was successful, then updates this node's instance
1993 list. Otherwise, it marks the RPC call as failed for the instance list check.
1996 @type ninfo: L{objects.Node}
1997 @param ninfo: the node to check
1998 @param nresult: the remote results for the node
1999 @param nimg: the node image object
2002 idata = nresult.get(constants.NV_INSTANCELIST, None)
2003 test = not isinstance(idata, list)
2004 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2005 " (instancelist): %s", utils.SafeEncode(str(idata)))
2007 nimg.hyp_fail = True
2009 nimg.instances = idata
2011 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2012 """Verifies and computes a node information map
2014 @type ninfo: L{objects.Node}
2015 @param ninfo: the node to check
2016 @param nresult: the remote results for the node
2017 @param nimg: the node image object
2018 @param vg_name: the configured VG name
2022 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2024 # try to read free memory (from the hypervisor)
2025 hv_info = nresult.get(constants.NV_HVINFO, None)
2026 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2027 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2030 nimg.mfree = int(hv_info["memory_free"])
2031 except (ValueError, TypeError):
2032 _ErrorIf(True, self.ENODERPC, node,
2033 "node returned invalid nodeinfo, check hypervisor")
2035 # FIXME: devise a free space model for file based instances as well
2036 if vg_name is not None:
2037 test = (constants.NV_VGLIST not in nresult or
2038 vg_name not in nresult[constants.NV_VGLIST])
2039 _ErrorIf(test, self.ENODELVM, node,
2040 "node didn't return data for the volume group '%s'"
2041 " - it is either missing or broken", vg_name)
2044 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2045 except (ValueError, TypeError):
2046 _ErrorIf(True, self.ENODERPC, node,
2047 "node returned invalid LVM info, check LVM status")
2049 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2050 """Gets per-disk status information for all instances.
2052 @type nodelist: list of strings
2053 @param nodelist: Node names
2054 @type node_image: dict of (name, L{objects.Node})
2055 @param node_image: Node objects
2056 @type instanceinfo: dict of (name, L{objects.Instance})
2057 @param instanceinfo: Instance objects
2058 @rtype: {instance: {node: [(success, payload)]}}
2059 @return: a dictionary of per-instance dictionaries with nodes as
2060 keys and disk information as values; the disk information is a
2061 list of tuples (success, payload)
2064 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2067 node_disks_devonly = {}
2068 diskless_instances = set()
2069 diskless = constants.DT_DISKLESS
2071 for nname in nodelist:
2072 node_instances = list(itertools.chain(node_image[nname].pinst,
2073 node_image[nname].sinst))
2074 diskless_instances.update(inst for inst in node_instances
2075 if instanceinfo[inst].disk_template == diskless)
2076 disks = [(inst, disk)
2077 for inst in node_instances
2078 for disk in instanceinfo[inst].disks]
2081 # No need to collect data
2084 node_disks[nname] = disks
2086 # Creating copies as SetDiskID below will modify the objects and that can
2087 # lead to incorrect data returned from nodes
2088 devonly = [dev.Copy() for (_, dev) in disks]
2091 self.cfg.SetDiskID(dev, nname)
2093 node_disks_devonly[nname] = devonly
2095 assert len(node_disks) == len(node_disks_devonly)
2097 # Collect data from all nodes with disks
2098 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2101 assert len(result) == len(node_disks)
2105 for (nname, nres) in result.items():
2106 disks = node_disks[nname]
2109 # No data from this node
2110 data = len(disks) * [(False, "node offline")]
2113 _ErrorIf(msg, self.ENODERPC, nname,
2114 "while getting disk information: %s", msg)
2116 # No data from this node
2117 data = len(disks) * [(False, msg)]
2120 for idx, i in enumerate(nres.payload):
2121 if isinstance(i, (tuple, list)) and len(i) == 2:
2124 logging.warning("Invalid result from node %s, entry %d: %s",
2126 data.append((False, "Invalid result from the remote node"))
2128 for ((inst, _), status) in zip(disks, data):
2129 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2131 # Add empty entries for diskless instances.
2132 for inst in diskless_instances:
2133 assert inst not in instdisk
2136 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2137 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2138 compat.all(isinstance(s, (tuple, list)) and
2139 len(s) == 2 for s in statuses)
2140 for inst, nnames in instdisk.items()
2141 for nname, statuses in nnames.items())
2142 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
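# A hypothetical instdisk for one DRBD instance with two disks, illustrating
# the structure documented above ({instance: {node: [(success, payload)]}}):
#   instdisk = {"inst1.example.com": {
#       "node1.example.com": [(True, mirror_status), (True, mirror_status)],
#       "node2.example.com": [(False, "node offline"), (False, "node offline")]}}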
2146 def _VerifyHVP(self, hvp_data):
2147 """Verifies locally the syntax of the hypervisor parameters.
2150 for item, hv_name, hv_params in hvp_data:
2151 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2154 hv_class = hypervisor.GetHypervisor(hv_name)
2155 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2156 hv_class.CheckParameterSyntax(hv_params)
2157 except errors.GenericError, err:
2158 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2160 def BuildHooksEnv(self):
2163 Cluster-Verify hooks are run only in the post phase; if they fail, their
2164 output is logged in the verify output and the verification fails.
2170 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2173 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2174 for node in cfg.GetAllNodesInfo().values())
2178 def BuildHooksNodes(self):
2179 """Build hooks nodes.
2182 return ([], self.cfg.GetNodeList())
2184 def Exec(self, feedback_fn):
2185 """Verify integrity of cluster, performing various test on nodes.
2188 # This method has too many local variables. pylint: disable-msg=R0914
2190 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2191 verbose = self.op.verbose
2192 self._feedback_fn = feedback_fn
2193 feedback_fn("* Verifying global settings")
2194 for msg in self.cfg.VerifyConfig():
2195 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2197 # Check the cluster certificates
2198 for cert_filename in constants.ALL_CERT_FILES:
2199 (errcode, msg) = _VerifyCertificate(cert_filename)
2200 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2202 vg_name = self.cfg.GetVGName()
2203 drbd_helper = self.cfg.GetDRBDHelper()
2204 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2205 cluster = self.cfg.GetClusterInfo()
2206 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2207 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2208 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2209 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2210 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2211 for iname in instancelist)
2212 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2213 i_non_redundant = [] # Non redundant instances
2214 i_non_a_balanced = [] # Non auto-balanced instances
2215 n_offline = 0 # Count of offline nodes
2216 n_drained = 0 # Count of nodes being drained
2217 node_vol_should = {}
2219 # FIXME: verify OS list
2222 filemap = _ComputeAncillaryFiles(cluster, False)
2224 # do local checksums
2225 master_node = self.master_node = self.cfg.GetMasterNode()
2226 master_ip = self.cfg.GetMasterIP()
2228 # Compute the set of hypervisor parameters
2230 for hv_name in hypervisors:
2231 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2232 for os_name, os_hvp in cluster.os_hvp.items():
2233 for hv_name, hv_params in os_hvp.items():
2236 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2237 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2238 # TODO: collapse identical parameter values in a single one
2239 for instance in instanceinfo.values():
2240 if not instance.hvparams:
2242 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2243 cluster.FillHV(instance)))
2244 # and verify them locally
2245 self._VerifyHVP(hvp_data)
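# At this point hvp_data is a list of (source, hypervisor, parameters)
# tuples covering the cluster defaults, per-OS overrides and per-instance
# overrides, e.g. (hypothetical values):
#   [("cluster", "kvm", {...}),
#    ("os debian-image", "kvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]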
2247 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2248 node_verify_param = {
2249 constants.NV_FILELIST:
2250 utils.UniqueSequence(filename
2251 for files in filemap
2252 for filename in files),
2253 constants.NV_NODELIST: [node.name for node in nodeinfo
2254 if not node.offline],
2255 constants.NV_HYPERVISOR: hypervisors,
2256 constants.NV_HVPARAMS: hvp_data,
2257 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2258 node.secondary_ip) for node in nodeinfo
2259 if not node.offline],
2260 constants.NV_INSTANCELIST: hypervisors,
2261 constants.NV_VERSION: None,
2262 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2263 constants.NV_NODESETUP: None,
2264 constants.NV_TIME: None,
2265 constants.NV_MASTERIP: (master_node, master_ip),
2266 constants.NV_OSLIST: None,
2267 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2270 if vg_name is not None:
2271 node_verify_param[constants.NV_VGLIST] = None
2272 node_verify_param[constants.NV_LVLIST] = vg_name
2273 node_verify_param[constants.NV_PVLIST] = [vg_name]
2274 node_verify_param[constants.NV_DRBDLIST] = None
2277 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2279 # Build our expected cluster state
2280 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2282 vm_capable=node.vm_capable))
2283 for node in nodeinfo)
2287 for node in nodeinfo:
2288 path = _SupportsOob(self.cfg, node)
2289 if path and path not in oob_paths:
2290 oob_paths.append(path)
2293 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2295 for instance in instancelist:
2296 inst_config = instanceinfo[instance]
2298 for nname in inst_config.all_nodes:
2299 if nname not in node_image:
2301 gnode = self.NodeImage(name=nname)
2303 node_image[nname] = gnode
2305 inst_config.MapLVsByNode(node_vol_should)
2307 pnode = inst_config.primary_node
2308 node_image[pnode].pinst.append(instance)
2310 for snode in inst_config.secondary_nodes:
2311 nimg = node_image[snode]
2312 nimg.sinst.append(instance)
2313 if pnode not in nimg.sbp:
2314 nimg.sbp[pnode] = []
2315 nimg.sbp[pnode].append(instance)
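# For a hypothetical two-node DRBD instance this fills the node images as:
#   node_image["node1"].pinst == ["inst1"]             # primary instances
#   node_image["node2"].sinst == ["inst1"]             # secondary instances
#   node_image["node2"].sbp   == {"node1": ["inst1"]}  # secondaries by primary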
2317 # At this point, we have the in-memory data structures complete,
2318 # except for the runtime information, which we'll gather next
2320 # Due to the way our RPC system works, exact response times cannot be
2321 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2322 # time before and after executing the request, we can at least have a time window.
2324 nvinfo_starttime = time.time()
2325 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2326 self.cfg.GetClusterName())
2327 nvinfo_endtime = time.time()
2329 all_drbd_map = self.cfg.ComputeDRBDMap()
2331 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2332 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2334 feedback_fn("* Verifying configuration file consistency")
2335 self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2337 feedback_fn("* Verifying node status")
2341 for node_i in nodeinfo:
2343 nimg = node_image[node]
2347 feedback_fn("* Skipping offline node %s" % (node,))
2351 if node == master_node:
2353 elif node_i.master_candidate:
2354 ntype = "master candidate"
2355 elif node_i.drained:
2361 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2363 msg = all_nvinfo[node].fail_msg
2364 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2366 nimg.rpc_fail = True
2369 nresult = all_nvinfo[node].payload
2371 nimg.call_ok = self._VerifyNode(node_i, nresult)
2372 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2373 self._VerifyNodeNetwork(node_i, nresult)
2374 self._VerifyOob(node_i, nresult)
2377 self._VerifyNodeLVM(node_i, nresult, vg_name)
2378 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2381 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2382 self._UpdateNodeInstances(node_i, nresult, nimg)
2383 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2384 self._UpdateNodeOS(node_i, nresult, nimg)
2385 if not nimg.os_fail:
2386 if refos_img is None:
2388 self._VerifyNodeOS(node_i, nimg, refos_img)
2390 feedback_fn("* Verifying instance status")
2391 for instance in instancelist:
2393 feedback_fn("* Verifying instance %s" % instance)
2394 inst_config = instanceinfo[instance]
2395 self._VerifyInstance(instance, inst_config, node_image,
2397 inst_nodes_offline = []
2399 pnode = inst_config.primary_node
2400 pnode_img = node_image[pnode]
2401 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2402 self.ENODERPC, pnode, "instance %s, connection to"
2403 " primary node failed", instance)
2405 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2406 self.EINSTANCEBADNODE, instance,
2407 "instance is marked as running and lives on offline node %s",
2408 inst_config.primary_node)
2410 # If the instance is non-redundant we cannot survive losing its primary
2411 # node, so we are not N+1 compliant. On the other hand we have no disk
2412 # templates with more than one secondary so that situation is not well supported either.
2414 # FIXME: does not support file-backed instances
2415 if not inst_config.secondary_nodes:
2416 i_non_redundant.append(instance)
2418 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2419 instance, "instance has multiple secondary nodes: %s",
2420 utils.CommaJoin(inst_config.secondary_nodes),
2421 code=self.ETYPE_WARNING)
2423 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2424 pnode = inst_config.primary_node
2425 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2426 instance_groups = {}
2428 for node in instance_nodes:
2429 instance_groups.setdefault(nodeinfo_byname[node].group,
2433 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2434 # Sort so that we always list the primary node first.
2435 for group, nodes in sorted(instance_groups.items(),
2436 key=lambda (_, nodes): pnode in nodes,
2439 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2440 instance, "instance has primary and secondary nodes in"
2441 " different groups: %s", utils.CommaJoin(pretty_list),
2442 code=self.ETYPE_WARNING)
2444 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2445 i_non_a_balanced.append(instance)
2447 for snode in inst_config.secondary_nodes:
2448 s_img = node_image[snode]
2449 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2450 "instance %s, connection to secondary node failed", instance)
2453 inst_nodes_offline.append(snode)
2455 # warn that the instance lives on offline nodes
2456 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2457 "instance has offline secondary node(s) %s",
2458 utils.CommaJoin(inst_nodes_offline))
2459 # ... or ghost/non-vm_capable nodes
2460 for node in inst_config.all_nodes:
2461 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2462 "instance lives on ghost node %s", node)
2463 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2464 instance, "instance lives on non-vm_capable node %s", node)
2466 feedback_fn("* Verifying orphan volumes")
2467 reserved = utils.FieldSet(*cluster.reserved_lvs)
2468 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2470 feedback_fn("* Verifying orphan instances")
2471 self._VerifyOrphanInstances(instancelist, node_image)
2473 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2474 feedback_fn("* Verifying N+1 Memory redundancy")
2475 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2477 feedback_fn("* Other Notes")
2479 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2480 % len(i_non_redundant))
2482 if i_non_a_balanced:
2483 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2484 % len(i_non_a_balanced))
2487 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2490 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2494 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2495 """Analyze the post-hooks' result
2497 This method analyses the hook result, handles it, and sends some
2498 nicely-formatted feedback back to the user.
2500 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2501 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2502 @param hooks_results: the results of the multi-node hooks rpc call
2503 @param feedback_fn: function used send feedback back to the caller
2504 @param lu_result: previous Exec result
2505 @return: the new Exec result, based on the previous result
2509 # We only really run POST phase hooks, and are only interested in their results.
2511 if phase == constants.HOOKS_PHASE_POST:
2512 # Used to change hooks' output to proper indentation
2513 feedback_fn("* Hooks Results")
2514 assert hooks_results, "invalid result from hooks"
2516 for node_name in hooks_results:
2517 res = hooks_results[node_name]
2519 test = msg and not res.offline
2520 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2521 "Communication failure in hooks execution: %s", msg)
2522 if res.offline or msg:
2523 # No need to investigate payload if node is offline or gave an error.
2524 # manually override lu_result here as _ErrorIf only
2525 # overrides self.bad
2528 for script, hkr, output in res.payload:
2529 test = hkr == constants.HKR_FAIL
2530 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2531 "Script %s failed, output:", script)
2533 output = self._HOOKS_INDENT_RE.sub(' ', output)
2534 feedback_fn("%s" % output)
2540 class LUClusterVerifyDisks(NoHooksLU):
2541 """Verifies the cluster disks status.
2546 def ExpandNames(self):
2547 self.needed_locks = {
2548 locking.LEVEL_NODE: locking.ALL_SET,
2549 locking.LEVEL_INSTANCE: locking.ALL_SET,
2551 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2553 def Exec(self, feedback_fn):
2554 """Verify integrity of cluster disks.
2556 @rtype: tuple of three items
2557 @return: a tuple of (dict of node-to-node_error, list of instances
2558 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2562 result = res_nodes, res_instances, res_missing = {}, [], {}
2564 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2565 instances = self.cfg.GetAllInstancesInfo().values()
2568 for inst in instances:
2570 if not inst.admin_up:
2572 inst.MapLVsByNode(inst_lvs)
2573 # transform { iname: {node: [vol,],},} to {(node, vol): inst}
2574 for node, vol_list in inst_lvs.iteritems():
2575 for vol in vol_list:
2576 nv_dict[(node, vol)] = inst
2581 node_lvs = self.rpc.call_lv_list(nodes, [])
2582 for node, node_res in node_lvs.items():
2583 if node_res.offline:
2585 msg = node_res.fail_msg
2587 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2588 res_nodes[node] = msg
2591 lvs = node_res.payload
2592 for lv_name, (_, _, lv_online) in lvs.items():
2593 inst = nv_dict.pop((node, lv_name), None)
2594 if (not lv_online and inst is not None
2595 and inst.name not in res_instances):
2596 res_instances.append(inst.name)
2598 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2600 for key, inst in nv_dict.iteritems():
2601 if inst.name not in res_missing:
2602 res_missing[inst.name] = []
2603 res_missing[inst.name].append(key)
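# The three result components built above might, for example, end up as
# (hypothetical names):
#   res_nodes     = {"node3": "Error while listing LVs"}
#   res_instances = ["inst1"]                         # needs activate-disks
#   res_missing   = {"inst2": [("node1", "xenvg/disk0.data")]}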
2608 class LUClusterRepairDiskSizes(NoHooksLU):
2609 """Verifies the cluster disks sizes.
2614 def ExpandNames(self):
2615 if self.op.instances:
2616 self.wanted_names = []
2617 for name in self.op.instances:
2618 full_name = _ExpandInstanceName(self.cfg, name)
2619 self.wanted_names.append(full_name)
2620 self.needed_locks = {
2621 locking.LEVEL_NODE: [],
2622 locking.LEVEL_INSTANCE: self.wanted_names,
2624 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2626 self.wanted_names = None
2627 self.needed_locks = {
2628 locking.LEVEL_NODE: locking.ALL_SET,
2629 locking.LEVEL_INSTANCE: locking.ALL_SET,
2631 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2633 def DeclareLocks(self, level):
2634 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2635 self._LockInstancesNodes(primary_only=True)
2637 def CheckPrereq(self):
2638 """Check prerequisites.
2640 This only checks the optional instance list against the existing names.
2643 if self.wanted_names is None:
2644 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2646 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2647 in self.wanted_names]
2649 def _EnsureChildSizes(self, disk):
2650 """Ensure children of the disk have the needed disk size.
2652 This is valid mainly for DRBD8 and fixes an issue where the
2653 children have smaller disk size.
2655 @param disk: an L{ganeti.objects.Disk} object
2658 if disk.dev_type == constants.LD_DRBD8:
2659 assert disk.children, "Empty children for DRBD8?"
2660 fchild = disk.children[0]
2661 mismatch = fchild.size < disk.size
2663 self.LogInfo("Child disk has size %d, parent %d, fixing",
2664 fchild.size, disk.size)
2665 fchild.size = disk.size
2667 # and we recurse on this child only, not on the metadev
2668 return self._EnsureChildSizes(fchild) or mismatch
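# Hypothetical example: a DRBD8 disk of size 10240 whose data child has a
# recorded size of 10236 would have the child's recorded size set back to
# 10240 and True returned, so the caller knows the configuration must be
# written out again.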
2672 def Exec(self, feedback_fn):
2673 """Verify the size of cluster disks.
2676 # TODO: check child disks too
2677 # TODO: check differences in size between primary/secondary nodes
2679 for instance in self.wanted_instances:
2680 pnode = instance.primary_node
2681 if pnode not in per_node_disks:
2682 per_node_disks[pnode] = []
2683 for idx, disk in enumerate(instance.disks):
2684 per_node_disks[pnode].append((instance, idx, disk))
2687 for node, dskl in per_node_disks.items():
2688 newl = [v[2].Copy() for v in dskl]
2690 self.cfg.SetDiskID(dsk, node)
2691 result = self.rpc.call_blockdev_getsize(node, newl)
2693 self.LogWarning("Failure in blockdev_getsize call to node"
2694 " %s, ignoring", node)
2696 if len(result.payload) != len(dskl):
2697 logging.warning("Invalid result from node %s: len(dskl)=%d,"
2698 " result.payload=%s", node, len(dskl), result.payload)
2699 self.LogWarning("Invalid result from node %s, ignoring node results",
2702 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2704 self.LogWarning("Disk %d of instance %s did not return size"
2705 " information, ignoring", idx, instance.name)
2707 if not isinstance(size, (int, long)):
2708 self.LogWarning("Disk %d of instance %s did not return valid"
2709 " size information, ignoring", idx, instance.name)
2712 if size != disk.size:
2713 self.LogInfo("Disk %d of instance %s has mismatched size,"
2714 " correcting: recorded %d, actual %d", idx,
2715 instance.name, disk.size, size)
2717 self.cfg.Update(instance, feedback_fn)
2718 changed.append((instance.name, idx, size))
2719 if self._EnsureChildSizes(disk):
2720 self.cfg.Update(instance, feedback_fn)
2721 changed.append((instance.name, idx, disk.size))
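# Each entry appended to "changed" is an (instance_name, disk_index, new_size)
# tuple, e.g. ("inst1.example.com", 0, 10240) if disk 0 had a stale recorded
# size (hypothetical values).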
2725 class LUClusterRename(LogicalUnit):
2726 """Rename the cluster.
2729 HPATH = "cluster-rename"
2730 HTYPE = constants.HTYPE_CLUSTER
2732 def BuildHooksEnv(self):
2737 "OP_TARGET": self.cfg.GetClusterName(),
2738 "NEW_NAME": self.op.name,
2741 def BuildHooksNodes(self):
2742 """Build hooks nodes.
2745 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2747 def CheckPrereq(self):
2748 """Verify that the passed name is a valid one.
2751 hostname = netutils.GetHostname(name=self.op.name,
2752 family=self.cfg.GetPrimaryIPFamily())
2754 new_name = hostname.name
2755 self.ip = new_ip = hostname.ip
2756 old_name = self.cfg.GetClusterName()
2757 old_ip = self.cfg.GetMasterIP()
2758 if new_name == old_name and new_ip == old_ip:
2759 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2760 " cluster has changed",
2762 if new_ip != old_ip:
2763 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2764 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2765 " reachable on the network" %
2766 new_ip, errors.ECODE_NOTUNIQUE)
2768 self.op.name = new_name
2770 def Exec(self, feedback_fn):
2771 """Rename the cluster.
2774 clustername = self.op.name
2777 # shutdown the master IP
2778 master = self.cfg.GetMasterNode()
2779 result = self.rpc.call_node_stop_master(master, False)
2780 result.Raise("Could not disable the master role")
2783 cluster = self.cfg.GetClusterInfo()
2784 cluster.cluster_name = clustername
2785 cluster.master_ip = ip
2786 self.cfg.Update(cluster, feedback_fn)
2788 # update the known hosts file
2789 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2790 node_list = self.cfg.GetOnlineNodeList()
2792 node_list.remove(master)
2795 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2797 result = self.rpc.call_node_start_master(master, False, False)
2798 msg = result.fail_msg
2800 self.LogWarning("Could not re-enable the master role on"
2801 " the master, please restart manually: %s", msg)
2806 class LUClusterSetParams(LogicalUnit):
2807 """Change the parameters of the cluster.
2810 HPATH = "cluster-modify"
2811 HTYPE = constants.HTYPE_CLUSTER
2814 def CheckArguments(self):
2818 if self.op.uid_pool:
2819 uidpool.CheckUidPool(self.op.uid_pool)
2821 if self.op.add_uids:
2822 uidpool.CheckUidPool(self.op.add_uids)
2824 if self.op.remove_uids:
2825 uidpool.CheckUidPool(self.op.remove_uids)
2827 def ExpandNames(self):
2828 # FIXME: in the future maybe other cluster params won't require checking on
2829 # all nodes to be modified.
2830 self.needed_locks = {
2831 locking.LEVEL_NODE: locking.ALL_SET,
2833 self.share_locks[locking.LEVEL_NODE] = 1
2835 def BuildHooksEnv(self):
2840 "OP_TARGET": self.cfg.GetClusterName(),
2841 "NEW_VG_NAME": self.op.vg_name,
2844 def BuildHooksNodes(self):
2845 """Build hooks nodes.
2848 mn = self.cfg.GetMasterNode()
2851 def CheckPrereq(self):
2852 """Check prerequisites.
2854 This checks whether the given params don't conflict and
2855 if the given volume group is valid.
2858 if self.op.vg_name is not None and not self.op.vg_name:
2859 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2860 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2861 " instances exist", errors.ECODE_INVAL)
2863 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2864 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2865 raise errors.OpPrereqError("Cannot disable drbd helper while"
2866 " drbd-based instances exist",
2869 node_list = self.acquired_locks[locking.LEVEL_NODE]
2871 # if vg_name not None, checks given volume group on all nodes
2873 vglist = self.rpc.call_vg_list(node_list)
2874 for node in node_list:
2875 msg = vglist[node].fail_msg
2877 # ignoring down node
2878 self.LogWarning("Error while gathering data on node %s"
2879 " (ignoring node): %s", node, msg)
2881 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2883 constants.MIN_VG_SIZE)
2885 raise errors.OpPrereqError("Error on node '%s': %s" %
2886 (node, vgstatus), errors.ECODE_ENVIRON)
2888 if self.op.drbd_helper:
2889 # checks given drbd helper on all nodes
2890 helpers = self.rpc.call_drbd_helper(node_list)
2891 for node in node_list:
2892 ninfo = self.cfg.GetNodeInfo(node)
2894 self.LogInfo("Not checking drbd helper on offline node %s", node)
2896 msg = helpers[node].fail_msg
2898 raise errors.OpPrereqError("Error checking drbd helper on node"
2899 " '%s': %s" % (node, msg),
2900 errors.ECODE_ENVIRON)
2901 node_helper = helpers[node].payload
2902 if node_helper != self.op.drbd_helper:
2903 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2904 (node, node_helper), errors.ECODE_ENVIRON)
2906 self.cluster = cluster = self.cfg.GetClusterInfo()
2907 # validate params changes
2908 if self.op.beparams:
2909 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2910 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2912 if self.op.ndparams:
2913 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2914 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2916 # TODO: we need a more general way to handle resetting
2917 # cluster-level parameters to default values
2918 if self.new_ndparams["oob_program"] == "":
2919 self.new_ndparams["oob_program"] = \
2920 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2922 if self.op.nicparams:
2923 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2924 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2925 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2928 # check all instances for consistency
2929 for instance in self.cfg.GetAllInstancesInfo().values():
2930 for nic_idx, nic in enumerate(instance.nics):
2931 params_copy = copy.deepcopy(nic.nicparams)
2932 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2934 # check parameter syntax
2936 objects.NIC.CheckParameterSyntax(params_filled)
2937 except errors.ConfigurationError, err:
2938 nic_errors.append("Instance %s, nic/%d: %s" %
2939 (instance.name, nic_idx, err))
2941 # if we're moving instances to routed, check that they have an ip
2942 target_mode = params_filled[constants.NIC_MODE]
2943 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2944 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2945 (instance.name, nic_idx))
2947 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2948 "\n".join(nic_errors))
2950 # hypervisor list/parameters
2951 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2952 if self.op.hvparams:
2953 for hv_name, hv_dict in self.op.hvparams.items():
2954 if hv_name not in self.new_hvparams:
2955 self.new_hvparams[hv_name] = hv_dict
2957 self.new_hvparams[hv_name].update(hv_dict)
2959 # os hypervisor parameters
2960 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2962 for os_name, hvs in self.op.os_hvp.items():
2963 if os_name not in self.new_os_hvp:
2964 self.new_os_hvp[os_name] = hvs
2966 for hv_name, hv_dict in hvs.items():
2967 if hv_name not in self.new_os_hvp[os_name]:
2968 self.new_os_hvp[os_name][hv_name] = hv_dict
2970 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2973 self.new_osp = objects.FillDict(cluster.osparams, {})
2974 if self.op.osparams:
2975 for os_name, osp in self.op.osparams.items():
2976 if os_name not in self.new_osp:
2977 self.new_osp[os_name] = {}
2979 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2982 if not self.new_osp[os_name]:
2983 # we removed all parameters
2984 del self.new_osp[os_name]
2986 # check the parameter validity (remote check)
2987 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2988 os_name, self.new_osp[os_name])
2990 # changes to the hypervisor list
2991 if self.op.enabled_hypervisors is not None:
2992 self.hv_list = self.op.enabled_hypervisors
2993 for hv in self.hv_list:
2994 # if the hypervisor doesn't already exist in the cluster
2995 # hvparams, we initialize it to empty, and then (in both
2996 # cases) we make sure to fill the defaults, as we might not
2997 # have a complete defaults list if the hypervisor wasn't enabled before
2999 if hv not in new_hvp:
3001 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3002 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3004 self.hv_list = cluster.enabled_hypervisors
3006 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3007 # either the enabled list has changed, or the parameters have, validate
3008 for hv_name, hv_params in self.new_hvparams.items():
3009 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3010 (self.op.enabled_hypervisors and
3011 hv_name in self.op.enabled_hypervisors)):
3012 # either this is a new hypervisor, or its parameters have changed
3013 hv_class = hypervisor.GetHypervisor(hv_name)
3014 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3015 hv_class.CheckParameterSyntax(hv_params)
3016 _CheckHVParams(self, node_list, hv_name, hv_params)
3019 # no need to check any newly-enabled hypervisors, since the
3020 # defaults have already been checked in the above code-block
3021 for os_name, os_hvp in self.new_os_hvp.items():
3022 for hv_name, hv_params in os_hvp.items():
3023 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3024 # we need to fill in the new os_hvp on top of the actual hv_p
3025 cluster_defaults = self.new_hvparams.get(hv_name, {})
3026 new_osp = objects.FillDict(cluster_defaults, hv_params)
3027 hv_class = hypervisor.GetHypervisor(hv_name)
3028 hv_class.CheckParameterSyntax(new_osp)
3029 _CheckHVParams(self, node_list, hv_name, new_osp)
3031 if self.op.default_iallocator:
3032 alloc_script = utils.FindFile(self.op.default_iallocator,
3033 constants.IALLOCATOR_SEARCH_PATH,
3035 if alloc_script is None:
3036 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3037 " specified" % self.op.default_iallocator,
3040 def Exec(self, feedback_fn):
3041 """Change the parameters of the cluster.
3044 if self.op.vg_name is not None:
3045 new_volume = self.op.vg_name
3048 if new_volume != self.cfg.GetVGName():
3049 self.cfg.SetVGName(new_volume)
3051 feedback_fn("Cluster LVM configuration already in desired"
3052 " state, not changing")
3053 if self.op.drbd_helper is not None:
3054 new_helper = self.op.drbd_helper
3057 if new_helper != self.cfg.GetDRBDHelper():
3058 self.cfg.SetDRBDHelper(new_helper)
3060 feedback_fn("Cluster DRBD helper already in desired state,"
3062 if self.op.hvparams:
3063 self.cluster.hvparams = self.new_hvparams
3065 self.cluster.os_hvp = self.new_os_hvp
3066 if self.op.enabled_hypervisors is not None:
3067 self.cluster.hvparams = self.new_hvparams
3068 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3069 if self.op.beparams:
3070 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3071 if self.op.nicparams:
3072 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3073 if self.op.osparams:
3074 self.cluster.osparams = self.new_osp
3075 if self.op.ndparams:
3076 self.cluster.ndparams = self.new_ndparams
3078 if self.op.candidate_pool_size is not None:
3079 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3080 # we need to update the pool size here, otherwise the save will fail
3081 _AdjustCandidatePool(self, [])
3083 if self.op.maintain_node_health is not None:
3084 self.cluster.maintain_node_health = self.op.maintain_node_health
3086 if self.op.prealloc_wipe_disks is not None:
3087 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3089 if self.op.add_uids is not None:
3090 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3092 if self.op.remove_uids is not None:
3093 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3095 if self.op.uid_pool is not None:
3096 self.cluster.uid_pool = self.op.uid_pool
3098 if self.op.default_iallocator is not None:
3099 self.cluster.default_iallocator = self.op.default_iallocator
3101 if self.op.reserved_lvs is not None:
3102 self.cluster.reserved_lvs = self.op.reserved_lvs
3104 def helper_os(aname, mods, desc):
3106 lst = getattr(self.cluster, aname)
3107 for key, val in mods:
3108 if key == constants.DDM_ADD:
3110 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3113 elif key == constants.DDM_REMOVE:
3117 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3119 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3121 if self.op.hidden_os:
3122 helper_os("hidden_os", self.op.hidden_os, "hidden")
3124 if self.op.blacklisted_os:
3125 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3127 if self.op.master_netdev:
3128 master = self.cfg.GetMasterNode()
3129 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3130 self.cluster.master_netdev)
3131 result = self.rpc.call_node_stop_master(master, False)
3132 result.Raise("Could not disable the master ip")
3133 feedback_fn("Changing master_netdev from %s to %s" %
3134 (self.cluster.master_netdev, self.op.master_netdev))
3135 self.cluster.master_netdev = self.op.master_netdev
3137 self.cfg.Update(self.cluster, feedback_fn)
3139 if self.op.master_netdev:
3140 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3141 self.op.master_netdev)
3142 result = self.rpc.call_node_start_master(master, False, False)
3144 self.LogWarning("Could not re-enable the master ip on"
3145 " the master, please restart manually: %s",
3149 def _UploadHelper(lu, nodes, fname):
3150 """Helper for uploading a file and showing warnings.
3153 if os.path.exists(fname):
3154 result = lu.rpc.call_upload_file(nodes, fname)
3155 for to_node, to_result in result.items():
3156 msg = to_result.fail_msg
3158 msg = ("Copy of file %s to node %s failed: %s" %
3159 (fname, to_node, msg))
3160 lu.proc.LogWarning(msg)
3163 def _ComputeAncillaryFiles(cluster, redist):
3164 """Compute files external to Ganeti which need to be consistent.
3166 @type redist: boolean
3167 @param redist: Whether to include files which need to be redistributed
3170 # Compute files for all nodes
3172 constants.SSH_KNOWN_HOSTS_FILE,
3173 constants.CONFD_HMAC_KEY,
3174 constants.CLUSTER_DOMAIN_SECRET_FILE,
3178 files_all.update(constants.ALL_CERT_FILES)
3179 files_all.update(ssconf.SimpleStore().GetFileList())
3181 if cluster.modify_etc_hosts:
3182 files_all.add(constants.ETC_HOSTS)
3184 # Files which must either exist on all nodes or on none
3185 files_all_opt = set([
3186 constants.RAPI_USERS_FILE,
3189 # Files which should only be on master candidates
3192 files_mc.add(constants.CLUSTER_CONF_FILE)
3194 # Files which should only be on VM-capable nodes
3195 files_vm = set(filename
3196 for hv_name in cluster.enabled_hypervisors
3197 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3199 # Filenames must be unique
3200 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3201 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3202 "Found file listed in more than one file list"
3204 return (files_all, files_all_opt, files_mc, files_vm)
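# The four sets group ancillary files by where they must live: files_all on
# every node, files_all_opt on all nodes or on none, files_mc only on master
# candidates, and files_vm only on VM-capable nodes; for example
# constants.CONFD_HMAC_KEY ends up in files_all while hypervisor-specific
# files end up in files_vm.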
3207 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3208 """Distribute additional files which are part of the cluster configuration.
3210 ConfigWriter takes care of distributing the config and ssconf files, but
3211 there are more files which should be distributed to all nodes. This function
3212 makes sure those are copied.
3214 @param lu: calling logical unit
3215 @param additional_nodes: list of nodes not in the config to distribute to
3216 @type additional_vm: boolean
3217 @param additional_vm: whether the additional nodes are vm-capable or not
3220 # Gather target nodes
3221 cluster = lu.cfg.GetClusterInfo()
3222 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3224 online_nodes = lu.cfg.GetOnlineNodeList()
3225 vm_nodes = lu.cfg.GetVmCapableNodeList()
3227 if additional_nodes is not None:
3228 online_nodes.extend(additional_nodes)
3230 vm_nodes.extend(additional_nodes)
3232 # Never distribute to master node
3233 for nodelist in [online_nodes, vm_nodes]:
3234 if master_info.name in nodelist:
3235 nodelist.remove(master_info.name)
3238 (files_all, files_all_opt, files_mc, files_vm) = \
3239 _ComputeAncillaryFiles(cluster, True)
3241 # Never re-distribute configuration file from here
3242 assert not (constants.CLUSTER_CONF_FILE in files_all or
3243 constants.CLUSTER_CONF_FILE in files_vm)
3244 assert not files_mc, "Master candidates not handled in this function"
3247 (online_nodes, files_all),
3248 (online_nodes, files_all_opt),
3249 (vm_nodes, files_vm),
3253 for (node_list, files) in filemap:
3255 _UploadHelper(lu, node_list, fname)
3258 class LUClusterRedistConf(NoHooksLU):
3259 """Force the redistribution of cluster configuration.
3261 This is a very simple LU.
3266 def ExpandNames(self):
3267 self.needed_locks = {
3268 locking.LEVEL_NODE: locking.ALL_SET,
3270 self.share_locks[locking.LEVEL_NODE] = 1
3272 def Exec(self, feedback_fn):
3273 """Redistribute the configuration.
3276 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3277 _RedistributeAncillaryFiles(self)
3280 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3281 """Sleep and poll for an instance's disk to sync.
3284 if not instance.disks or disks is not None and not disks:
3287 disks = _ExpandCheckDisks(instance, disks)
3290 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3292 node = instance.primary_node
3295 lu.cfg.SetDiskID(dev, node)
3297 # TODO: Convert to utils.Retry
3300 degr_retries = 10 # in seconds, as we sleep 1 second each time
3304 cumul_degraded = False
3305 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3306 msg = rstats.fail_msg
3308 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3311 raise errors.RemoteError("Can't contact node %s for mirror data,"
3312 " aborting." % node)
3315 rstats = rstats.payload
3317 for i, mstat in enumerate(rstats):
3319 lu.LogWarning("Can't compute data for node %s/%s",
3320 node, disks[i].iv_name)
3323 cumul_degraded = (cumul_degraded or
3324 (mstat.is_degraded and mstat.sync_percent is None))
3325 if mstat.sync_percent is not None:
3327 if mstat.estimated_time is not None:
3328 rem_time = ("%s remaining (estimated)" %
3329 utils.FormatSeconds(mstat.estimated_time))
3330 max_time = mstat.estimated_time
3332 rem_time = "no time estimate"
3333 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3334 (disks[i].iv_name, mstat.sync_percent, rem_time))
3336 # if we're done but degraded, let's do a few small retries, to
3337 # make sure we see a stable and not transient situation; therefore
3338 # we force restart of the loop
3339 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3340 logging.info("Degraded disks found, %d retries left", degr_retries)
3348 time.sleep(min(60, max_time))
3351 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3352 return not cumul_degraded
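# The return value is True when, at the end of the wait, none of the checked
# disks is still degraded. A hypothetical caller might simply test it:
#   if not _WaitForSync(lu, instance):
#     lu.LogWarning("Some disks of %s are degraded", instance.name)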
3355 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3356 """Check that mirrors are not degraded.
3358 The ldisk parameter, if True, will change the test from the
3359 is_degraded attribute (which represents overall non-ok status for
3360 the device(s)) to the ldisk (representing the local storage status).
3363 lu.cfg.SetDiskID(dev, node)
3367 if on_primary or dev.AssembleOnSecondary():
3368 rstats = lu.rpc.call_blockdev_find(node, dev)
3369 msg = rstats.fail_msg
3371 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3373 elif not rstats.payload:
3374 lu.LogWarning("Can't find disk on node %s", node)
3378 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3380 result = result and not rstats.payload.is_degraded
3383 for child in dev.children:
3384 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3389 class LUOobCommand(NoHooksLU):
3390 """Logical unit for OOB handling.
3394 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3396 def CheckPrereq(self):
3397 """Check prerequisites.
3400 - the node exists in the configuration
3403 Any errors are signaled by raising errors.OpPrereqError.
3407 self.master_node = self.cfg.GetMasterNode()
3409 assert self.op.power_delay >= 0.0
3411 if self.op.node_names:
3412 if self.op.command in self._SKIP_MASTER:
3413 if self.master_node in self.op.node_names:
3414 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3415 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3417 if master_oob_handler:
3418 additional_text = ("Run '%s %s %s' if you want to operate on the"
3419 " master regardless") % (master_oob_handler,
3423 additional_text = "The master node does not support out-of-band"
3425 raise errors.OpPrereqError(("Operating on the master node %s is not"
3426 " allowed for %s\n%s") %
3427 (self.master_node, self.op.command,
3428 additional_text), errors.ECODE_INVAL)
3430 self.op.node_names = self.cfg.GetNodeList()
3431 if self.op.command in self._SKIP_MASTER:
3432 self.op.node_names.remove(self.master_node)
3434 if self.op.command in self._SKIP_MASTER:
3435 assert self.master_node not in self.op.node_names
3437 for node_name in self.op.node_names:
3438 node = self.cfg.GetNodeInfo(node_name)
3441 raise errors.OpPrereqError("Node %s not found" % node_name,
3444 self.nodes.append(node)
3446 if (not self.op.ignore_status and
3447 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3448 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3449 " not marked offline") % node_name,
3452 def ExpandNames(self):
3453 """Gather locks we need.
3456 if self.op.node_names:
3457 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3458 for name in self.op.node_names]
3459 lock_names = self.op.node_names
3461 lock_names = locking.ALL_SET
3463 self.needed_locks = {
3464 locking.LEVEL_NODE: lock_names,
3467 def Exec(self, feedback_fn):
3468 """Execute OOB and return result if we expect any.
3471 master_node = self.master_node
3474 for idx, node in enumerate(self.nodes):
3475 node_entry = [(constants.RS_NORMAL, node.name)]
3476 ret.append(node_entry)
3478 oob_program = _SupportsOob(self.cfg, node)
3481 node_entry.append((constants.RS_UNAVAIL, None))
3484 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3485 self.op.command, oob_program, node.name)
3486 result = self.rpc.call_run_oob(master_node, oob_program,
3487 self.op.command, node.name,
3491 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3492 node.name, result.fail_msg)
3493 node_entry.append((constants.RS_NODATA, None))
3496 self._CheckPayload(result)
3497 except errors.OpExecError, err:
3498 self.LogWarning("The payload returned by '%s' is not valid: %s",
3500 node_entry.append((constants.RS_NODATA, None))
3502 if self.op.command == constants.OOB_HEALTH:
3503 # For health we should log important events
3504 for item, status in result.payload:
3505 if status in [constants.OOB_STATUS_WARNING,
3506 constants.OOB_STATUS_CRITICAL]:
3507 self.LogWarning("On node '%s' item '%s' has status '%s'",
3508 node.name, item, status)
3510 if self.op.command == constants.OOB_POWER_ON:
3512 elif self.op.command == constants.OOB_POWER_OFF:
3513 node.powered = False
3514 elif self.op.command == constants.OOB_POWER_STATUS:
3515 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3516 if powered != node.powered:
3517 logging.warning(("Recorded power state (%s) of node '%s' does not"
3518 " match actual power state (%s)"), node.powered,
3521 # For configuration changing commands we should update the node
3522 if self.op.command in (constants.OOB_POWER_ON,
3523 constants.OOB_POWER_OFF):
3524 self.cfg.Update(node, feedback_fn)
3526 node_entry.append((constants.RS_NORMAL, result.payload))
3528 if (self.op.command == constants.OOB_POWER_ON and
3529 idx < len(self.nodes) - 1):
3530 time.sleep(self.op.power_delay)
3534 def _CheckPayload(self, result):
3535 """Checks if the payload is valid.
3537 @param result: RPC result
3538 @raises errors.OpExecError: If payload is not valid
3542 if self.op.command == constants.OOB_HEALTH:
3543 if not isinstance(result.payload, list):
3544 errs.append("command 'health' is expected to return a list but got %s" %
3545 type(result.payload))
3547 for item, status in result.payload:
3548 if status not in constants.OOB_STATUSES:
3549 errs.append("health item '%s' has invalid status '%s'" %
3552 if self.op.command == constants.OOB_POWER_STATUS:
3553 if not isinstance(result.payload, dict):
3554 errs.append("power-status is expected to return a dict but got %s" %
3555 type(result.payload))
3557 if self.op.command in [
3558 constants.OOB_POWER_ON,
3559 constants.OOB_POWER_OFF,
3560 constants.OOB_POWER_CYCLE,
3562 if result.payload is not None:
3563 errs.append("%s is expected to not return payload but got '%s'" %
3564 (self.op.command, result.payload))
3567 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3568 utils.CommaJoin(errs))
3570 class _OsQuery(_QueryBase):
3571 FIELDS = query.OS_FIELDS
3573 def ExpandNames(self, lu):
3574 # Lock all nodes in shared mode
3575 # Temporary removal of locks, should be reverted later
3576 # TODO: reintroduce locks when they are lighter-weight
3577 lu.needed_locks = {}
3578 #self.share_locks[locking.LEVEL_NODE] = 1
3579 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3581 # The following variables interact with _QueryBase._GetNames
3583 self.wanted = self.names
3585 self.wanted = locking.ALL_SET
3587 self.do_locking = self.use_locking
3589 def DeclareLocks(self, lu, level):
3593 def _DiagnoseByOS(rlist):
3594 """Remaps a per-node return list into an a per-os per-node dictionary
3596 @param rlist: a map with node names as keys and OS objects as values
3599 @return: a dictionary with osnames as keys and as value another
3600 map, with nodes as keys and tuples of (path, status, diagnose,
3601 variants, parameters, api_versions) as values, eg::
3603 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3604 (/srv/..., False, "invalid api")],
3605 "node2": [(/srv/..., True, "", [], [])]}
3610 # we build here the list of nodes that didn't fail the RPC (at RPC
3611 # level), so that nodes with a non-responding node daemon don't
3612 # make all OSes invalid
3613 good_nodes = [node_name for node_name in rlist
3614 if not rlist[node_name].fail_msg]
3615 for node_name, nr in rlist.items():
3616 if nr.fail_msg or not nr.payload:
3618 for (name, path, status, diagnose, variants,
3619 params, api_versions) in nr.payload:
3620 if name not in all_os:
3621 # build a list of nodes for this os containing empty lists
3622 # for each node in node_list
3624 for nname in good_nodes:
3625 all_os[name][nname] = []
3626 # convert params from [name, help] to (name, help)
3627 params = [tuple(v) for v in params]
3628 all_os[name][node_name].append((path, status, diagnose,
3629 variants, params, api_versions))
3632 def _GetQueryData(self, lu):
3633 """Computes the list of nodes and their attributes.
3636 # Locking is not used
3637 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3639 valid_nodes = [node.name
3640 for node in lu.cfg.GetAllNodesInfo().values()
3641 if not node.offline and node.vm_capable]
3642 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3643 cluster = lu.cfg.GetClusterInfo()
3647 for (os_name, os_data) in pol.items():
3648 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3649 hidden=(os_name in cluster.hidden_os),
3650 blacklisted=(os_name in cluster.blacklisted_os))
3654 api_versions = set()
3656 for idx, osl in enumerate(os_data.values()):
3657 info.valid = bool(info.valid and osl and osl[0][1])
3661 (node_variants, node_params, node_api) = osl[0][3:6]
3664 variants.update(node_variants)
3665 parameters.update(node_params)
3666 api_versions.update(node_api)
3668 # Filter out inconsistent values
3669 variants.intersection_update(node_variants)
3670 parameters.intersection_update(node_params)
3671 api_versions.intersection_update(node_api)
3673 info.variants = list(variants)
3674 info.parameters = list(parameters)
3675 info.api_versions = list(api_versions)
3677 data[os_name] = info
3679 # Prepare data in requested order
3680 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3684 class LUOsDiagnose(NoHooksLU):
3685 """Logical unit for OS diagnose/query.
3691 def _BuildFilter(fields, names):
3692 """Builds a filter for querying OSes.
3695 name_filter = qlang.MakeSimpleFilter("name", names)
3697 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3698 # respective field is not requested
3699 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3700 for fname in ["hidden", "blacklisted"]
3701 if fname not in fields]
3702 if "valid" not in fields:
3703 status_filter.append([qlang.OP_TRUE, "valid"])
3706 status_filter.insert(0, qlang.OP_AND)
3708 status_filter = None
3710 if name_filter and status_filter:
3711 return [qlang.OP_AND, name_filter, status_filter]
3715 return status_filter
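# For example, an OS query with no names and only the "name" output field
# would (sketch) produce a status-only filter such as:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]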
3717 def CheckArguments(self):
3718 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3719 self.op.output_fields, False)
3721 def ExpandNames(self):
3722 self.oq.ExpandNames(self)
3724 def Exec(self, feedback_fn):
3725 return self.oq.OldStyleQuery(self)
3728 class LUNodeRemove(LogicalUnit):
3729 """Logical unit for removing a node.
3732 HPATH = "node-remove"
3733 HTYPE = constants.HTYPE_NODE
3735 def BuildHooksEnv(self):
3738 This doesn't run on the target node in the pre phase as a failed
3739 node would then be impossible to remove.
3743 "OP_TARGET": self.op.node_name,
3744 "NODE_NAME": self.op.node_name,
3747 def BuildHooksNodes(self):
3748 """Build hooks nodes.
3751 all_nodes = self.cfg.GetNodeList()
3753 all_nodes.remove(self.op.node_name)
3755 logging.warning("Node '%s', which is about to be removed, was not found"
3756 " in the list of all nodes", self.op.node_name)
3757 return (all_nodes, all_nodes)
3759 def CheckPrereq(self):
3760 """Check prerequisites.
3763 - the node exists in the configuration
3764 - it does not have primary or secondary instances
3765 - it's not the master
3767 Any errors are signaled by raising errors.OpPrereqError.
3770 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3771 node = self.cfg.GetNodeInfo(self.op.node_name)
3772 assert node is not None
3774 instance_list = self.cfg.GetInstanceList()
3776 masternode = self.cfg.GetMasterNode()
3777 if node.name == masternode:
3778 raise errors.OpPrereqError("Node is the master node,"
3779 " you need to failover first.",
3782 for instance_name in instance_list:
3783 instance = self.cfg.GetInstanceInfo(instance_name)
3784 if node.name in instance.all_nodes:
3785 raise errors.OpPrereqError("Instance %s is still running on the node,"
3786 " please remove first." % instance_name,
3788 self.op.node_name = node.name
3791 def Exec(self, feedback_fn):
3792 """Removes the node from the cluster.
3796 logging.info("Stopping the node daemon and removing configs from node %s",
3799 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3801 # Promote nodes to master candidate as needed
3802 _AdjustCandidatePool(self, exceptions=[node.name])
3803 self.context.RemoveNode(node.name)
3805 # Run post hooks on the node before it's removed
3806 _RunPostHook(self, node.name)
3808 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3809 msg = result.fail_msg
3811 self.LogWarning("Errors encountered on the remote node while leaving"
3812 " the cluster: %s", msg)
3814 # Remove node from our /etc/hosts
3815 if self.cfg.GetClusterInfo().modify_etc_hosts:
3816 master_node = self.cfg.GetMasterNode()
3817 result = self.rpc.call_etc_hosts_modify(master_node,
3818 constants.ETC_HOSTS_REMOVE,
3820 result.Raise("Can't update hosts file with new host data")
3821 _RedistributeAncillaryFiles(self)
3824 class _NodeQuery(_QueryBase):
3825 FIELDS = query.NODE_FIELDS
3827 def ExpandNames(self, lu):
3828 lu.needed_locks = {}
3829 lu.share_locks[locking.LEVEL_NODE] = 1
3832 self.wanted = _GetWantedNodes(lu, self.names)
3834 self.wanted = locking.ALL_SET
3836 self.do_locking = (self.use_locking and
3837 query.NQ_LIVE in self.requested_data)
3840 # if we don't request only static fields, we need to lock the nodes
3841 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3843 def DeclareLocks(self, lu, level):
3846 def _GetQueryData(self, lu):
3847 """Computes the list of nodes and their attributes.
3850 all_info = lu.cfg.GetAllNodesInfo()
3852 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3854 # Gather data as requested
3855 if query.NQ_LIVE in self.requested_data:
3856 # filter out non-vm_capable nodes
3857 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3859 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3860 lu.cfg.GetHypervisorType())
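# keep only nodes that answered the RPC successfully and returned a payload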
3861 live_data = dict((name, nresult.payload)
3862 for (name, nresult) in node_data.items()
3863 if not nresult.fail_msg and nresult.payload)
3867 if query.NQ_INST in self.requested_data:
3868 node_to_primary = dict([(name, set()) for name in nodenames])
3869 node_to_secondary = dict([(name, set()) for name in nodenames])
3871 inst_data = lu.cfg.GetAllInstancesInfo()
3873 for inst in inst_data.values():
3874 if inst.primary_node in node_to_primary:
3875 node_to_primary[inst.primary_node].add(inst.name)
3876 for secnode in inst.secondary_nodes:
3877 if secnode in node_to_secondary:
3878 node_to_secondary[secnode].add(inst.name)
3880 node_to_primary = None
3881 node_to_secondary = None
3883 if query.NQ_OOB in self.requested_data:
3884 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3885 for name, node in all_info.iteritems())
3889 if query.NQ_GROUP in self.requested_data:
3890 groups = lu.cfg.GetAllNodeGroupsInfo()
3894 return query.NodeQueryData([all_info[name] for name in nodenames],
3895 live_data, lu.cfg.GetMasterNode(),
3896 node_to_primary, node_to_secondary, groups,
3897 oob_support, lu.cfg.GetClusterInfo())
3900 class LUNodeQuery(NoHooksLU):
3901 """Logical unit for querying nodes.
3904 # pylint: disable-msg=W0142
3907 def CheckArguments(self):
3908 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3909 self.op.output_fields, self.op.use_locking)
3911 def ExpandNames(self):
3912 self.nq.ExpandNames(self)
3914 def Exec(self, feedback_fn):
3915 return self.nq.OldStyleQuery(self)
3918 class LUNodeQueryvols(NoHooksLU):
3919 """Logical unit for getting volumes on node(s).
3923 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3924 _FIELDS_STATIC = utils.FieldSet("node")
3926 def CheckArguments(self):
3927 _CheckOutputFields(static=self._FIELDS_STATIC,
3928 dynamic=self._FIELDS_DYNAMIC,
3929 selected=self.op.output_fields)
3931 def ExpandNames(self):
3932 self.needed_locks = {}
3933 self.share_locks[locking.LEVEL_NODE] = 1
3934 if not self.op.nodes:
3935 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3937 self.needed_locks[locking.LEVEL_NODE] = \
3938 _GetWantedNodes(self, self.op.nodes)
3940 def Exec(self, feedback_fn):
3941 """Computes the list of volumes on the nodes and their attributes.
3944 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3945 volumes = self.rpc.call_node_volumes(nodenames)
3947 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3948 in self.cfg.GetInstanceList()]
3950 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3953 for node in nodenames:
3954 nresult = volumes[node]
3957 msg = nresult.fail_msg
3959 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3962 node_vols = nresult.payload[:]
3963 node_vols.sort(key=lambda vol: vol['dev'])
3965 for vol in node_vols:
3967 for field in self.op.output_fields:
3970 elif field == "phys":
3974 elif field == "name":
3976 elif field == "size":
3977 val = int(float(vol['size']))
3978 elif field == "instance":
3980 if node not in lv_by_node[inst]:
3982 if vol['name'] in lv_by_node[inst][node]:
3988 raise errors.ParameterError(field)
3989 node_output.append(str(val))
3991 output.append(node_output)
3996 class LUNodeQueryStorage(NoHooksLU):
3997 """Logical unit for getting information on storage units on node(s).
4000 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4003 def CheckArguments(self):
4004 _CheckOutputFields(static=self._FIELDS_STATIC,
4005 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4006 selected=self.op.output_fields)
4008 def ExpandNames(self):
4009 self.needed_locks = {}
4010 self.share_locks[locking.LEVEL_NODE] = 1
4013 self.needed_locks[locking.LEVEL_NODE] = \
4014 _GetWantedNodes(self, self.op.nodes)
4016 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4018 def Exec(self, feedback_fn):
4019 """Computes the list of storage units on the nodes and their attributes.
4022 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4024 # Always get name to sort by
4025 if constants.SF_NAME in self.op.output_fields:
4026 fields = self.op.output_fields[:]
4028 fields = [constants.SF_NAME] + self.op.output_fields
4030 # Never ask for node or type as it's only known to the LU
4031 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4032 while extra in fields:
4033 fields.remove(extra)
4035 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4036 name_idx = field_idx[constants.SF_NAME]
4038 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4039 data = self.rpc.call_storage_list(self.nodes,
4040 self.op.storage_type, st_args,
4041 self.op.name, fields)
4045 for node in utils.NiceSort(self.nodes):
4046 nresult = data[node]
4050 msg = nresult.fail_msg
4052 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4055 rows = dict([(row[name_idx], row) for row in nresult.payload])
4057 for name in utils.NiceSort(rows.keys()):
4062 for field in self.op.output_fields:
4063 if field == constants.SF_NODE:
4065 elif field == constants.SF_TYPE:
4066 val = self.op.storage_type
4067 elif field in field_idx:
4068 val = row[field_idx[field]]
4070 raise errors.ParameterError(field)
4079 class _InstanceQuery(_QueryBase):
4080 FIELDS = query.INSTANCE_FIELDS
4082 def ExpandNames(self, lu):
4083 lu.needed_locks = {}
4084 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4085 lu.share_locks[locking.LEVEL_NODE] = 1
4088 self.wanted = _GetWantedInstances(lu, self.names)
4090 self.wanted = locking.ALL_SET
4092 self.do_locking = (self.use_locking and
4093 query.IQ_LIVE in self.requested_data)
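# node/instance locks are only needed when live data was requested;
# static configuration data can be read without locking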
4095 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4096 lu.needed_locks[locking.LEVEL_NODE] = []
4097 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4099 def DeclareLocks(self, lu, level):
4100 if level == locking.LEVEL_NODE and self.do_locking:
4101 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4103 def _GetQueryData(self, lu):
4104 """Computes the list of instances and their attributes.
4107 cluster = lu.cfg.GetClusterInfo()
4108 all_info = lu.cfg.GetAllInstancesInfo()
4110 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4112 instance_list = [all_info[name] for name in instance_names]
4113 nodes = frozenset(itertools.chain(*(inst.all_nodes
4114 for inst in instance_list)))
4115 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4118 wrongnode_inst = set()
4120 # Gather data as requested
4121 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4123 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4125 result = node_data[name]
4127 # offline nodes will be in both lists
4128 assert result.fail_msg
4129 offline_nodes.append(name)
4131 bad_nodes.append(name)
4132 elif result.payload:
4133 for inst in result.payload:
4134 if inst in all_info:
4135 if all_info[inst].primary_node == name:
4136 live_data.update(result.payload)
4138 wrongnode_inst.add(inst)
4140 # orphan instance; we don't list it here as we don't
4141 # handle this case yet in the output of instance listing
4142 logging.warning("Orphan instance '%s' found on node %s",
4144 # else no instance is alive
4148 if query.IQ_DISKUSAGE in self.requested_data:
4149 disk_usage = dict((inst.name,
4150 _ComputeDiskSize(inst.disk_template,
4151 [{constants.IDISK_SIZE: disk.size}
4152 for disk in inst.disks]))
4153 for inst in instance_list)
4157 if query.IQ_CONSOLE in self.requested_data:
4159 for inst in instance_list:
4160 if inst.name in live_data:
4161 # Instance is running
4162 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4164 consinfo[inst.name] = None
4165 assert set(consinfo.keys()) == set(instance_names)
4169 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4170 disk_usage, offline_nodes, bad_nodes,
4171 live_data, wrongnode_inst, consinfo)
4174 class LUQuery(NoHooksLU):
4175 """Query for resources/items of a certain kind.
4178 # pylint: disable-msg=W0142
4181 def CheckArguments(self):
4182 qcls = _GetQueryImplementation(self.op.what)
4184 self.impl = qcls(self.op.filter, self.op.fields, False)
4186 def ExpandNames(self):
4187 self.impl.ExpandNames(self)
4189 def DeclareLocks(self, level):
4190 self.impl.DeclareLocks(self, level)
4192 def Exec(self, feedback_fn):
4193 return self.impl.NewStyleQuery(self)
4196 class LUQueryFields(NoHooksLU):
4197 """Query for resources/items of a certain kind.
4200 # pylint: disable-msg=W0142
4203 def CheckArguments(self):
4204 self.qcls = _GetQueryImplementation(self.op.what)
4206 def ExpandNames(self):
4207 self.needed_locks = {}
4209 def Exec(self, feedback_fn):
4210 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4213 class LUNodeModifyStorage(NoHooksLU):
4214 """Logical unit for modifying a storage volume on a node.
4219 def CheckArguments(self):
4220 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4222 storage_type = self.op.storage_type
4225 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4227 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4228 " modified" % storage_type,
4231 diff = set(self.op.changes.keys()) - modifiable
4233 raise errors.OpPrereqError("The following fields can not be modified for"
4234 " storage units of type '%s': %r" %
4235 (storage_type, list(diff)),
4238 def ExpandNames(self):
4239 self.needed_locks = {
4240 locking.LEVEL_NODE: self.op.node_name,
4243 def Exec(self, feedback_fn):
4244 """Computes the list of nodes and their attributes.
4247 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4248 result = self.rpc.call_storage_modify(self.op.node_name,
4249 self.op.storage_type, st_args,
4250 self.op.name, self.op.changes)
4251 result.Raise("Failed to modify storage unit '%s' on %s" %
4252 (self.op.name, self.op.node_name))
4255 class LUNodeAdd(LogicalUnit):
4256 """Logical unit for adding node to the cluster.
4260 HTYPE = constants.HTYPE_NODE
4261 _NFLAGS = ["master_capable", "vm_capable"]
4263 def CheckArguments(self):
4264 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4265 # validate/normalize the node name
4266 self.hostname = netutils.GetHostname(name=self.op.node_name,
4267 family=self.primary_ip_family)
4268 self.op.node_name = self.hostname.name
4269 if self.op.readd and self.op.group:
4270 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4271 " being readded", errors.ECODE_INVAL)
4273 def BuildHooksEnv(self):
4276 This will run on all nodes before, and on all nodes + the new node after.
4280 "OP_TARGET": self.op.node_name,
4281 "NODE_NAME": self.op.node_name,
4282 "NODE_PIP": self.op.primary_ip,
4283 "NODE_SIP": self.op.secondary_ip,
4284 "MASTER_CAPABLE": str(self.op.master_capable),
4285 "VM_CAPABLE": str(self.op.vm_capable),
4288 def BuildHooksNodes(self):
4289 """Build hooks nodes.
4292 # Exclude added node
4293 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4294 post_nodes = pre_nodes + [self.op.node_name, ]
4296 return (pre_nodes, post_nodes)
4298 def CheckPrereq(self):
4299 """Check prerequisites.
4302 - the new node is not already in the config
4304 - its parameters (single/dual homed) match the cluster
4306 Any errors are signaled by raising errors.OpPrereqError.
4310 hostname = self.hostname
4311 node = hostname.name
4312 primary_ip = self.op.primary_ip = hostname.ip
4313 if self.op.secondary_ip is None:
4314 if self.primary_ip_family == netutils.IP6Address.family:
4315 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4316 " IPv4 address must be given as secondary",
4318 self.op.secondary_ip = primary_ip
4320 secondary_ip = self.op.secondary_ip
4321 if not netutils.IP4Address.IsValid(secondary_ip):
4322 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4323 " address" % secondary_ip, errors.ECODE_INVAL)
4325 node_list = cfg.GetNodeList()
4326 if not self.op.readd and node in node_list:
4327 raise errors.OpPrereqError("Node %s is already in the configuration" %
4328 node, errors.ECODE_EXISTS)
4329 elif self.op.readd and node not in node_list:
4330 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4333 self.changed_primary_ip = False
4335 for existing_node_name in node_list:
4336 existing_node = cfg.GetNodeInfo(existing_node_name)
4338 if self.op.readd and node == existing_node_name:
4339 if existing_node.secondary_ip != secondary_ip:
4340 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4341 " address configuration as before",
4343 if existing_node.primary_ip != primary_ip:
4344 self.changed_primary_ip = True
4348 if (existing_node.primary_ip == primary_ip or
4349 existing_node.secondary_ip == primary_ip or
4350 existing_node.primary_ip == secondary_ip or
4351 existing_node.secondary_ip == secondary_ip):
4352 raise errors.OpPrereqError("New node ip address(es) conflict with"
4353 " existing node %s" % existing_node.name,
4354 errors.ECODE_NOTUNIQUE)
4356 # After this 'if' block, None is no longer a valid value for the
4357 # _capable op attributes
4359 old_node = self.cfg.GetNodeInfo(node)
4360 assert old_node is not None, "Can't retrieve locked node %s" % node
4361 for attr in self._NFLAGS:
4362 if getattr(self.op, attr) is None:
4363 setattr(self.op, attr, getattr(old_node, attr))
4365 for attr in self._NFLAGS:
4366 if getattr(self.op, attr) is None:
4367 setattr(self.op, attr, True)
4369 if self.op.readd and not self.op.vm_capable:
4370 pri, sec = cfg.GetNodeInstances(node)
4372 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4373 " flag set to false, but it already holds"
4374 " instances" % node,
4377 # check that the type of the node (single versus dual homed) is the
4378 # same as for the master
4379 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4380 master_singlehomed = myself.secondary_ip == myself.primary_ip
4381 newbie_singlehomed = secondary_ip == primary_ip
4382 if master_singlehomed != newbie_singlehomed:
4383 if master_singlehomed:
4384 raise errors.OpPrereqError("The master has no secondary ip but the"
4385 " new node has one",
4388 raise errors.OpPrereqError("The master has a secondary ip but the"
4389 " new node doesn't have one",
4392 # checks reachability
4393 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4394 raise errors.OpPrereqError("Node not reachable by ping",
4395 errors.ECODE_ENVIRON)
4397 if not newbie_singlehomed:
4398 # check reachability from my secondary ip to newbie's secondary ip
4399 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4400 source=myself.secondary_ip):
4401 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4402 " based ping to node daemon port",
4403 errors.ECODE_ENVIRON)
4410 if self.op.master_capable:
4411 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4413 self.master_candidate = False
4416 self.new_node = old_node
4418 node_group = cfg.LookupNodeGroup(self.op.group)
4419 self.new_node = objects.Node(name=node,
4420 primary_ip=primary_ip,
4421 secondary_ip=secondary_ip,
4422 master_candidate=self.master_candidate,
4423 offline=False, drained=False,
4426 if self.op.ndparams:
4427 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4429 def Exec(self, feedback_fn):
4430 """Adds the new node to the cluster.
4433 new_node = self.new_node
4434 node = new_node.name
4436 # We are adding a new node, so we assume it's powered
4437 new_node.powered = True
4439 # for re-adds, reset the offline/drained/master-candidate flags;
4440 # we need to reset here, otherwise offline would prevent RPC calls
4441 # later in the procedure; this also means that if the re-add
4442 # fails, we are left with a non-offlined, broken node
4444 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4445 self.LogInfo("Readding a node, the offline/drained flags were reset")
4446 # if we demote the node, we do cleanup later in the procedure
4447 new_node.master_candidate = self.master_candidate
4448 if self.changed_primary_ip:
4449 new_node.primary_ip = self.op.primary_ip
4451 # copy the master/vm_capable flags
4452 for attr in self._NFLAGS:
4453 setattr(new_node, attr, getattr(self.op, attr))
4455 # notify the user about any possible mc promotion
4456 if new_node.master_candidate:
4457 self.LogInfo("Node will be a master candidate")
4459 if self.op.ndparams:
4460 new_node.ndparams = self.op.ndparams
4462 new_node.ndparams = {}
4464 # check connectivity
4465 result = self.rpc.call_version([node])[node]
4466 result.Raise("Can't get version information from node %s" % node)
4467 if constants.PROTOCOL_VERSION == result.payload:
4468 logging.info("Communication to node %s fine, sw version %s match",
4469 node, result.payload)
4471 raise errors.OpExecError("Version mismatch master version %s,"
4472 " node version %s" %
4473 (constants.PROTOCOL_VERSION, result.payload))
4475 # Add node to our /etc/hosts, and add key to known_hosts
4476 if self.cfg.GetClusterInfo().modify_etc_hosts:
4477 master_node = self.cfg.GetMasterNode()
4478 result = self.rpc.call_etc_hosts_modify(master_node,
4479 constants.ETC_HOSTS_ADD,
4482 result.Raise("Can't update hosts file with new host data")
4484 if new_node.secondary_ip != new_node.primary_ip:
4485 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
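# verify ssh/hostname connectivity from the master to the new node before
# finalizing the add; a failure here aborts the operation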
4488 node_verify_list = [self.cfg.GetMasterNode()]
4489 node_verify_param = {
4490 constants.NV_NODELIST: [node],
4491 # TODO: do a node-net-test as well?
4494 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4495 self.cfg.GetClusterName())
4496 for verifier in node_verify_list:
4497 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4498 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4500 for failed in nl_payload:
4501 feedback_fn("ssh/hostname verification failed"
4502 " (checking from %s): %s" %
4503 (verifier, nl_payload[failed]))
4504 raise errors.OpExecError("ssh/hostname verification failed.")
4507 _RedistributeAncillaryFiles(self)
4508 self.context.ReaddNode(new_node)
4509 # make sure we redistribute the config
4510 self.cfg.Update(new_node, feedback_fn)
4511 # and make sure the new node will not have old files around
4512 if not new_node.master_candidate:
4513 result = self.rpc.call_node_demote_from_mc(new_node.name)
4514 msg = result.fail_msg
4516 self.LogWarning("Node failed to demote itself from master"
4517 " candidate status: %s" % msg)
4519 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4520 additional_vm=self.op.vm_capable)
4521 self.context.AddNode(new_node, self.proc.GetECId())
4524 class LUNodeSetParams(LogicalUnit):
4525 """Modifies the parameters of a node.
4527 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4528 to the node role (as _ROLE_*)
4529 @cvar _R2F: a dictionary from node role to tuples of flags
4530 @cvar _FLAGS: a list of attribute names corresponding to the flags
4533 HPATH = "node-modify"
4534 HTYPE = constants.HTYPE_NODE
4536 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4538 (True, False, False): _ROLE_CANDIDATE,
4539 (False, True, False): _ROLE_DRAINED,
4540 (False, False, True): _ROLE_OFFLINE,
4541 (False, False, False): _ROLE_REGULAR,
4543 _R2F = dict((v, k) for k, v in _F2R.items())
4544 _FLAGS = ["master_candidate", "drained", "offline"]
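# each key above is a (master_candidate, drained, offline) tuple, in the same
# order as _FLAGS; the all-False tuple corresponds to a regular node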
4546 def CheckArguments(self):
4547 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4548 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4549 self.op.master_capable, self.op.vm_capable,
4550 self.op.secondary_ip, self.op.ndparams]
4551 if all_mods.count(None) == len(all_mods):
4552 raise errors.OpPrereqError("Please pass at least one modification",
4554 if all_mods.count(True) > 1:
4555 raise errors.OpPrereqError("Can't set the node into more than one"
4556 " state at the same time",
4559 # Boolean value that tells us whether we might be demoting from MC
4560 self.might_demote = (self.op.master_candidate == False or
4561 self.op.offline == True or
4562 self.op.drained == True or
4563 self.op.master_capable == False)
4565 if self.op.secondary_ip:
4566 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4567 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4568 " address" % self.op.secondary_ip,
4571 self.lock_all = self.op.auto_promote and self.might_demote
4572 self.lock_instances = self.op.secondary_ip is not None
4574 def ExpandNames(self):
4576 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4578 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4580 if self.lock_instances:
4581 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4583 def DeclareLocks(self, level):
4584 # If we have locked all instances, before waiting to lock nodes, release
4585 # all the ones living on nodes unrelated to the current operation.
4586 if level == locking.LEVEL_NODE and self.lock_instances:
4587 instances_release = []
4589 self.affected_instances = []
4590 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4591 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4592 instance = self.context.cfg.GetInstanceInfo(instance_name)
4593 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4594 if i_mirrored and self.op.node_name in instance.all_nodes:
4595 instances_keep.append(instance_name)
4596 self.affected_instances.append(instance)
4598 instances_release.append(instance_name)
4599 if instances_release:
4600 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4601 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4603 def BuildHooksEnv(self):
4606 This runs on the master node.
4610 "OP_TARGET": self.op.node_name,
4611 "MASTER_CANDIDATE": str(self.op.master_candidate),
4612 "OFFLINE": str(self.op.offline),
4613 "DRAINED": str(self.op.drained),
4614 "MASTER_CAPABLE": str(self.op.master_capable),
4615 "VM_CAPABLE": str(self.op.vm_capable),
4618 def BuildHooksNodes(self):
4619 """Build hooks nodes.
4622 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4625 def CheckPrereq(self):
4626 """Check prerequisites.
4628 This only checks the instance list against the existing names.
4631 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4633 if (self.op.master_candidate is not None or
4634 self.op.drained is not None or
4635 self.op.offline is not None):
4636 # we can't change the master's node flags
4637 if self.op.node_name == self.cfg.GetMasterNode():
4638 raise errors.OpPrereqError("The master role can be changed"
4639 " only via master-failover",
4642 if self.op.master_candidate and not node.master_capable:
4643 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4644 " it a master candidate" % node.name,
4647 if self.op.vm_capable == False:
4648 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4650 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4651 " the vm_capable flag" % node.name,
4654 if node.master_candidate and self.might_demote and not self.lock_all:
4655 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4656 # check if after removing the current node, we're missing master candidates
4658 (mc_remaining, mc_should, _) = \
4659 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4660 if mc_remaining < mc_should:
4661 raise errors.OpPrereqError("Not enough master candidates, please"
4662 " pass auto promote option to allow"
4663 " promotion", errors.ECODE_STATE)
4665 self.old_flags = old_flags = (node.master_candidate,
4666 node.drained, node.offline)
4667 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4668 self.old_role = old_role = self._F2R[old_flags]
4670 # Check for ineffective changes
4671 for attr in self._FLAGS:
4672 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4673 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4674 setattr(self.op, attr, None)
4676 # Past this point, any flag change to False means a transition
4677 # away from the respective state, as only real changes are kept
4679 # TODO: We might query the real power state if it supports OOB
4680 if _SupportsOob(self.cfg, node):
4681 if self.op.offline is False and not (node.powered or
4682 self.op.powered == True):
4683 raise errors.OpPrereqError(("Please power on node %s first before you"
4684 " can reset offline state") %
4686 elif self.op.powered is not None:
4687 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4688 " which does not support out-of-band"
4689 " handling") % self.op.node_name)
4691 # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
4692 if (self.op.drained == False or self.op.offline == False or
4693 (self.op.master_capable and not node.master_capable)):
4694 if _DecideSelfPromotion(self):
4695 self.op.master_candidate = True
4696 self.LogInfo("Auto-promoting node to master candidate")
4698 # If we're no longer master capable, we'll demote ourselves from MC
4699 if self.op.master_capable == False and node.master_candidate:
4700 self.LogInfo("Demoting from master candidate")
4701 self.op.master_candidate = False
4704 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4705 if self.op.master_candidate:
4706 new_role = self._ROLE_CANDIDATE
4707 elif self.op.drained:
4708 new_role = self._ROLE_DRAINED
4709 elif self.op.offline:
4710 new_role = self._ROLE_OFFLINE
4711 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4712 # False is still in new flags, which means we're un-setting (the
4714 new_role = self._ROLE_REGULAR
4715 else: # no new flags, nothing, keep old role
4718 self.new_role = new_role
4720 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4721 # Trying to transition out of offline status
4722 result = self.rpc.call_version([node.name])[node.name]
4724 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4725 " to report its version: %s" %
4726 (node.name, result.fail_msg),
4729 self.LogWarning("Transitioning node from offline to online state"
4730 " without using re-add. Please make sure the node"
4733 if self.op.secondary_ip:
4734 # Ok even without locking, because this can't be changed by any LU
4735 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4736 master_singlehomed = master.secondary_ip == master.primary_ip
4737 if master_singlehomed and self.op.secondary_ip:
4738 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4739 " homed cluster", errors.ECODE_INVAL)
4742 if self.affected_instances:
4743 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4744 " node has instances (%s) configured"
4745 " to use it" % self.affected_instances)
4747 # On online nodes, check that no instances are running, and that
4748 # the node has the new ip and we can reach it.
4749 for instance in self.affected_instances:
4750 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4752 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4753 if master.name != node.name:
4754 # check reachability from master secondary ip to new secondary ip
4755 if not netutils.TcpPing(self.op.secondary_ip,
4756 constants.DEFAULT_NODED_PORT,
4757 source=master.secondary_ip):
4758 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4759 " based ping to node daemon port",
4760 errors.ECODE_ENVIRON)
4762 if self.op.ndparams:
4763 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4764 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4765 self.new_ndparams = new_ndparams
4767 def Exec(self, feedback_fn):
4772 old_role = self.old_role
4773 new_role = self.new_role
4777 if self.op.ndparams:
4778 node.ndparams = self.new_ndparams
4780 if self.op.powered is not None:
4781 node.powered = self.op.powered
4783 for attr in ["master_capable", "vm_capable"]:
4784 val = getattr(self.op, attr)
4786 setattr(node, attr, val)
4787 result.append((attr, str(val)))
4789 if new_role != old_role:
4790 # Tell the node to demote itself, if no longer MC and not offline
4791 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4792 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4794 self.LogWarning("Node failed to demote itself: %s", msg)
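# apply the new role by updating the individual flags, recording each change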
4796 new_flags = self._R2F[new_role]
4797 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4799 result.append((desc, str(nf)))
4800 (node.master_candidate, node.drained, node.offline) = new_flags
4802 # we locked all nodes, we adjust the CP before updating this node
4804 _AdjustCandidatePool(self, [node.name])
4806 if self.op.secondary_ip:
4807 node.secondary_ip = self.op.secondary_ip
4808 result.append(("secondary_ip", self.op.secondary_ip))
4810 # this will trigger configuration file update, if needed
4811 self.cfg.Update(node, feedback_fn)
4813 # this will trigger job queue propagation or cleanup if the mc
4815 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4816 self.context.ReaddNode(node)
4821 class LUNodePowercycle(NoHooksLU):
4822 """Powercycles a node.
4827 def CheckArguments(self):
4828 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4829 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4830 raise errors.OpPrereqError("The node is the master and the force"
4831 " parameter was not set",
4834 def ExpandNames(self):
4835 """Locking for PowercycleNode.
4837 This is a last-resort option and shouldn't block on other
4838 jobs. Therefore, we grab no locks.
4841 self.needed_locks = {}
4843 def Exec(self, feedback_fn):
4847 result = self.rpc.call_node_powercycle(self.op.node_name,
4848 self.cfg.GetHypervisorType())
4849 result.Raise("Failed to schedule the reboot")
4850 return result.payload
4853 class LUClusterQuery(NoHooksLU):
4854 """Query cluster configuration.
4859 def ExpandNames(self):
4860 self.needed_locks = {}
4862 def Exec(self, feedback_fn):
4863 """Return cluster config.
4866 cluster = self.cfg.GetClusterInfo()
4869 # Filter just for enabled hypervisors
4870 for os_name, hv_dict in cluster.os_hvp.items():
4871 os_hvp[os_name] = {}
4872 for hv_name, hv_params in hv_dict.items():
4873 if hv_name in cluster.enabled_hypervisors:
4874 os_hvp[os_name][hv_name] = hv_params
4876 # Convert ip_family to ip_version
4877 primary_ip_version = constants.IP4_VERSION
4878 if cluster.primary_ip_family == netutils.IP6Address.family:
4879 primary_ip_version = constants.IP6_VERSION
4882 "software_version": constants.RELEASE_VERSION,
4883 "protocol_version": constants.PROTOCOL_VERSION,
4884 "config_version": constants.CONFIG_VERSION,
4885 "os_api_version": max(constants.OS_API_VERSIONS),
4886 "export_version": constants.EXPORT_VERSION,
4887 "architecture": (platform.architecture()[0], platform.machine()),
4888 "name": cluster.cluster_name,
4889 "master": cluster.master_node,
4890 "default_hypervisor": cluster.enabled_hypervisors[0],
4891 "enabled_hypervisors": cluster.enabled_hypervisors,
4892 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4893 for hypervisor_name in cluster.enabled_hypervisors]),
4895 "beparams": cluster.beparams,
4896 "osparams": cluster.osparams,
4897 "nicparams": cluster.nicparams,
4898 "ndparams": cluster.ndparams,
4899 "candidate_pool_size": cluster.candidate_pool_size,
4900 "master_netdev": cluster.master_netdev,
4901 "volume_group_name": cluster.volume_group_name,
4902 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4903 "file_storage_dir": cluster.file_storage_dir,
4904 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4905 "maintain_node_health": cluster.maintain_node_health,
4906 "ctime": cluster.ctime,
4907 "mtime": cluster.mtime,
4908 "uuid": cluster.uuid,
4909 "tags": list(cluster.GetTags()),
4910 "uid_pool": cluster.uid_pool,
4911 "default_iallocator": cluster.default_iallocator,
4912 "reserved_lvs": cluster.reserved_lvs,
4913 "primary_ip_version": primary_ip_version,
4914 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4915 "hidden_os": cluster.hidden_os,
4916 "blacklisted_os": cluster.blacklisted_os,
4922 class LUClusterConfigQuery(NoHooksLU):
4923 """Return configuration values.
4927 _FIELDS_DYNAMIC = utils.FieldSet()
4928 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4929 "watcher_pause", "volume_group_name")
4931 def CheckArguments(self):
4932 _CheckOutputFields(static=self._FIELDS_STATIC,
4933 dynamic=self._FIELDS_DYNAMIC,
4934 selected=self.op.output_fields)
4936 def ExpandNames(self):
4937 self.needed_locks = {}
4939 def Exec(self, feedback_fn):
4940 """Dump a representation of the cluster config to the standard output.
4944 for field in self.op.output_fields:
4945 if field == "cluster_name":
4946 entry = self.cfg.GetClusterName()
4947 elif field == "master_node":
4948 entry = self.cfg.GetMasterNode()
4949 elif field == "drain_flag":
4950 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4951 elif field == "watcher_pause":
4952 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4953 elif field == "volume_group_name":
4954 entry = self.cfg.GetVGName()
4956 raise errors.ParameterError(field)
4957 values.append(entry)
4961 class LUInstanceActivateDisks(NoHooksLU):
4962 """Bring up an instance's disks.
4967 def ExpandNames(self):
4968 self._ExpandAndLockInstance()
4969 self.needed_locks[locking.LEVEL_NODE] = []
4970 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4972 def DeclareLocks(self, level):
4973 if level == locking.LEVEL_NODE:
4974 self._LockInstancesNodes()
4976 def CheckPrereq(self):
4977 """Check prerequisites.
4979 This checks that the instance is in the cluster.
4982 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4983 assert self.instance is not None, \
4984 "Cannot retrieve locked instance %s" % self.op.instance_name
4985 _CheckNodeOnline(self, self.instance.primary_node)
4987 def Exec(self, feedback_fn):
4988 """Activate the disks.
4991 disks_ok, disks_info = \
4992 _AssembleInstanceDisks(self, self.instance,
4993 ignore_size=self.op.ignore_size)
4995 raise errors.OpExecError("Cannot activate block devices")
5000 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5002 """Prepare the block devices for an instance.
5004 This sets up the block devices on all nodes.
5006 @type lu: L{LogicalUnit}
5007 @param lu: the logical unit on whose behalf we execute
5008 @type instance: L{objects.Instance}
5009 @param instance: the instance for whose disks we assemble
5010 @type disks: list of L{objects.Disk} or None
5011 @param disks: which disks to assemble (or all, if None)
5012 @type ignore_secondaries: boolean
5013 @param ignore_secondaries: if true, errors on secondary nodes
5014 won't result in an error return from the function
5015 @type ignore_size: boolean
5016 @param ignore_size: if true, the current known size of the disk
5017 will not be used during the disk activation, useful for cases
5018 when the size is wrong
5019 @return: False if the operation failed, otherwise a list of
5020 (host, instance_visible_name, node_visible_name)
5021 with the mapping from node devices to instance devices
5026 iname = instance.name
5027 disks = _ExpandCheckDisks(instance, disks)
5029 # With the two-pass mechanism we try to reduce the window of
5030 # opportunity for the race condition of switching DRBD to primary
5031 # before handshaking has occurred, but we do not eliminate it
5033 # The proper fix would be to wait (with some limits) until the
5034 # connection has been made and drbd transitions from WFConnection
5035 # into any other network-connected state (Connected, SyncTarget,
5038 # 1st pass, assemble on all nodes in secondary mode
5039 for idx, inst_disk in enumerate(disks):
5040 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5042 node_disk = node_disk.Copy()
5043 node_disk.UnsetSize()
5044 lu.cfg.SetDiskID(node_disk, node)
5045 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5046 msg = result.fail_msg
5048 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5049 " (is_primary=False, pass=1): %s",
5050 inst_disk.iv_name, node, msg)
5051 if not ignore_secondaries:
5054 # FIXME: race condition on drbd migration to primary
5056 # 2nd pass, do only the primary node
5057 for idx, inst_disk in enumerate(disks):
5060 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5061 if node != instance.primary_node:
5064 node_disk = node_disk.Copy()
5065 node_disk.UnsetSize()
5066 lu.cfg.SetDiskID(node_disk, node)
5067 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5068 msg = result.fail_msg
5070 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5071 " (is_primary=True, pass=2): %s",
5072 inst_disk.iv_name, node, msg)
5075 dev_path = result.payload
5077 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5079 # leave the disks configured for the primary node
5080 # this is a workaround that would be fixed better by
5081 # improving the logical/physical id handling
5083 lu.cfg.SetDiskID(disk, instance.primary_node)
5085 return disks_ok, device_info
5088 def _StartInstanceDisks(lu, instance, force):
5089 """Start the disks of an instance.
5092 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5093 ignore_secondaries=force)
5095 _ShutdownInstanceDisks(lu, instance)
5096 if force is not None and not force:
5097 lu.proc.LogWarning("", hint="If the message above refers to a"
5099 " you can retry the operation using '--force'.")
5100 raise errors.OpExecError("Disk consistency error")
5103 class LUInstanceDeactivateDisks(NoHooksLU):
5104 """Shutdown an instance's disks.
5109 def ExpandNames(self):
5110 self._ExpandAndLockInstance()
5111 self.needed_locks[locking.LEVEL_NODE] = []
5112 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5114 def DeclareLocks(self, level):
5115 if level == locking.LEVEL_NODE:
5116 self._LockInstancesNodes()
5118 def CheckPrereq(self):
5119 """Check prerequisites.
5121 This checks that the instance is in the cluster.
5124 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5125 assert self.instance is not None, \
5126 "Cannot retrieve locked instance %s" % self.op.instance_name
5128 def Exec(self, feedback_fn):
5129 """Deactivate the disks
5132 instance = self.instance
5134 _ShutdownInstanceDisks(self, instance)
5136 _SafeShutdownInstanceDisks(self, instance)
5139 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5140 """Shutdown block devices of an instance.
5142 This function checks if an instance is running, before calling
5143 _ShutdownInstanceDisks.
5146 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5147 _ShutdownInstanceDisks(lu, instance, disks=disks)
5150 def _ExpandCheckDisks(instance, disks):
5151 """Return the instance disks selected by the disks list
5153 @type disks: list of L{objects.Disk} or None
5154 @param disks: selected disks
5155 @rtype: list of L{objects.Disk}
5156 @return: selected instance disks to act on
5160 return instance.disks
5162 if not set(disks).issubset(instance.disks):
5163 raise errors.ProgrammerError("Can only act on disks belonging to the"
5168 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5169 """Shutdown block devices of an instance.
5171 This does the shutdown on all nodes of the instance.
5173 If the ignore_primary is false, errors on the primary node are
5178 disks = _ExpandCheckDisks(instance, disks)
5181 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5182 lu.cfg.SetDiskID(top_disk, node)
5183 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5184 msg = result.fail_msg
5186 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5187 disk.iv_name, node, msg)
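# errors on the primary node count unless ignore_primary is set; errors on
# secondary nodes count only if the node is not marked offline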
5188 if ((node == instance.primary_node and not ignore_primary) or
5189 (node != instance.primary_node and not result.offline)):
5194 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5195 """Checks if a node has enough free memory.
5197 This function checks if a given node has the needed amount of free
5198 memory. In case the node has less memory or we cannot get the
5199 information from the node, this function raises an OpPrereqError
5202 @type lu: C{LogicalUnit}
5203 @param lu: a logical unit from which we get configuration data
5205 @param node: the node to check
5206 @type reason: C{str}
5207 @param reason: string to use in the error message
5208 @type requested: C{int}
5209 @param requested: the amount of memory in MiB to check for
5210 @type hypervisor_name: C{str}
5211 @param hypervisor_name: the hypervisor to ask for memory stats
5212 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5213 we cannot check the node
5216 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5217 nodeinfo[node].Raise("Can't get data from node %s" % node,
5218 prereq=True, ecode=errors.ECODE_ENVIRON)
5219 free_mem = nodeinfo[node].payload.get('memory_free', None)
5220 if not isinstance(free_mem, int):
5221 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5222 " was '%s'" % (node, free_mem),
5223 errors.ECODE_ENVIRON)
5224 if requested > free_mem:
5225 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5226 " needed %s MiB, available %s MiB" %
5227 (node, reason, requested, free_mem),
5231 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5232 """Checks if nodes have enough free disk space in the all VGs.
5234 This function checks if all given nodes have the needed amount of
5235 free disk. In case any node has less disk or we cannot get the
5236 information from the node, this function raises an OpPrereqError
5239 @type lu: C{LogicalUnit}
5240 @param lu: a logical unit from which we get configuration data
5241 @type nodenames: C{list}
5242 @param nodenames: the list of node names to check
5243 @type req_sizes: C{dict}
5244 @param req_sizes: the hash of vg and corresponding amount of disk in
5246 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5247 or we cannot check the node
5250 for vg, req_size in req_sizes.items():
5251 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5254 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5255 """Checks if nodes have enough free disk space in the specified VG.
5257 This function checks if all given nodes have the needed amount of
5258 free disk. In case any node has less disk or we cannot get the
5259 information from the node, this function raises an OpPrereqError
5262 @type lu: C{LogicalUnit}
5263 @param lu: a logical unit from which we get configuration data
5264 @type nodenames: C{list}
5265 @param nodenames: the list of node names to check
5267 @param vg: the volume group to check
5268 @type requested: C{int}
5269 @param requested: the amount of disk in MiB to check for
5270 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5271 or we cannot check the node
5274 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5275 for node in nodenames:
5276 info = nodeinfo[node]
5277 info.Raise("Cannot get current information from node %s" % node,
5278 prereq=True, ecode=errors.ECODE_ENVIRON)
5279 vg_free = info.payload.get("vg_free", None)
5280 if not isinstance(vg_free, int):
5281 raise errors.OpPrereqError("Can't compute free disk space on node"
5282 " %s for vg %s, result was '%s'" %
5283 (node, vg, vg_free), errors.ECODE_ENVIRON)
5284 if requested > vg_free:
5285 raise errors.OpPrereqError("Not enough disk space on target node %s"
5286 " vg %s: required %d MiB, available %d MiB" %
5287 (node, vg, requested, vg_free),
5291 class LUInstanceStartup(LogicalUnit):
5292 """Starts an instance.
5295 HPATH = "instance-start"
5296 HTYPE = constants.HTYPE_INSTANCE
5299 def CheckArguments(self):
5301 if self.op.beparams:
5302 # fill the beparams dict
5303 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5305 def ExpandNames(self):
5306 self._ExpandAndLockInstance()
5308 def BuildHooksEnv(self):
5311 This runs on master, primary and secondary nodes of the instance.
5315 "FORCE": self.op.force,
5318 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5322 def BuildHooksNodes(self):
5323 """Build hooks nodes.
5326 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5329 def CheckPrereq(self):
5330 """Check prerequisites.
5332 This checks that the instance is in the cluster.
5335 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5336 assert self.instance is not None, \
5337 "Cannot retrieve locked instance %s" % self.op.instance_name
5340 if self.op.hvparams:
5341 # check hypervisor parameter syntax (locally)
5342 cluster = self.cfg.GetClusterInfo()
5343 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5344 filled_hvp = cluster.FillHV(instance)
5345 filled_hvp.update(self.op.hvparams)
5346 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5347 hv_type.CheckParameterSyntax(filled_hvp)
5348 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5350 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5352 if self.primary_offline and self.op.ignore_offline_nodes:
5353 self.proc.LogWarning("Ignoring offline primary node")
5355 if self.op.hvparams or self.op.beparams:
5356 self.proc.LogWarning("Overridden parameters are ignored")
5358 _CheckNodeOnline(self, instance.primary_node)
5360 bep = self.cfg.GetClusterInfo().FillBE(instance)
5362 # check bridges existence
5363 _CheckInstanceBridgesExist(self, instance)
5365 remote_info = self.rpc.call_instance_info(instance.primary_node,
5367 instance.hypervisor)
5368 remote_info.Raise("Error checking node %s" % instance.primary_node,
5369 prereq=True, ecode=errors.ECODE_ENVIRON)
5370 if not remote_info.payload: # not running already
5371 _CheckNodeFreeMemory(self, instance.primary_node,
5372 "starting instance %s" % instance.name,
5373 bep[constants.BE_MEMORY], instance.hypervisor)
5375 def Exec(self, feedback_fn):
5376 """Start the instance.
5379 instance = self.instance
5380 force = self.op.force
5382 self.cfg.MarkInstanceUp(instance.name)
5384 if self.primary_offline:
5385 assert self.op.ignore_offline_nodes
5386 self.proc.LogInfo("Primary node offline, marked instance as started")
5388 node_current = instance.primary_node
5390 _StartInstanceDisks(self, instance, force)
5392 result = self.rpc.call_instance_start(node_current, instance,
5393 self.op.hvparams, self.op.beparams)
5394 msg = result.fail_msg
5396 _ShutdownInstanceDisks(self, instance)
5397 raise errors.OpExecError("Could not start instance: %s" % msg)
5400 class LUInstanceReboot(LogicalUnit):
5401 """Reboot an instance.
5404 HPATH = "instance-reboot"
5405 HTYPE = constants.HTYPE_INSTANCE
5408 def ExpandNames(self):
5409 self._ExpandAndLockInstance()
5411 def BuildHooksEnv(self):
5414 This runs on master, primary and secondary nodes of the instance.
5418 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5419 "REBOOT_TYPE": self.op.reboot_type,
5420 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5423 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5427 def BuildHooksNodes(self):
5428 """Build hooks nodes.
5431 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5434 def CheckPrereq(self):
5435 """Check prerequisites.
5437 This checks that the instance is in the cluster.
5440 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5441 assert self.instance is not None, \
5442 "Cannot retrieve locked instance %s" % self.op.instance_name
5444 _CheckNodeOnline(self, instance.primary_node)
5446 # check bridges existence
5447 _CheckInstanceBridgesExist(self, instance)
5449 def Exec(self, feedback_fn):
5450 """Reboot the instance.
5453 instance = self.instance
5454 ignore_secondaries = self.op.ignore_secondaries
5455 reboot_type = self.op.reboot_type
5457 remote_info = self.rpc.call_instance_info(instance.primary_node,
5459 instance.hypervisor)
5460 remote_info.Raise("Error checking node %s" % instance.primary_node)
5461 instance_running = bool(remote_info.payload)
5463 node_current = instance.primary_node
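# soft/hard reboots of a running instance are delegated to the hypervisor;
# otherwise (full reboot, or instance not running) we stop and start it here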
5465 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5466 constants.INSTANCE_REBOOT_HARD]:
5467 for disk in instance.disks:
5468 self.cfg.SetDiskID(disk, node_current)
5469 result = self.rpc.call_instance_reboot(node_current, instance,
5471 self.op.shutdown_timeout)
5472 result.Raise("Could not reboot instance")
5474 if instance_running:
5475 result = self.rpc.call_instance_shutdown(node_current, instance,
5476 self.op.shutdown_timeout)
5477 result.Raise("Could not shutdown instance for full reboot")
5478 _ShutdownInstanceDisks(self, instance)
5480 self.LogInfo("Instance %s was already stopped, starting now",
5482 _StartInstanceDisks(self, instance, ignore_secondaries)
5483 result = self.rpc.call_instance_start(node_current, instance, None, None)
5484 msg = result.fail_msg
5486 _ShutdownInstanceDisks(self, instance)
5487 raise errors.OpExecError("Could not start instance for"
5488 " full reboot: %s" % msg)
5490 self.cfg.MarkInstanceUp(instance.name)
5493 class LUInstanceShutdown(LogicalUnit):
5494 """Shutdown an instance.
5497 HPATH = "instance-stop"
5498 HTYPE = constants.HTYPE_INSTANCE
5501 def ExpandNames(self):
5502 self._ExpandAndLockInstance()
5504 def BuildHooksEnv(self):
5507 This runs on master, primary and secondary nodes of the instance.
5510 env = _BuildInstanceHookEnvByObject(self, self.instance)
5511 env["TIMEOUT"] = self.op.timeout
5514 def BuildHooksNodes(self):
5515 """Build hooks nodes.
5518 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5521 def CheckPrereq(self):
5522 """Check prerequisites.
5524 This checks that the instance is in the cluster.
5527 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5528 assert self.instance is not None, \
5529 "Cannot retrieve locked instance %s" % self.op.instance_name
5531 self.primary_offline = \
5532 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5534 if self.primary_offline and self.op.ignore_offline_nodes:
5535 self.proc.LogWarning("Ignoring offline primary node")
5537 _CheckNodeOnline(self, self.instance.primary_node)
5539 def Exec(self, feedback_fn):
5540 """Shutdown the instance.
5543 instance = self.instance
5544 node_current = instance.primary_node
5545 timeout = self.op.timeout
5547 self.cfg.MarkInstanceDown(instance.name)
5549 if self.primary_offline:
5550 assert self.op.ignore_offline_nodes
5551 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5553 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5554 msg = result.fail_msg
5556 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5558 _ShutdownInstanceDisks(self, instance)
5561 class LUInstanceReinstall(LogicalUnit):
5562 """Reinstall an instance.
5565 HPATH = "instance-reinstall"
5566 HTYPE = constants.HTYPE_INSTANCE
5569 def ExpandNames(self):
5570 self._ExpandAndLockInstance()
5572 def BuildHooksEnv(self):
5575 This runs on master, primary and secondary nodes of the instance.
5578 return _BuildInstanceHookEnvByObject(self, self.instance)
5580 def BuildHooksNodes(self):
5581 """Build hooks nodes.
5584 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5587 def CheckPrereq(self):
5588 """Check prerequisites.
5590 This checks that the instance is in the cluster and is not running.
5593 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5594 assert instance is not None, \
5595 "Cannot retrieve locked instance %s" % self.op.instance_name
5596 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5597 " offline, cannot reinstall")
5598 for node in instance.secondary_nodes:
5599 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5600 " cannot reinstall")
5602 if instance.disk_template == constants.DT_DISKLESS:
5603 raise errors.OpPrereqError("Instance '%s' has no disks" %
5604 self.op.instance_name,
5606 _CheckInstanceDown(self, instance, "cannot reinstall")
5608 if self.op.os_type is not None:
5610 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5611 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5612 instance_os = self.op.os_type
5614 instance_os = instance.os
5616 nodelist = list(instance.all_nodes)
5618 if self.op.osparams:
5619 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5620 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5621 self.os_inst = i_osdict # the new dict (without defaults)
5625 self.instance = instance
5627 def Exec(self, feedback_fn):
5628 """Reinstall the instance.
5631 inst = self.instance
5633 if self.op.os_type is not None:
5634 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5635 inst.os = self.op.os_type
5636 # Write to configuration
5637 self.cfg.Update(inst, feedback_fn)
5639 _StartInstanceDisks(self, inst, None)
5641 feedback_fn("Running the instance OS create scripts...")
5642 # FIXME: pass debug option from opcode to backend
5643 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5644 self.op.debug_level,
5645 osparams=self.os_inst)
5646 result.Raise("Could not install OS for instance %s on node %s" %
5647 (inst.name, inst.primary_node))
5649 _ShutdownInstanceDisks(self, inst)
5652 class LUInstanceRecreateDisks(LogicalUnit):
5653 """Recreate an instance's missing disks.
5656 HPATH = "instance-recreate-disks"
5657 HTYPE = constants.HTYPE_INSTANCE
5660 def ExpandNames(self):
5661 self._ExpandAndLockInstance()
5663 def BuildHooksEnv(self):
5666 This runs on master, primary and secondary nodes of the instance.
5669 return _BuildInstanceHookEnvByObject(self, self.instance)
5671 def BuildHooksNodes(self):
5672 """Build hooks nodes.
5675 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5678 def CheckPrereq(self):
5679 """Check prerequisites.
5681 This checks that the instance is in the cluster and is not running.
5684 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5685 assert instance is not None, \
5686 "Cannot retrieve locked instance %s" % self.op.instance_name
5687 _CheckNodeOnline(self, instance.primary_node)
5689 if instance.disk_template == constants.DT_DISKLESS:
5690 raise errors.OpPrereqError("Instance '%s' has no disks" %
5691 self.op.instance_name, errors.ECODE_INVAL)
5692 _CheckInstanceDown(self, instance, "cannot recreate disks")
5694 if not self.op.disks:
5695 self.op.disks = range(len(instance.disks))
5697 for idx in self.op.disks:
5698 if idx >= len(instance.disks):
5699 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5702 self.instance = instance
5704 def Exec(self, feedback_fn):
5705 """Recreate the disks.
5709 for idx, _ in enumerate(self.instance.disks):
5710 if idx not in self.op.disks: # disk idx has not been passed in
5714 _CreateDisks(self, self.instance, to_skip=to_skip)
5717 class LUInstanceRename(LogicalUnit):
5718 """Rename an instance.
5721 HPATH = "instance-rename"
5722 HTYPE = constants.HTYPE_INSTANCE
5724 def CheckArguments(self):
5728 if self.op.ip_check and not self.op.name_check:
5729 # TODO: make the ip check more flexible and not depend on the name check
5730 raise errors.OpPrereqError("Cannot do ip check without a name check",
5733 def BuildHooksEnv(self):
5736 This runs on master, primary and secondary nodes of the instance.
5739 env = _BuildInstanceHookEnvByObject(self, self.instance)
5740 env["INSTANCE_NEW_NAME"] = self.op.new_name
5743 def BuildHooksNodes(self):
5744 """Build hooks nodes.
5747 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5750 def CheckPrereq(self):
5751 """Check prerequisites.
5753 This checks that the instance is in the cluster and is not running.
5756 self.op.instance_name = _ExpandInstanceName(self.cfg,
5757 self.op.instance_name)
5758 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5759 assert instance is not None
5760 _CheckNodeOnline(self, instance.primary_node)
5761 _CheckInstanceDown(self, instance, "cannot rename")
5762 self.instance = instance
5764 new_name = self.op.new_name
5765 if self.op.name_check:
5766 hostname = netutils.GetHostname(name=new_name)
5767 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5769 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5770 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5771 " same as given hostname '%s'") %
5772 (hostname.name, self.op.new_name),
5774 new_name = self.op.new_name = hostname.name
5775 if (self.op.ip_check and
5776 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5777 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5778 (hostname.ip, new_name),
5779 errors.ECODE_NOTUNIQUE)
5781 instance_list = self.cfg.GetInstanceList()
5782 if new_name in instance_list and new_name != instance.name:
5783 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5784 new_name, errors.ECODE_EXISTS)
5786 def Exec(self, feedback_fn):
5787 """Rename the instance.
5790 inst = self.instance
5791 old_name = inst.name
5793 rename_file_storage = False
5794 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5795 self.op.new_name != inst.name):
5796 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5797 rename_file_storage = True
5799 self.cfg.RenameInstance(inst.name, self.op.new_name)
5800 # Change the instance lock. This is definitely safe while we hold the BGL
5801 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5802 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5804 # re-read the instance from the configuration after rename
5805 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5807 if rename_file_storage:
5808 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5809 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5810 old_file_storage_dir,
5811 new_file_storage_dir)
5812 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5813 " (but the instance has been renamed in Ganeti)" %
5814 (inst.primary_node, old_file_storage_dir,
5815 new_file_storage_dir))
5817 _StartInstanceDisks(self, inst, None)
5819 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5820 old_name, self.op.debug_level)
5821 msg = result.fail_msg
5823 msg = ("Could not run OS rename script for instance %s on node %s"
5824 " (but the instance has been renamed in Ganeti): %s" %
5825 (inst.name, inst.primary_node, msg))
5826 self.proc.LogWarning(msg)
5828 _ShutdownInstanceDisks(self, inst)
5833 class LUInstanceRemove(LogicalUnit):
5834 """Remove an instance.
5837 HPATH = "instance-remove"
5838 HTYPE = constants.HTYPE_INSTANCE
5841 def ExpandNames(self):
5842 self._ExpandAndLockInstance()
5843 self.needed_locks[locking.LEVEL_NODE] = []
5844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5846 def DeclareLocks(self, level):
5847 if level == locking.LEVEL_NODE:
5848 self._LockInstancesNodes()
5850 def BuildHooksEnv(self):
5853 This runs on master, primary and secondary nodes of the instance.
5856 env = _BuildInstanceHookEnvByObject(self, self.instance)
5857 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5860 def BuildHooksNodes(self):
5861 """Build hooks nodes.
5864 nl = [self.cfg.GetMasterNode()]
5865 nl_post = list(self.instance.all_nodes) + nl
5866 return (nl, nl_post)
5868 def CheckPrereq(self):
5869 """Check prerequisites.
5871 This checks that the instance is in the cluster.
5874 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5875 assert self.instance is not None, \
5876 "Cannot retrieve locked instance %s" % self.op.instance_name
5878 def Exec(self, feedback_fn):
5879 """Remove the instance.
5882 instance = self.instance
5883 logging.info("Shutting down instance %s on node %s",
5884 instance.name, instance.primary_node)
5886 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5887 self.op.shutdown_timeout)
5888 msg = result.fail_msg
5890 if self.op.ignore_failures:
5891 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5893 raise errors.OpExecError("Could not shutdown instance %s on"
5895 (instance.name, instance.primary_node, msg))
5897 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5900 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5901 """Utility function to remove an instance.
5904 logging.info("Removing block devices for instance %s", instance.name)
5906 if not _RemoveDisks(lu, instance):
5907 if not ignore_failures:
5908 raise errors.OpExecError("Can't remove instance's disks")
5909 feedback_fn("Warning: can't remove instance's disks")
5911 logging.info("Removing instance %s out of cluster config", instance.name)
5913 lu.cfg.RemoveInstance(instance.name)
5915 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5916 "Instance lock removal conflict"
5918 # Remove lock for the instance
5919 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5922 class LUInstanceQuery(NoHooksLU):
5923 """Logical unit for querying instances.
5926 # pylint: disable-msg=W0142
5929 def CheckArguments(self):
5930 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5931 self.op.output_fields, self.op.use_locking)
5933 def ExpandNames(self):
5934 self.iq.ExpandNames(self)
5936 def DeclareLocks(self, level):
5937 self.iq.DeclareLocks(self, level)
5939 def Exec(self, feedback_fn):
5940 return self.iq.OldStyleQuery(self)
5943 class LUInstanceFailover(LogicalUnit):
5944 """Failover an instance.
5947 HPATH = "instance-failover"
5948 HTYPE = constants.HTYPE_INSTANCE
5951 def CheckArguments(self):
5952 """Check the arguments.
5955 self.iallocator = getattr(self.op, "iallocator", None)
5956 self.target_node = getattr(self.op, "target_node", None)
5958 def ExpandNames(self):
5959 self._ExpandAndLockInstance()
5961 if self.op.target_node is not None:
5962 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5964 self.needed_locks[locking.LEVEL_NODE] = []
5965 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5967 ignore_consistency = self.op.ignore_consistency
5968 shutdown_timeout = self.op.shutdown_timeout
5969 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5971 iallocator=self.op.iallocator,
5972 target_node=self.op.target_node,
5974 ignore_consistency=ignore_consistency,
5975 shutdown_timeout=shutdown_timeout)
5976 self.tasklets = [self._migrater]
5978 def DeclareLocks(self, level):
5979 if level == locking.LEVEL_NODE:
5980 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5981 if instance.disk_template in constants.DTS_EXT_MIRROR:
5982 if self.op.target_node is None:
5983 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5985 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5986 self.op.target_node]
5987 del self.recalculate_locks[locking.LEVEL_NODE]
5989 self._LockInstancesNodes()
5991 def BuildHooksEnv(self):
5994 This runs on master, primary and secondary nodes of the instance.
5997 instance = self._migrater.instance
5998 source_node = instance.primary_node
5999 target_node = self._migrater.target_node
6001 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6002 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6003 "OLD_PRIMARY": source_node,
6004 "NEW_PRIMARY": target_node,
6007 if instance.disk_template in constants.DTS_INT_MIRROR:
6008 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6009 env["NEW_SECONDARY"] = source_node
6011 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6013 env.update(_BuildInstanceHookEnvByObject(self, instance))
6017 def BuildHooksNodes(self):
6018 """Build hooks nodes.
6021 instance = self._migrater.instance
6022 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6023 return (nl, nl + [instance.primary_node])
6026 class LUInstanceMigrate(LogicalUnit):
6027 """Migrate an instance.
6029 This is migration without shutting down the instance, as opposed to
6030 failover, which shuts the instance down first.
6033 HPATH = "instance-migrate"
6034 HTYPE = constants.HTYPE_INSTANCE
6037 def ExpandNames(self):
6038 self._ExpandAndLockInstance()
6040 if self.op.target_node is not None:
6041 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6043 self.needed_locks[locking.LEVEL_NODE] = []
6044 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6046 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6047 cleanup=self.op.cleanup,
6048 iallocator=self.op.iallocator,
6049 target_node=self.op.target_node,
6051 fallback=self.op.allow_failover)
6052 self.tasklets = [self._migrater]
6054 def DeclareLocks(self, level):
6055 if level == locking.LEVEL_NODE:
6056 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6057 if instance.disk_template in constants.DTS_EXT_MIRROR:
6058 if self.op.target_node is None:
6059 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6061 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6062 self.op.target_node]
6063 del self.recalculate_locks[locking.LEVEL_NODE]
6065 self._LockInstancesNodes()
6067 def BuildHooksEnv(self):
6070 This runs on master, primary and secondary nodes of the instance.
6073 instance = self._migrater.instance
6074 source_node = instance.primary_node
6075 target_node = self._migrater.target_node
6076 env = _BuildInstanceHookEnvByObject(self, instance)
6078 "MIGRATE_LIVE": self._migrater.live,
6079 "MIGRATE_CLEANUP": self.op.cleanup,
6080 "OLD_PRIMARY": source_node,
6081 "NEW_PRIMARY": target_node,
6084 if instance.disk_template in constants.DTS_INT_MIRROR:
6085 env["OLD_SECONDARY"] = target_node
6086 env["NEW_SECONDARY"] = source_node
6088 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6092 def BuildHooksNodes(self):
6093 """Build hooks nodes.
6096 instance = self._migrater.instance
6097 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6098 return (nl, nl + [instance.primary_node])
6101 class LUInstanceMove(LogicalUnit):
6102 """Move an instance by data-copying.
6105 HPATH = "instance-move"
6106 HTYPE = constants.HTYPE_INSTANCE
6109 def ExpandNames(self):
6110 self._ExpandAndLockInstance()
6111 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6112 self.op.target_node = target_node
6113 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6114 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6116 def DeclareLocks(self, level):
6117 if level == locking.LEVEL_NODE:
6118 self._LockInstancesNodes(primary_only=True)
6120 def BuildHooksEnv(self):
6123 This runs on master, primary and secondary nodes of the instance.
6127 "TARGET_NODE": self.op.target_node,
6128 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6130 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6133 def BuildHooksNodes(self):
6134 """Build hooks nodes.
6138 self.cfg.GetMasterNode(),
6139 self.instance.primary_node,
6140 self.op.target_node,
6144 def CheckPrereq(self):
6145 """Check prerequisites.
6147 This checks that the instance is in the cluster.
6150 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6151 assert self.instance is not None, \
6152 "Cannot retrieve locked instance %s" % self.op.instance_name
6154 node = self.cfg.GetNodeInfo(self.op.target_node)
6155 assert node is not None, \
6156 "Cannot retrieve locked node %s" % self.op.target_node
6158 self.target_node = target_node = node.name
6160 if target_node == instance.primary_node:
6161 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6162 (instance.name, target_node),
6165 bep = self.cfg.GetClusterInfo().FillBE(instance)
6167 for idx, dsk in enumerate(instance.disks):
6168 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6169 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6170 " cannot copy" % idx, errors.ECODE_STATE)
6172 _CheckNodeOnline(self, target_node)
6173 _CheckNodeNotDrained(self, target_node)
6174 _CheckNodeVmCapable(self, target_node)
6176 if instance.admin_up:
6177 # check memory requirements on the target node
6178 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6179 instance.name, bep[constants.BE_MEMORY],
6180 instance.hypervisor)
6182 self.LogInfo("Not checking memory on the secondary node as"
6183 " instance will not be started")
6185 # check bridge existence
6186 _CheckInstanceBridgesExist(self, instance, node=target_node)
6188 def Exec(self, feedback_fn):
6189 """Move an instance.
6191 The move is done by shutting it down on its present node, copying
6192 the data over (slow) and starting it on the new node.
6195 instance = self.instance
6197 source_node = instance.primary_node
6198 target_node = self.target_node
6200 self.LogInfo("Shutting down instance %s on source node %s",
6201 instance.name, source_node)
6203 result = self.rpc.call_instance_shutdown(source_node, instance,
6204 self.op.shutdown_timeout)
6205 msg = result.fail_msg
6207 if self.op.ignore_consistency:
6208 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6209 " Proceeding anyway. Please make sure node"
6210 " %s is down. Error details: %s",
6211 instance.name, source_node, source_node, msg)
6213 raise errors.OpExecError("Could not shutdown instance %s on"
6215 (instance.name, source_node, msg))
6217 # create the target disks
6219 _CreateDisks(self, instance, target_node=target_node)
6220 except errors.OpExecError:
6221 self.LogWarning("Device creation failed, reverting...")
6223 _RemoveDisks(self, instance, target_node=target_node)
6225 self.cfg.ReleaseDRBDMinors(instance.name)
6228 cluster_name = self.cfg.GetClusterInfo().cluster_name
6231 # activate, get path, copy the data over
6232 for idx, disk in enumerate(instance.disks):
6233 self.LogInfo("Copying data for disk %d", idx)
6234 result = self.rpc.call_blockdev_assemble(target_node, disk,
6235 instance.name, True, idx)
6237 self.LogWarning("Can't assemble newly created disk %d: %s",
6238 idx, result.fail_msg)
6239 errs.append(result.fail_msg)
6241 dev_path = result.payload
6242 result = self.rpc.call_blockdev_export(source_node, disk,
6243 target_node, dev_path,
6246 self.LogWarning("Can't copy data over for disk %d: %s",
6247 idx, result.fail_msg)
6248 errs.append(result.fail_msg)
6252 self.LogWarning("Some disks failed to copy, aborting")
6254 _RemoveDisks(self, instance, target_node=target_node)
6256 self.cfg.ReleaseDRBDMinors(instance.name)
6257 raise errors.OpExecError("Errors during disk copy: %s" %
6260 instance.primary_node = target_node
6261 self.cfg.Update(instance, feedback_fn)
6263 self.LogInfo("Removing the disks on the original node")
6264 _RemoveDisks(self, instance, target_node=source_node)
6266 # Only start the instance if it's marked as up
6267 if instance.admin_up:
6268 self.LogInfo("Starting instance %s on node %s",
6269 instance.name, target_node)
6271 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6272 ignore_secondaries=True)
6274 _ShutdownInstanceDisks(self, instance)
6275 raise errors.OpExecError("Can't activate the instance's disks")
6277 result = self.rpc.call_instance_start(target_node, instance, None, None)
6278 msg = result.fail_msg
6280 _ShutdownInstanceDisks(self, instance)
6281 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6282 (instance.name, target_node, msg))
6285 class LUNodeMigrate(LogicalUnit):
6286 """Migrate all instances from a node.
6289 HPATH = "node-migrate"
6290 HTYPE = constants.HTYPE_NODE
6293 def CheckArguments(self):
6294 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6296 def ExpandNames(self):
6297 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6299 self.needed_locks = {}
6301 # Create tasklets for migrating all primary instances on this node
6305 self.lock_all_nodes = False
6307 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6308 logging.debug("Migrating instance %s", inst.name)
6309 names.append(inst.name)
6311 tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6312 iallocator=self.op.iallocator,
6315 if inst.disk_template in constants.DTS_EXT_MIRROR:
6316 # We need to lock all nodes, as the iallocator will choose the
6317 # destination nodes afterwards
6318 self.lock_all_nodes = True
6320 self.tasklets = tasklets
6322 # Declare node locks
6323 if self.lock_all_nodes:
6324 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6326 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6329 # Declare instance locks
6330 self.needed_locks[locking.LEVEL_INSTANCE] = names
6332 def DeclareLocks(self, level):
6333 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6334 self._LockInstancesNodes()
6336 def BuildHooksEnv(self):
6339 This runs on the master, the primary and all the secondaries.
6343 "NODE_NAME": self.op.node_name,
6346 def BuildHooksNodes(self):
6347 """Build hooks nodes.
6350 nl = [self.cfg.GetMasterNode()]
6354 class TLMigrateInstance(Tasklet):
6355 """Tasklet class for instance migration.
6358 @ivar live: whether the migration will be done live or non-live;
6359 this variable is initialized only after CheckPrereq has run
6360 @type cleanup: boolean
6361 @ivar cleanup: Whether we are cleaning up after a failed migration
6362 @type iallocator: string
6363 @ivar iallocator: The iallocator used to determine target_node
6364 @type target_node: string
6365 @ivar target_node: If given, the target_node to reallocate the instance to
6366 @type failover: boolean
6367 @ivar failover: Whether operation results in failover or migration
6368 @type fallback: boolean
6369 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
6371 @type ignore_consistency: boolean
6372 @ivar ignore_consistency: Whether we should ignore consistency between the source and the target node
6374 @type shutdown_timeout: int
6375 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
6378 def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6379 target_node=None, failover=False, fallback=False,
6380 ignore_consistency=False,
6381 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6382 """Initializes this class.
6385 Tasklet.__init__(self, lu)
6388 self.instance_name = instance_name
6389 self.cleanup = cleanup
6390 self.live = False # will be overridden later
6391 self.iallocator = iallocator
6392 self.target_node = target_node
6393 self.failover = failover
6394 self.fallback = fallback
6395 self.ignore_consistency = ignore_consistency
6396 self.shutdown_timeout = shutdown_timeout
6398 def CheckPrereq(self):
6399 """Check prerequisites.
6401 This checks that the instance is in the cluster.
6404 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6405 instance = self.cfg.GetInstanceInfo(instance_name)
6406 assert instance is not None
6407 self.instance = instance
6409 if (not self.cleanup and not instance.admin_up and not self.failover and
6411 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6413 self.failover = True
6415 if instance.disk_template not in constants.DTS_MIRRORED:
6420 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6421 " %s" % (instance.disk_template, text),
6424 if instance.disk_template in constants.DTS_EXT_MIRROR:
6425 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6428 self._RunAllocator()
6430 # self.target_node is already populated, either directly or by the
6432 target_node = self.target_node
6434 if len(self.lu.tasklets) == 1:
6435 # It is safe to remove locks only when we're the only tasklet in the LU
6436 nodes_keep = [instance.primary_node, self.target_node]
6437 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6438 if node not in nodes_keep]
6439 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6440 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6443 secondary_nodes = instance.secondary_nodes
6444 if not secondary_nodes:
6445 raise errors.ConfigurationError("No secondary node but using"
6446 " %s disk template" %
6447 instance.disk_template)
6448 target_node = secondary_nodes[0]
6449 if self.iallocator or (self.target_node and
6450 self.target_node != target_node):
6452 text = "failed over"
6455 raise errors.OpPrereqError("Instances with disk template %s cannot"
6456 " be %s over to arbitrary nodes"
6457 " (neither an iallocator nor a target"
6458 " node can be passed)" %
6459 (text, instance.disk_template),
6462 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6464 # check memory requirements on the secondary node
6465 if not self.failover or instance.admin_up:
6466 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6467 instance.name, i_be[constants.BE_MEMORY],
6468 instance.hypervisor)
6470 self.lu.LogInfo("Not checking memory on the secondary node as"
6471 " instance will not be started")
6473 # check bridge existence
6474 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6476 if not self.cleanup:
6477 _CheckNodeNotDrained(self.lu, target_node)
6478 if not self.failover:
6479 result = self.rpc.call_instance_migratable(instance.primary_node,
6481 if result.fail_msg and self.fallback:
6482 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6484 self.failover = True
6486 result.Raise("Can't migrate, please use failover",
6487 prereq=True, ecode=errors.ECODE_STATE)
6489 assert not (self.failover and self.cleanup)
6491 def _RunAllocator(self):
6492 """Run the allocator based on input opcode.
6495 ial = IAllocator(self.cfg, self.rpc,
6496 mode=constants.IALLOCATOR_MODE_RELOC,
6497 name=self.instance_name,
6498 # TODO See why hail breaks with a single node below
6499 relocate_from=[self.instance.primary_node,
6500 self.instance.primary_node],
6503 ial.Run(self.iallocator)
6506 raise errors.OpPrereqError("Can't compute nodes using"
6507 " iallocator '%s': %s" %
6508 (self.iallocator, ial.info),
6510 if len(ial.result) != ial.required_nodes:
6511 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6512 " of nodes (%s), required %s" %
6513 (self.iallocator, len(ial.result),
6514 ial.required_nodes), errors.ECODE_FAULT)
6515 self.target_node = ial.result[0]
6516 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6517 self.instance_name, self.iallocator,
6518 utils.CommaJoin(ial.result))
6520 if not self.failover:
6521 if self.lu.op.live is not None and self.lu.op.mode is not None:
6522 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6523 " parameters are accepted",
6525 if self.lu.op.live is not None:
6527 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6529 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6530 # reset the 'live' parameter to None so that repeated
6531 # invocations of CheckPrereq do not raise an exception
6532 self.lu.op.live = None
6533 elif self.lu.op.mode is None:
6534 # read the default value from the hypervisor
6535 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6537 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6539 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6541 # Failover is never live
6544 def _WaitUntilSync(self):
6545 """Poll with custom rpc for disk sync.
6547 This uses our own step-based rpc call.
6550 self.feedback_fn("* wait until resync is done")
6554 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6556 self.instance.disks)
6558 for node, nres in result.items():
6559 nres.Raise("Cannot resync disks on node %s" % node)
6560 node_done, node_percent = nres.payload
6561 all_done = all_done and node_done
6562 if node_percent is not None:
6563 min_percent = min(min_percent, node_percent)
6565 if min_percent < 100:
6566 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6569 def _EnsureSecondary(self, node):
6570 """Demote a node to secondary.
6573 self.feedback_fn("* switching node %s to secondary mode" % node)
6575 for dev in self.instance.disks:
6576 self.cfg.SetDiskID(dev, node)
6578 result = self.rpc.call_blockdev_close(node, self.instance.name,
6579 self.instance.disks)
6580 result.Raise("Cannot change disk to secondary on node %s" % node)
6582 def _GoStandalone(self):
6583 """Disconnect from the network.
6586 self.feedback_fn("* changing into standalone mode")
6587 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6588 self.instance.disks)
6589 for node, nres in result.items():
6590 nres.Raise("Cannot disconnect disks on node %s" % node)
6592 def _GoReconnect(self, multimaster):
6593 """Reconnect to the network.
6599 msg = "single-master"
6600 self.feedback_fn("* changing disks into %s mode" % msg)
6601 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6602 self.instance.disks,
6603 self.instance.name, multimaster)
6604 for node, nres in result.items():
6605 nres.Raise("Cannot change disks config on node %s" % node)
6607 def _ExecCleanup(self):
6608 """Try to cleanup after a failed migration.
6610 The cleanup is done by:
6611 - check that the instance is running only on one node
6612 (and update the config if needed)
6613 - change disks on its secondary node to secondary
6614 - wait until disks are fully synchronized
6615 - disconnect from the network
6616 - change disks into single-master mode
6617 - wait again until disks are fully synchronized
6620 instance = self.instance
6621 target_node = self.target_node
6622 source_node = self.source_node
6624 # check running on only one node
6625 self.feedback_fn("* checking where the instance actually runs"
6626 " (if this hangs, the hypervisor might be in"
6628 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6629 for node, result in ins_l.items():
6630 result.Raise("Can't contact node %s" % node)
6632 runningon_source = instance.name in ins_l[source_node].payload
6633 runningon_target = instance.name in ins_l[target_node].payload
6635 if runningon_source and runningon_target:
6636 raise errors.OpExecError("Instance seems to be running on two nodes,"
6637 " or the hypervisor is confused. You will have"
6638 " to ensure manually that it runs only on one"
6639 " and restart this operation.")
6641 if not (runningon_source or runningon_target):
6642 raise errors.OpExecError("Instance does not seem to be running at all."
6643 " In this case, it's safer to repair by"
6644 " running 'gnt-instance stop' to ensure disk"
6645 " shutdown, and then restarting it.")
6647 if runningon_target:
6648 # the migration has actually succeeded, we need to update the config
6649 self.feedback_fn("* instance running on secondary node (%s),"
6650 " updating config" % target_node)
6651 instance.primary_node = target_node
6652 self.cfg.Update(instance, self.feedback_fn)
6653 demoted_node = source_node
6655 self.feedback_fn("* instance confirmed to be running on its"
6656 " primary node (%s)" % source_node)
6657 demoted_node = target_node
6659 if instance.disk_template in constants.DTS_INT_MIRROR:
6660 self._EnsureSecondary(demoted_node)
6662 self._WaitUntilSync()
6663 except errors.OpExecError:
6664 # we ignore errors here, since if the device is standalone, it
6665 # won't be able to sync
6667 self._GoStandalone()
6668 self._GoReconnect(False)
6669 self._WaitUntilSync()
6671 self.feedback_fn("* done")
6673 def _RevertDiskStatus(self):
6674 """Try to revert the disk status after a failed migration.
6677 target_node = self.target_node
6678 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6682 self._EnsureSecondary(target_node)
6683 self._GoStandalone()
6684 self._GoReconnect(False)
6685 self._WaitUntilSync()
6686 except errors.OpExecError, err:
6687 self.lu.LogWarning("Migration failed and I can't reconnect the"
6688 " drives: error '%s'\n"
6689 "Please look and recover the instance status" %
6692 def _AbortMigration(self):
6693 """Call the hypervisor code to abort a started migration.
6696 instance = self.instance
6697 target_node = self.target_node
6698 migration_info = self.migration_info
6700 abort_result = self.rpc.call_finalize_migration(target_node,
6704 abort_msg = abort_result.fail_msg
6706 logging.error("Aborting migration failed on target node %s: %s",
6707 target_node, abort_msg)
6708 # Don't raise an exception here, as we still have to try to revert the
6709 # disk status, even if this step failed.
6711 def _ExecMigration(self):
6712 """Migrate an instance.
6714 The migrate is done by:
6715 - change the disks into dual-master mode
6716 - wait until disks are fully synchronized again
6717 - migrate the instance
6718 - change disks on the new secondary node (the old primary) to secondary
6719 - wait until disks are fully synchronized
6720 - change disks into single-master mode
6723 instance = self.instance
6724 target_node = self.target_node
6725 source_node = self.source_node
6727 self.feedback_fn("* checking disk consistency between source and target")
6728 for dev in instance.disks:
6729 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6730 raise errors.OpExecError("Disk %s is degraded or not fully"
6731 " synchronized on target node,"
6732 " aborting migrate." % dev.iv_name)
6734 # First get the migration information from the remote node
6735 result = self.rpc.call_migration_info(source_node, instance)
6736 msg = result.fail_msg
6738 log_err = ("Failed fetching source migration information from %s: %s" %
6740 logging.error(log_err)
6741 raise errors.OpExecError(log_err)
6743 self.migration_info = migration_info = result.payload
6745 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6746 # Then switch the disks to master/master mode
6747 self._EnsureSecondary(target_node)
6748 self._GoStandalone()
6749 self._GoReconnect(True)
6750 self._WaitUntilSync()
6752 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6753 result = self.rpc.call_accept_instance(target_node,
6756 self.nodes_ip[target_node])
6758 msg = result.fail_msg
6760 logging.error("Instance pre-migration failed, trying to revert"
6761 " disk status: %s", msg)
6762 self.feedback_fn("Pre-migration failed, aborting")
6763 self._AbortMigration()
6764 self._RevertDiskStatus()
6765 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6766 (instance.name, msg))
6768 self.feedback_fn("* migrating instance to %s" % target_node)
6769 result = self.rpc.call_instance_migrate(source_node, instance,
6770 self.nodes_ip[target_node],
6772 msg = result.fail_msg
6774 logging.error("Instance migration failed, trying to revert"
6775 " disk status: %s", msg)
6776 self.feedback_fn("Migration failed, aborting")
6777 self._AbortMigration()
6778 self._RevertDiskStatus()
6779 raise errors.OpExecError("Could not migrate instance %s: %s" %
6780 (instance.name, msg))
6782 instance.primary_node = target_node
6783 # distribute new instance config to the other nodes
6784 self.cfg.Update(instance, self.feedback_fn)
6786 result = self.rpc.call_finalize_migration(target_node,
6790 msg = result.fail_msg
6792 logging.error("Instance migration succeeded, but finalization failed:"
6794 raise errors.OpExecError("Could not finalize instance migration: %s" %
6797 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6798 self._EnsureSecondary(source_node)
6799 self._WaitUntilSync()
6800 self._GoStandalone()
6801 self._GoReconnect(False)
6802 self._WaitUntilSync()
6804 self.feedback_fn("* done")
6806 def _ExecFailover(self):
6807 """Failover an instance.
6809 The failover is done by shutting it down on its present node and
6810 starting it on the secondary.
6813 instance = self.instance
6814 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6816 source_node = instance.primary_node
6817 target_node = self.target_node
6819 if instance.admin_up:
6820 self.feedback_fn("* checking disk consistency between source and target")
6821 for dev in instance.disks:
6822 # for drbd, these are drbd over lvm
6823 if not _CheckDiskConsistency(self, dev, target_node, False):
6824 if not self.ignore_consistency:
6825 raise errors.OpExecError("Disk %s is degraded on target node,"
6826 " aborting failover." % dev.iv_name)
6828 self.feedback_fn("* not checking disk consistency as instance is not"
6831 self.feedback_fn("* shutting down instance on source node")
6832 logging.info("Shutting down instance %s on node %s",
6833 instance.name, source_node)
6835 result = self.rpc.call_instance_shutdown(source_node, instance,
6836 self.shutdown_timeout)
6837 msg = result.fail_msg
6839 if self.ignore_consistency or primary_node.offline:
6840 self.lu.LogWarning("Could not shutdown instance %s on node %s."
6841 " Proceeding anyway. Please make sure node"
6842 " %s is down. Error details: %s",
6843 instance.name, source_node, source_node, msg)
6845 raise errors.OpExecError("Could not shutdown instance %s on"
6847 (instance.name, source_node, msg))
6849 self.feedback_fn("* deactivating the instance's disks on source node")
6850 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6851 raise errors.OpExecError("Can't shut down the instance's disks.")
6853 instance.primary_node = target_node
6854 # distribute new instance config to the other nodes
6855 self.cfg.Update(instance, self.feedback_fn)
6857 # Only start the instance if it's marked as up
6858 if instance.admin_up:
6859 self.feedback_fn("* activating the instance's disks on target node")
6860 logging.info("Starting instance %s on node %s",
6861 instance.name, target_node)
6863 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6864 ignore_secondaries=True)
6866 _ShutdownInstanceDisks(self, instance)
6867 raise errors.OpExecError("Can't activate the instance's disks")
6869 self.feedback_fn("* starting the instance on the target node")
6870 result = self.rpc.call_instance_start(target_node, instance, None, None)
6871 msg = result.fail_msg
6873 _ShutdownInstanceDisks(self, instance)
6874 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6875 (instance.name, target_node, msg))
6877 def Exec(self, feedback_fn):
6878 """Perform the migration.
6881 self.feedback_fn = feedback_fn
6882 self.source_node = self.instance.primary_node
6884 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6885 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6886 self.target_node = self.instance.secondary_nodes[0]
6887 # Otherwise self.target_node has been populated either
6888 # directly, or through an iallocator.
6890 self.all_nodes = [self.source_node, self.target_node]
6892 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6893 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6897 feedback_fn("Failover instance %s" % self.instance.name)
6898 self._ExecFailover()
6900 feedback_fn("Migrating instance %s" % self.instance.name)
6903 return self._ExecCleanup()
6905 return self._ExecMigration()
6908 def _CreateBlockDev(lu, node, instance, device, force_create,
6910 """Create a tree of block devices on a given node.
6912 If this device type has to be created on secondaries, create it and
6915 If not, just recurse to children keeping the same 'force' value.
6917 @param lu: the lu on whose behalf we execute
6918 @param node: the node on which to create the device
6919 @type instance: L{objects.Instance}
6920 @param instance: the instance which owns the device
6921 @type device: L{objects.Disk}
6922 @param device: the device to create
6923 @type force_create: boolean
6924 @param force_create: whether to force creation of this device; this
6925 will be changed to True whenever we find a device which has the
6926 CreateOnSecondary() attribute
6927 @param info: the extra 'metadata' we should attach to the device
6928 (this will be represented as an LVM tag)
6929 @type force_open: boolean
6930 @param force_open: this parameter will be passed to the
6931 L{backend.BlockdevCreate} function where it specifies
6932 whether we run on primary or not, and it affects both
6933 the child assembly and the device's own Open() execution
6936 if device.CreateOnSecondary():
6940 for child in device.children:
6941 _CreateBlockDev(lu, node, instance, child, force_create,
6944 if not force_create:
6947 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
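# Rough illustration of the flow above: children are created first, then the
# device itself; force_create starts out False on secondary nodes and is
# switched to True once a device in the tree reports CreateOnSecondary(), so
# devices for which it stays False are simply skipped.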
6950 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6951 """Create a single block device on a given node.
6953 This will not recurse over children of the device, so they must be created in advance.
6956 @param lu: the lu on whose behalf we execute
6957 @param node: the node on which to create the device
6958 @type instance: L{objects.Instance}
6959 @param instance: the instance which owns the device
6960 @type device: L{objects.Disk}
6961 @param device: the device to create
6962 @param info: the extra 'metadata' we should attach to the device
6963 (this will be represented as an LVM tag)
6964 @type force_open: boolean
6965 @param force_open: this parameter will be passed to the
6966 L{backend.BlockdevCreate} function where it specifies
6967 whether we run on primary or not, and it affects both
6968 the child assembly and the device's own Open() execution
6971 lu.cfg.SetDiskID(device, node)
6972 result = lu.rpc.call_blockdev_create(node, device, device.size,
6973 instance.name, force_open, info)
6974 result.Raise("Can't create block device %s on"
6975 " node %s for instance %s" % (device, node, instance.name))
6976 if device.physical_id is None:
6977 device.physical_id = result.payload
6980 def _GenerateUniqueNames(lu, exts):
6981 """Generate a suitable LV name.
6983 This will generate a logical volume name for the given instance.
6988 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6989 results.append("%s%s" % (new_id, val))
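# Illustrative example (the IDs below are made up): for exts == [".disk0", ".disk1"]
# this could return something like ["<uuid-1>.disk0", "<uuid-2>.disk1"], with a
# fresh unique ID requested from the configuration for each extension.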
6993 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6995 """Generate a drbd8 device complete with its children.
6998 port = lu.cfg.AllocatePort()
6999 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
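# The branch assembled below is a DRBD8 device whose children are the data LV
# (of the requested size) and a fixed 128 MB metadata LV; the TCP port and the
# shared secret are allocated from the cluster configuration, while the two
# DRBD minors are passed in by the caller.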
7000 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7001 logical_id=(vgname, names[0]))
7002 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7003 logical_id=(vgname, names[1]))
7004 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7005 logical_id=(primary, secondary, port,
7008 children=[dev_data, dev_meta],
7013 def _GenerateDiskTemplate(lu, template_name,
7014 instance_name, primary_node,
7015 secondary_nodes, disk_info,
7016 file_storage_dir, file_driver,
7017 base_index, feedback_fn):
7018 """Generate the entire disk layout for a given template type.
7021 #TODO: compute space requirements
7023 vgname = lu.cfg.GetVGName()
7024 disk_count = len(disk_info)
7026 if template_name == constants.DT_DISKLESS:
7028 elif template_name == constants.DT_PLAIN:
7029 if len(secondary_nodes) != 0:
7030 raise errors.ProgrammerError("Wrong template configuration")
7032 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7033 for i in range(disk_count)])
7034 for idx, disk in enumerate(disk_info):
7035 disk_index = idx + base_index
7036 vg = disk.get(constants.IDISK_VG, vgname)
7037 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7038 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7039 size=disk[constants.IDISK_SIZE],
7040 logical_id=(vg, names[idx]),
7041 iv_name="disk/%d" % disk_index,
7042 mode=disk[constants.IDISK_MODE])
7043 disks.append(disk_dev)
7044 elif template_name == constants.DT_DRBD8:
7045 if len(secondary_nodes) != 1:
7046 raise errors.ProgrammerError("Wrong template configuration")
7047 remote_node = secondary_nodes[0]
7048 minors = lu.cfg.AllocateDRBDMinor(
7049 [primary_node, remote_node] * len(disk_info), instance_name)
7052 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7053 for i in range(disk_count)]):
7054 names.append(lv_prefix + "_data")
7055 names.append(lv_prefix + "_meta")
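# Illustrative example (IDs made up): with two disks and base_index == 0 the
# names list becomes
#   ["<uuid-1>.disk0_data", "<uuid-1>.disk0_meta",
#    "<uuid-2>.disk1_data", "<uuid-2>.disk1_meta"]
# i.e. one data/meta LV pair per disk.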
7056 for idx, disk in enumerate(disk_info):
7057 disk_index = idx + base_index
7058 vg = disk.get(constants.IDISK_VG, vgname)
7059 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7060 disk[constants.IDISK_SIZE], vg,
7061 names[idx * 2:idx * 2 + 2],
7062 "disk/%d" % disk_index,
7063 minors[idx * 2], minors[idx * 2 + 1])
7064 disk_dev.mode = disk[constants.IDISK_MODE]
7065 disks.append(disk_dev)
7066 elif template_name == constants.DT_FILE:
7067 if len(secondary_nodes) != 0:
7068 raise errors.ProgrammerError("Wrong template configuration")
7070 opcodes.RequireFileStorage()
7072 for idx, disk in enumerate(disk_info):
7073 disk_index = idx + base_index
7074 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7075 size=disk[constants.IDISK_SIZE],
7076 iv_name="disk/%d" % disk_index,
7077 logical_id=(file_driver,
7078 "%s/disk%d" % (file_storage_dir,
7080 mode=disk[constants.IDISK_MODE])
7081 disks.append(disk_dev)
7082 elif template_name == constants.DT_SHARED_FILE:
7083 if len(secondary_nodes) != 0:
7084 raise errors.ProgrammerError("Wrong template configuration")
7086 opcodes.RequireSharedFileStorage()
7088 for idx, disk in enumerate(disk_info):
7089 disk_index = idx + base_index
7090 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7091 size=disk[constants.IDISK_SIZE],
7092 iv_name="disk/%d" % disk_index,
7093 logical_id=(file_driver,
7094 "%s/disk%d" % (file_storage_dir,
7096 mode=disk[constants.IDISK_MODE])
7097 disks.append(disk_dev)
7098 elif template_name == constants.DT_BLOCK:
7099 if len(secondary_nodes) != 0:
7100 raise errors.ProgrammerError("Wrong template configuration")
7102 for idx, disk in enumerate(disk_info):
7103 disk_index = idx + base_index
7104 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7105 size=disk[constants.IDISK_SIZE],
7106 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7107 disk[constants.IDISK_ADOPT]),
7108 iv_name="disk/%d" % disk_index,
7109 mode=disk[constants.IDISK_MODE])
7110 disks.append(disk_dev)
7113 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7117 def _GetInstanceInfoText(instance):
7118 """Compute the text that should be added to the disk's metadata.
7121 return "originstname+%s" % instance.name
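# Example: an instance named "web1.example.com" gets the text
# "originstname+web1.example.com" attached to its disks (as an LVM tag).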
7124 def _CalcEta(time_taken, written, total_size):
7125 """Calculates the ETA based on size written and total size.
7127 @param time_taken: The time taken so far
7128 @param written: amount written so far
7129 @param total_size: The total size of data to be written
7130 @return: The remaining time in seconds
7133 avg_time = time_taken / float(written)
7134 return (total_size - written) * avg_time
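# Hedged worked example (illustrative numbers, units as used by the caller):
# if 200 of 1000 units were written in 50 seconds, then
#   avg_time == 50 / 200.0 == 0.25 seconds per unit
# and the returned ETA is (1000 - 200) * 0.25 == 200.0 seconds.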
7137 def _WipeDisks(lu, instance):
7138 """Wipes instance disks.
7140 @type lu: L{LogicalUnit}
7141 @param lu: the logical unit on whose behalf we execute
7142 @type instance: L{objects.Instance}
7143 @param instance: the instance whose disks we should wipe
7144 @return: the success of the wipe
7147 node = instance.primary_node
7149 for device in instance.disks:
7150 lu.cfg.SetDiskID(device, node)
7152 logging.info("Pause sync of instance %s disks", instance.name)
7153 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7155 for idx, success in enumerate(result.payload):
7157 logging.warn("pause-sync of instance %s for disks %d failed",
7161 for idx, device in enumerate(instance.disks):
7162 lu.LogInfo("* Wiping disk %d", idx)
7163 logging.info("Wiping disk %d for instance %s, node %s",
7164 idx, instance.name, node)
7166 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7167 # MAX_WIPE_CHUNK at max
7168 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7169 constants.MIN_WIPE_CHUNK_PERCENT)
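# Hedged example (assuming MAX_WIPE_CHUNK == 1024 and MIN_WIPE_CHUNK_PERCENT == 10,
# which are only illustrative values here): a 10240 MB disk would be wiped in
# chunks of min(1024, 10240 / 100.0 * 10), i.e. 1024 MB at a time.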
7174 start_time = time.time()
7176 while offset < size:
7177 wipe_size = min(wipe_chunk_size, size - offset)
7178 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7179 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7180 (idx, offset, wipe_size))
7183 if now - last_output >= 60:
7184 eta = _CalcEta(now - start_time, offset, size)
7185 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7186 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7189 logging.info("Resume sync of instance %s disks", instance.name)
7191 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7193 for idx, success in enumerate(result.payload):
7195 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7196 " look at the status and troubleshoot the issue.", idx)
7197 logging.warn("resume-sync of instance %s for disks %d failed",
7201 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7202 """Create all disks for an instance.
7204 This abstracts away some work from AddInstance.
7206 @type lu: L{LogicalUnit}
7207 @param lu: the logical unit on whose behalf we execute
7208 @type instance: L{objects.Instance}
7209 @param instance: the instance whose disks we should create
7211 @param to_skip: list of indices to skip
7212 @type target_node: string
7213 @param target_node: if passed, overrides the target node for creation
7215 @return: the success of the creation
7218 info = _GetInstanceInfoText(instance)
7219 if target_node is None:
7220 pnode = instance.primary_node
7221 all_nodes = instance.all_nodes
7226 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7227 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7228 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7230 result.Raise("Failed to create directory '%s' on"
7231 " node %s" % (file_storage_dir, pnode))
7233 # Note: this needs to be kept in sync with adding of disks in
7234 # LUInstanceSetParams
7235 for idx, device in enumerate(instance.disks):
7236 if to_skip and idx in to_skip:
7238 logging.info("Creating volume %s for instance %s",
7239 device.iv_name, instance.name)
7241 for node in all_nodes:
7242 f_create = node == pnode
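# On the primary node both creation and opening are forced; on other nodes the
# device tree is only created where something reports CreateOnSecondary().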
7243 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7246 def _RemoveDisks(lu, instance, target_node=None):
7247 """Remove all disks for an instance.
7249 This abstracts away some work from `AddInstance()` and
7250 `RemoveInstance()`. Note that in case some of the devices couldn't
7251 be removed, the removal will continue with the other ones (compare
7252 with `_CreateDisks()`).
7254 @type lu: L{LogicalUnit}
7255 @param lu: the logical unit on whose behalf we execute
7256 @type instance: L{objects.Instance}
7257 @param instance: the instance whose disks we should remove
7258 @type target_node: string
7259 @param target_node: used to override the node on which to remove the disks
7261 @return: the success of the removal
7264 logging.info("Removing block devices for instance %s", instance.name)
7267 for device in instance.disks:
7269 edata = [(target_node, device)]
7271 edata = device.ComputeNodeTree(instance.primary_node)
7272 for node, disk in edata:
7273 lu.cfg.SetDiskID(disk, node)
7274 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7276 lu.LogWarning("Could not remove block device %s on node %s,"
7277 " continuing anyway: %s", device.iv_name, node, msg)
7280 if instance.disk_template == constants.DT_FILE:
7281 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7285 tgt = instance.primary_node
7286 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7288 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7289 file_storage_dir, instance.primary_node, result.fail_msg)
7295 def _ComputeDiskSizePerVG(disk_template, disks):
7296 """Compute disk size requirements in the volume group
7299 def _compute(disks, payload):
7300 """Universal algorithm.
7305 vgs[disk[constants.IDISK_VG]] = \
7306 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7310 # Required free disk space as a function of disk and swap space
7312 constants.DT_DISKLESS: {},
7313 constants.DT_PLAIN: _compute(disks, 0),
7314 # 128 MB are added for drbd metadata for each disk
7315 constants.DT_DRBD8: _compute(disks, 128),
7316 constants.DT_FILE: {},
7317 constants.DT_SHARED_FILE: {},
7320 if disk_template not in req_size_dict:
7321 raise errors.ProgrammerError("Disk template '%s' size requirement"
7322 " is unknown" % disk_template)
7324 return req_size_dict[disk_template]
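# Hedged worked example (keys abbreviated): for DT_DRBD8 with
#   disks == [{IDISK_VG: "xenvg", IDISK_SIZE: 1024},
#             {IDISK_VG: "xenvg", IDISK_SIZE: 2048}]
# the result is {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328},
# i.e. 128 MB of DRBD metadata per disk, summed per volume group.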
7327 def _ComputeDiskSize(disk_template, disks):
7328 """Compute disk size requirements in the volume group
7331 # Required free disk space as a function of disk and swap space
7333 constants.DT_DISKLESS: None,
7334 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7335 # 128 MB are added for drbd metadata for each disk
7336 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7337 constants.DT_FILE: None,
7338 constants.DT_SHARED_FILE: 0,
7339 constants.DT_BLOCK: 0,
7342 if disk_template not in req_size_dict:
7343 raise errors.ProgrammerError("Disk template '%s' size requirement"
7344 " is unknown" % disk_template)
7346 return req_size_dict[disk_template]
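# Hedged worked example: for DT_DRBD8 and two disks of 1024 and 2048 MB the
# required size is (1024 + 128) + (2048 + 128) == 3328 MB, while DT_PLAIN
# would need just 1024 + 2048 == 3072 MB.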
7349 def _FilterVmNodes(lu, nodenames):
7350 """Filters out non-vm_capable nodes from a list.
7352 @type lu: L{LogicalUnit}
7353 @param lu: the logical unit for which we check
7354 @type nodenames: list
7355 @param nodenames: the list of nodes on which we should check
7357 @return: the list of vm-capable nodes
7360 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7361 return [name for name in nodenames if name not in vm_nodes]
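# Note: GetNonVmCapableNodeList returns the nodes that are *not* vm_capable,
# so the "not in" test above keeps exactly the vm-capable ones.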
7364 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7365 """Hypervisor parameter validation.
7367 This function abstracts the hypervisor parameter validation to be
7368 used in both instance create and instance modify.
7370 @type lu: L{LogicalUnit}
7371 @param lu: the logical unit for which we check
7372 @type nodenames: list
7373 @param nodenames: the list of nodes on which we should check
7374 @type hvname: string
7375 @param hvname: the name of the hypervisor we should use
7376 @type hvparams: dict
7377 @param hvparams: the parameters which we need to check
7378 @raise errors.OpPrereqError: if the parameters are not valid
7381 nodenames = _FilterVmNodes(lu, nodenames)
7382 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7385 for node in nodenames:
7389 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7392 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7393 """OS parameters validation.
7395 @type lu: L{LogicalUnit}
7396 @param lu: the logical unit for which we check
7397 @type required: boolean
7398 @param required: whether the validation should fail if the OS is not
7400 @type nodenames: list
7401 @param nodenames: the list of nodes on which we should check
7402 @type osname: string
7403 @param osname: the name of the OS we should use
7404 @type osparams: dict
7405 @param osparams: the parameters which we need to check
7406 @raise errors.OpPrereqError: if the parameters are not valid
7409 nodenames = _FilterVmNodes(lu, nodenames)
7410 result = lu.rpc.call_os_validate(required, nodenames, osname,
7411 [constants.OS_VALIDATE_PARAMETERS],
7413 for node, nres in result.items():
7414 # we don't check for offline cases since this should be run only
7415 # against the master node and/or an instance's nodes
7416 nres.Raise("OS Parameters validation failed on node %s" % node)
7417 if not nres.payload:
7418 lu.LogInfo("OS %s not found on node %s, validation skipped",
7422 class LUInstanceCreate(LogicalUnit):
7423 """Create an instance.
7426 HPATH = "instance-add"
7427 HTYPE = constants.HTYPE_INSTANCE
7430 def CheckArguments(self):
7434 # do not require name_check to ease forward/backward compatibility
7436 if self.op.no_install and self.op.start:
7437 self.LogInfo("No-installation mode selected, disabling startup")
7438 self.op.start = False
7439 # validate/normalize the instance name
7440 self.op.instance_name = \
7441 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7443 if self.op.ip_check and not self.op.name_check:
7444 # TODO: make the ip check more flexible and not depend on the name check
7445 raise errors.OpPrereqError("Cannot do ip check without a name check",
7448 # check nics' parameter names
7449 for nic in self.op.nics:
7450 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7452 # check disks: parameter names and consistent adopt/no-adopt strategy
7453 has_adopt = has_no_adopt = False
7454 for disk in self.op.disks:
7455 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7456 if constants.IDISK_ADOPT in disk:
7460 if has_adopt and has_no_adopt:
7461 raise errors.OpPrereqError("Either all disks are adopted or none is",
7464 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7465 raise errors.OpPrereqError("Disk adoption is not supported for the"
7466 " '%s' disk template" %
7467 self.op.disk_template,
7469 if self.op.iallocator is not None:
7470 raise errors.OpPrereqError("Disk adoption not allowed with an"
7471 " iallocator script", errors.ECODE_INVAL)
7472 if self.op.mode == constants.INSTANCE_IMPORT:
7473 raise errors.OpPrereqError("Disk adoption not allowed for"
7474 " instance import", errors.ECODE_INVAL)
7476 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7477 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7478 " but no 'adopt' parameter given" %
7479 self.op.disk_template,
7482 self.adopt_disks = has_adopt
7484 # instance name verification
7485 if self.op.name_check:
7486 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7487 self.op.instance_name = self.hostname1.name
7488 # used in CheckPrereq for ip ping check
7489 self.check_ip = self.hostname1.ip
7491 self.check_ip = None
7493 # file storage checks
7494 if (self.op.file_driver and
7495 not self.op.file_driver in constants.FILE_DRIVER):
7496 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7497 self.op.file_driver, errors.ECODE_INVAL)
7499 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7500 raise errors.OpPrereqError("File storage directory path must not be absolute",
7503 ### Node/iallocator related checks
7504 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7506 if self.op.pnode is not None:
7507 if self.op.disk_template in constants.DTS_INT_MIRROR:
7508 if self.op.snode is None:
7509 raise errors.OpPrereqError("The networked disk templates need"
7510 " a mirror node", errors.ECODE_INVAL)
7512 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7514 self.op.snode = None
7516 self._cds = _GetClusterDomainSecret()
7518 if self.op.mode == constants.INSTANCE_IMPORT:
7519 # On import force_variant must be True, because if we forced it at
7520 # initial install, our only chance when importing it back is that it
7522 self.op.force_variant = True
7524 if self.op.no_install:
7525 self.LogInfo("No-installation mode has no effect during import")
7527 elif self.op.mode == constants.INSTANCE_CREATE:
7528 if self.op.os_type is None:
7529 raise errors.OpPrereqError("No guest OS specified",
7531 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7532 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7533 " installation" % self.op.os_type,
7535 if self.op.disk_template is None:
7536 raise errors.OpPrereqError("No disk template specified",
7539 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7540 # Check handshake to ensure both clusters have the same domain secret
7541 src_handshake = self.op.source_handshake
7542 if not src_handshake:
7543 raise errors.OpPrereqError("Missing source handshake",
7546 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7549 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7552 # Load and check source CA
7553 self.source_x509_ca_pem = self.op.source_x509_ca
7554 if not self.source_x509_ca_pem:
7555 raise errors.OpPrereqError("Missing source X509 CA",
7559 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7561 except OpenSSL.crypto.Error, err:
7562 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7563 (err, ), errors.ECODE_INVAL)
7565 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7566 if errcode is not None:
7567 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7570 self.source_x509_ca = cert
7572 src_instance_name = self.op.source_instance_name
7573 if not src_instance_name:
7574 raise errors.OpPrereqError("Missing source instance name",
7577 self.source_instance_name = \
7578 netutils.GetHostname(name=src_instance_name).name
7581 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7582 self.op.mode, errors.ECODE_INVAL)
7584 def ExpandNames(self):
7585 """ExpandNames for CreateInstance.
7587 Figure out the right locks for instance creation.
7590 self.needed_locks = {}
7592 instance_name = self.op.instance_name
7593 # this is just a preventive check, but someone might still add this
7594 # instance in the meantime, and creation will fail at lock-add time
7595 if instance_name in self.cfg.GetInstanceList():
7596 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7597 instance_name, errors.ECODE_EXISTS)
7599 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7601 if self.op.iallocator:
7602 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7604 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7605 nodelist = [self.op.pnode]
7606 if self.op.snode is not None:
7607 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7608 nodelist.append(self.op.snode)
7609 self.needed_locks[locking.LEVEL_NODE] = nodelist
7611 # in case of import lock the source node too
7612 if self.op.mode == constants.INSTANCE_IMPORT:
7613 src_node = self.op.src_node
7614 src_path = self.op.src_path
7616 if src_path is None:
7617 self.op.src_path = src_path = self.op.instance_name
7619 if src_node is None:
7620 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7621 self.op.src_node = None
7622 if os.path.isabs(src_path):
7623 raise errors.OpPrereqError("Importing an instance from an absolute"
7624 " path requires a source node option.",
7627 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7628 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7629 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7630 if not os.path.isabs(src_path):
7631 self.op.src_path = src_path = \
7632 utils.PathJoin(constants.EXPORT_DIR, src_path)
7634 def _RunAllocator(self):
7635 """Run the allocator based on input opcode.
7638 nics = [n.ToDict() for n in self.nics]
7639 ial = IAllocator(self.cfg, self.rpc,
7640 mode=constants.IALLOCATOR_MODE_ALLOC,
7641 name=self.op.instance_name,
7642 disk_template=self.op.disk_template,
7645 vcpus=self.be_full[constants.BE_VCPUS],
7646 mem_size=self.be_full[constants.BE_MEMORY],
7649 hypervisor=self.op.hypervisor,
7652 ial.Run(self.op.iallocator)
7655 raise errors.OpPrereqError("Can't compute nodes using"
7656 " iallocator '%s': %s" %
7657 (self.op.iallocator, ial.info),
7659 if len(ial.result) != ial.required_nodes:
7660 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7661 " of nodes (%s), required %s" %
7662 (self.op.iallocator, len(ial.result),
7663 ial.required_nodes), errors.ECODE_FAULT)
7664 self.op.pnode = ial.result[0]
7665 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7666 self.op.instance_name, self.op.iallocator,
7667 utils.CommaJoin(ial.result))
7668 if ial.required_nodes == 2:
7669 self.op.snode = ial.result[1]
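# Illustrative note (node names made up): for a mirrored disk template the
# allocator returns two node names, e.g. ["node1.example.com",
# "node2.example.com"]; result[0] becomes the primary node and, when
# required_nodes == 2, result[1] becomes the secondary.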
7671 def BuildHooksEnv(self):
7674 This runs on master, primary and secondary nodes of the instance.
7678 "ADD_MODE": self.op.mode,
7680 if self.op.mode == constants.INSTANCE_IMPORT:
7681 env["SRC_NODE"] = self.op.src_node
7682 env["SRC_PATH"] = self.op.src_path
7683 env["SRC_IMAGES"] = self.src_images
7685 env.update(_BuildInstanceHookEnv(
7686 name=self.op.instance_name,
7687 primary_node=self.op.pnode,
7688 secondary_nodes=self.secondaries,
7689 status=self.op.start,
7690 os_type=self.op.os_type,
7691 memory=self.be_full[constants.BE_MEMORY],
7692 vcpus=self.be_full[constants.BE_VCPUS],
7693 nics=_NICListToTuple(self, self.nics),
7694 disk_template=self.op.disk_template,
7695 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7696 for d in self.disks],
7699 hypervisor_name=self.op.hypervisor,
7704 def BuildHooksNodes(self):
7705 """Build hooks nodes.
7708 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7711 def _ReadExportInfo(self):
7712 """Reads the export information from disk.
7714 It will override the opcode source node and path with the actual
7715 information, if these two were not specified before.
7717 @return: the export information
7720 assert self.op.mode == constants.INSTANCE_IMPORT
7722 src_node = self.op.src_node
7723 src_path = self.op.src_path
7725 if src_node is None:
7726 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7727 exp_list = self.rpc.call_export_list(locked_nodes)
7729 for node in exp_list:
7730 if exp_list[node].fail_msg:
7732 if src_path in exp_list[node].payload:
7734 self.op.src_node = src_node = node
7735 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7739 raise errors.OpPrereqError("No export found for relative path %s" %
7740 src_path, errors.ECODE_INVAL)
7742 _CheckNodeOnline(self, src_node)
7743 result = self.rpc.call_export_info(src_node, src_path)
7744 result.Raise("No export or invalid export found in dir %s" % src_path)
7746 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7747 if not export_info.has_section(constants.INISECT_EXP):
7748 raise errors.ProgrammerError("Corrupted export config",
7749 errors.ECODE_ENVIRON)
7751 ei_version = export_info.get(constants.INISECT_EXP, "version")
7752 if (int(ei_version) != constants.EXPORT_VERSION):
7753 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7754 (ei_version, constants.EXPORT_VERSION),
7755 errors.ECODE_ENVIRON)
7758 def _ReadExportParams(self, einfo):
7759 """Use export parameters as defaults.
7761 In case the opcode doesn't specify (as in override) some instance
7762 parameters, then try to use them from the export information, if that declares them.
7766 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7768 if self.op.disk_template is None:
7769 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7770 self.op.disk_template = einfo.get(constants.INISECT_INS,
7773 raise errors.OpPrereqError("No disk template specified and the export"
7774 " is missing the disk_template information",
7777 if not self.op.disks:
7778 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7780 # TODO: import the disk iv_name too
7781 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7782 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7783 disks.append({constants.IDISK_SIZE: disk_sz})
7784 self.op.disks = disks
7786 raise errors.OpPrereqError("No disk info specified and the export"
7787 " is missing the disk information",
7790 if (not self.op.nics and
7791 einfo.has_option(constants.INISECT_INS, "nic_count")):
7793 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7795 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7796 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7801 if (self.op.hypervisor is None and
7802 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7803 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7804 if einfo.has_section(constants.INISECT_HYP):
7805 # use the export parameters but do not override the ones
7806 # specified by the user
7807 for name, value in einfo.items(constants.INISECT_HYP):
7808 if name not in self.op.hvparams:
7809 self.op.hvparams[name] = value
7811 if einfo.has_section(constants.INISECT_BEP):
7812 # use the parameters, without overriding
7813 for name, value in einfo.items(constants.INISECT_BEP):
7814 if name not in self.op.beparams:
7815 self.op.beparams[name] = value
7817 # try to read the parameters old style, from the main section
7818 for name in constants.BES_PARAMETERS:
7819 if (name not in self.op.beparams and
7820 einfo.has_option(constants.INISECT_INS, name)):
7821 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7823 if einfo.has_section(constants.INISECT_OSP):
7824 # use the parameters, without overriding
7825 for name, value in einfo.items(constants.INISECT_OSP):
7826 if name not in self.op.osparams:
7827 self.op.osparams[name] = value
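# Summary of the precedence implemented above: values given in the opcode
# always win; the export's INI sections only fill in hypervisor, backend and
# OS parameters (including old-style main-section backend parameters) that
# the user did not specify.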
7829 def _RevertToDefaults(self, cluster):
7830 """Revert the instance parameters to the default values.
7834 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7835 for name in self.op.hvparams.keys():
7836 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7837 del self.op.hvparams[name]
7839 be_defs = cluster.SimpleFillBE({})
7840 for name in self.op.beparams.keys():
7841 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7842 del self.op.beparams[name]
7844 nic_defs = cluster.SimpleFillNIC({})
7845 for nic in self.op.nics:
7846 for name in constants.NICS_PARAMETERS:
7847 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7850 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7851 for name in self.op.osparams.keys():
7852 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7853 del self.op.osparams[name]
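# Net effect: any parameter whose value matches the current cluster default is
# dropped from the opcode, so the new instance keeps following the cluster
# defaults instead of storing its own copy of them.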
7855 def CheckPrereq(self):
7856 """Check prerequisites.
7859 if self.op.mode == constants.INSTANCE_IMPORT:
7860 export_info = self._ReadExportInfo()
7861 self._ReadExportParams(export_info)
7863 if (not self.cfg.GetVGName() and
7864 self.op.disk_template not in constants.DTS_NOT_LVM):
7865 raise errors.OpPrereqError("Cluster does not support lvm-based"
7866 " instances", errors.ECODE_STATE)
7868 if self.op.hypervisor is None:
7869 self.op.hypervisor = self.cfg.GetHypervisorType()
7871 cluster = self.cfg.GetClusterInfo()
7872 enabled_hvs = cluster.enabled_hypervisors
7873 if self.op.hypervisor not in enabled_hvs:
7874 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7875 " cluster (%s)" % (self.op.hypervisor,
7876 ",".join(enabled_hvs)),
7879 # check hypervisor parameter syntax (locally)
7880 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7881 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7883 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7884 hv_type.CheckParameterSyntax(filled_hvp)
7885 self.hv_full = filled_hvp
7886 # check that we don't specify global parameters on an instance
7887 _CheckGlobalHvParams(self.op.hvparams)
7889 # fill and remember the beparams dict
7890 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7891 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7893 # build os parameters
7894 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7896 # now that hvp/bep are in final format, let's reset to defaults,
7898 if self.op.identify_defaults:
7899 self._RevertToDefaults(cluster)
7903 for idx, nic in enumerate(self.op.nics):
7904 nic_mode_req = nic.get(constants.INIC_MODE, None)
7905 nic_mode = nic_mode_req
7906 if nic_mode is None:
7907 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7909 # in routed mode, for the first nic, the default ip is 'auto'
7910 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7911 default_ip_mode = constants.VALUE_AUTO
7913 default_ip_mode = constants.VALUE_NONE
7915 # ip validity checks
7916 ip = nic.get(constants.INIC_IP, default_ip_mode)
7917 if ip is None or ip.lower() == constants.VALUE_NONE:
7919 elif ip.lower() == constants.VALUE_AUTO:
7920 if not self.op.name_check:
7921 raise errors.OpPrereqError("IP address set to auto but name checks"
7922 " have been skipped",
7924 nic_ip = self.hostname1.ip
7926 if not netutils.IPAddress.IsValid(ip):
7927 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7931 # TODO: check the ip address for uniqueness
7932 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7933 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7936 # MAC address verification
7937 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7938 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7939 mac = utils.NormalizeAndValidateMac(mac)
7942 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7943 except errors.ReservationError:
7944 raise errors.OpPrereqError("MAC address %s already in use"
7945 " in cluster" % mac,
7946 errors.ECODE_NOTUNIQUE)
7948 # Build nic parameters
7949 link = nic.get(constants.INIC_LINK, None)
7952 nicparams[constants.NIC_MODE] = nic_mode_req
7954 nicparams[constants.NIC_LINK] = link
7956 check_params = cluster.SimpleFillNIC(nicparams)
7957 objects.NIC.CheckParameterSyntax(check_params)
7958 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
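# At this point every requested NIC has been turned into an objects.NIC with a
# validated MAC (or the 'auto'/'generate' placeholder, resolved further below),
# the resolved IP and only the explicitly requested nicparams.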
7960 # disk checks/pre-build
7961 default_vg = self.cfg.GetVGName()
7963 for disk in self.op.disks:
7964 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
7965 if mode not in constants.DISK_ACCESS_SET:
7966 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7967 mode, errors.ECODE_INVAL)
7968 size = disk.get(constants.IDISK_SIZE, None)
7970 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7973 except (TypeError, ValueError):
7974 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7977 constants.IDISK_SIZE: size,
7978 constants.IDISK_MODE: mode,
7979 constants.IDISK_VG: disk.get(constants.IDISK_VG, default_vg),
7981 if constants.IDISK_ADOPT in disk:
7982 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
7983 self.disks.append(new_disk)
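# Illustrative example of a normalized self.disks entry (values made up):
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# with constants.IDISK_ADOPT added when a volume or device is being adopted.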
7985 if self.op.mode == constants.INSTANCE_IMPORT:
7987 # Check that the new instance doesn't have fewer disks than the export
7988 instance_disks = len(self.disks)
7989 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7990 if instance_disks < export_disks:
7991 raise errors.OpPrereqError("Not enough disks to import."
7992 " (instance: %d, export: %d)" %
7993 (instance_disks, export_disks),
7997 for idx in range(export_disks):
7998 option = 'disk%d_dump' % idx
7999 if export_info.has_option(constants.INISECT_INS, option):
8000 # FIXME: are the old OSes, disk sizes, etc. useful?
8001 export_name = export_info.get(constants.INISECT_INS, option)
8002 image = utils.PathJoin(self.op.src_path, export_name)
8003 disk_images.append(image)
8005 disk_images.append(False)
8007 self.src_images = disk_images
8009 old_name = export_info.get(constants.INISECT_INS, 'name')
8011 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8012 except (TypeError, ValueError), err:
8013 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8014 " an integer: %s" % str(err),
8016 if self.op.instance_name == old_name:
8017 for idx, nic in enumerate(self.nics):
8018 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8019 nic_mac_ini = 'nic%d_mac' % idx
8020 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8022 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8024 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8025 if self.op.ip_check:
8026 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8027 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8028 (self.check_ip, self.op.instance_name),
8029 errors.ECODE_NOTUNIQUE)
8031 #### mac address generation
8032 # By generating the MAC address here, both the allocator and the hooks get
8033 # the real, final MAC address rather than the 'auto' or 'generate' value.
8034 # There is a race condition between the generation and the instance object
8035 # creation, which means that we know the mac is valid now, but we're not
8036 # sure it will be when we actually add the instance. If things go bad
8037 # adding the instance will abort because of a duplicate mac, and the
8038 # creation job will fail.
8039 for nic in self.nics:
8040 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8041 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8045 if self.op.iallocator is not None:
8046 self._RunAllocator()
8048 #### node related checks
8050 # check primary node
8051 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8052 assert self.pnode is not None, \
8053 "Cannot retrieve locked node %s" % self.op.pnode
8055 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8056 pnode.name, errors.ECODE_STATE)
8058 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8059 pnode.name, errors.ECODE_STATE)
8060 if not pnode.vm_capable:
8061 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8062 " '%s'" % pnode.name, errors.ECODE_STATE)
8064 self.secondaries = []
8066 # mirror node verification
8067 if self.op.disk_template in constants.DTS_INT_MIRROR:
8068 if self.op.snode == pnode.name:
8069 raise errors.OpPrereqError("The secondary node cannot be the"
8070 " primary node.", errors.ECODE_INVAL)
8071 _CheckNodeOnline(self, self.op.snode)
8072 _CheckNodeNotDrained(self, self.op.snode)
8073 _CheckNodeVmCapable(self, self.op.snode)
8074 self.secondaries.append(self.op.snode)
8076 nodenames = [pnode.name] + self.secondaries
8078 if not self.adopt_disks:
8079 # Check lv size requirements, if not adopting
8080 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8081 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8083 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8084 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8085 disk[constants.IDISK_ADOPT])
8086 for disk in self.disks])
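# Illustrative example (names made up): a disk spec containing
# {constants.IDISK_VG: "xenvg", constants.IDISK_ADOPT: "data-vol"} contributes
# the string "xenvg/data-vol" to all_lvs, so duplicated volumes collapse in
# the set and are caught by the length check below.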
8087 if len(all_lvs) != len(self.disks):
8088 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8090 for lv_name in all_lvs:
8092 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8093 # to ReserveLV use the same syntax
8094 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8095 except errors.ReservationError:
8096 raise errors.OpPrereqError("LV named %s used by another instance" %
8097 lv_name, errors.ECODE_NOTUNIQUE)
8099 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8100 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8102 node_lvs = self.rpc.call_lv_list([pnode.name],
8103 vg_names.payload.keys())[pnode.name]
8104 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8105 node_lvs = node_lvs.payload
8107 delta = all_lvs.difference(node_lvs.keys())
8109 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8110 utils.CommaJoin(delta),
8112 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8114 raise errors.OpPrereqError("Online logical volumes found, cannot"
8115 " adopt: %s" % utils.CommaJoin(online_lvs),
8117 # update the size of disk based on what is found
8118 for dsk in self.disks:
8119 dsk[constants.IDISK_SIZE] = \
8120 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8121 dsk[constants.IDISK_ADOPT])][0]))
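# The lv_list payload maps "vg/lv" names to a tuple whose first element is the
# volume size (in mebibytes, as elsewhere in Ganeti) and whose third element
# is the online flag checked above, so the adopted disk sizes recorded here
# reflect what actually exists on the node.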
8123 elif self.op.disk_template == constants.DT_BLOCK:
8124 # Normalize and de-duplicate device paths
8125 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8126 for disk in self.disks])
8127 if len(all_disks) != len(self.disks):
8128 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8130 baddisks = [d for d in all_disks
8131 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8133 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8134 " cannot be adopted" %
8135 (", ".join(baddisks),
8136 constants.ADOPTABLE_BLOCKDEV_ROOT),
8139 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8140 list(all_disks))[pnode.name]
8141 node_disks.Raise("Cannot get block device information from node %s" %
8143 node_disks = node_disks.payload
8144 delta = all_disks.difference(node_disks.keys())
8146 raise errors.OpPrereqError("Missing block device(s): %s" %
8147 utils.CommaJoin(delta),
8149 for dsk in self.disks:
8150 dsk[constants.IDISK_SIZE] = \
8151 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8153 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8155 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8156 # check OS parameters (remotely)
8157 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8159 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8161 # memory check on primary node
8163 _CheckNodeFreeMemory(self, self.pnode.name,
8164 "creating instance %s" % self.op.instance_name,
8165 self.be_full[constants.BE_MEMORY],
8168 self.dry_run_result = list(nodenames)
8170 def Exec(self, feedback_fn):
8171 """Create and add the instance to the cluster.
8174 instance = self.op.instance_name
8175 pnode_name = self.pnode.name
8177 ht_kind = self.op.hypervisor
8178 if ht_kind in constants.HTS_REQ_PORT:
8179 network_port = self.cfg.AllocatePort()
8183 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8184 # this is needed because os.path.join does not accept None arguments
8185 if self.op.file_storage_dir is None:
8186 string_file_storage_dir = ""
8188 string_file_storage_dir = self.op.file_storage_dir
8190 # build the full file storage dir path
8191 if self.op.disk_template == constants.DT_SHARED_FILE:
8192 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8194 get_fsd_fn = self.cfg.GetFileStorageDir
8196 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8197 string_file_storage_dir, instance)
8199 file_storage_dir = ""
8201 disks = _GenerateDiskTemplate(self,
8202 self.op.disk_template,
8203 instance, pnode_name,
8207 self.op.file_driver,
8211 iobj = objects.Instance(name=instance, os=self.op.os_type,
8212 primary_node=pnode_name,
8213 nics=self.nics, disks=disks,
8214 disk_template=self.op.disk_template,
8216 network_port=network_port,
8217 beparams=self.op.beparams,
8218 hvparams=self.op.hvparams,
8219 hypervisor=self.op.hypervisor,
8220 osparams=self.op.osparams,
8223 if self.adopt_disks:
8224 if self.op.disk_template == constants.DT_PLAIN:
8225 # rename LVs to the newly-generated names; we need to construct
8226 # 'fake' LV disks with the old data, plus the new unique_id
8227 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8229 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8230 rename_to.append(t_dsk.logical_id)
8231 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8232 self.cfg.SetDiskID(t_dsk, pnode_name)
8233 result = self.rpc.call_blockdev_rename(pnode_name,
8234 zip(tmp_disks, rename_to))
8235 result.Raise("Failed to rename adoped LVs")
8237 feedback_fn("* creating instance disks...")
8239 _CreateDisks(self, iobj)
8240 except errors.OpExecError:
8241 self.LogWarning("Device creation failed, reverting...")
8243 _RemoveDisks(self, iobj)
8245 self.cfg.ReleaseDRBDMinors(instance)
8248 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8249 feedback_fn("* wiping instance disks...")
8251 _WipeDisks(self, iobj)
8252 except errors.OpExecError:
8253 self.LogWarning("Device wiping failed, reverting...")
8255 _RemoveDisks(self, iobj)
8257 self.cfg.ReleaseDRBDMinors(instance)
8260 feedback_fn("adding instance %s to cluster config" % instance)
8262 self.cfg.AddInstance(iobj, self.proc.GetECId())
8264 # Declare that we don't want to remove the instance lock anymore, as we've
8265 # added the instance to the config
8266 del self.remove_locks[locking.LEVEL_INSTANCE]
8267 # Unlock all the nodes
8268 if self.op.mode == constants.INSTANCE_IMPORT:
8269 nodes_keep = [self.op.src_node]
8270 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8271 if node != self.op.src_node]
8272 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8273 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8275 self.context.glm.release(locking.LEVEL_NODE)
8276 del self.acquired_locks[locking.LEVEL_NODE]
8278 if self.op.wait_for_sync:
8279 disk_abort = not _WaitForSync(self, iobj)
8280 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8281 # make sure the disks are not degraded (still sync-ing is ok)
8283 feedback_fn("* checking mirrors status")
8284 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8289 _RemoveDisks(self, iobj)
8290 self.cfg.RemoveInstance(iobj.name)
8291 # Make sure the instance lock gets removed
8292 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8293 raise errors.OpExecError("There are some degraded disks for"
8296 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8297 if self.op.mode == constants.INSTANCE_CREATE:
8298 if not self.op.no_install:
8299 feedback_fn("* running the instance OS create scripts...")
8300 # FIXME: pass debug option from opcode to backend
8301 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8302 self.op.debug_level)
8303 result.Raise("Could not add os for instance %s"
8304 " on node %s" % (instance, pnode_name))
8306 elif self.op.mode == constants.INSTANCE_IMPORT:
8307 feedback_fn("* running the instance OS import scripts...")
8311 for idx, image in enumerate(self.src_images):
8315 # FIXME: pass debug option from opcode to backend
8316 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8317 constants.IEIO_FILE, (image, ),
8318 constants.IEIO_SCRIPT,
8319 (iobj.disks[idx], idx),
8321 transfers.append(dt)
8324 masterd.instance.TransferInstanceData(self, feedback_fn,
8325 self.op.src_node, pnode_name,
8326 self.pnode.secondary_ip,
8328 if not compat.all(import_result):
8329 self.LogWarning("Some disks for instance %s on node %s were not"
8330 " imported successfully" % (instance, pnode_name))
8332 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8333 feedback_fn("* preparing remote import...")
8334 # The source cluster will stop the instance before attempting to make a
8335 # connection. In some cases stopping an instance can take a long time,
8336 # hence the shutdown timeout is added to the connection timeout.
8337 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8338 self.op.source_shutdown_timeout)
8339 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8341 assert iobj.primary_node == self.pnode.name
8343 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8344 self.source_x509_ca,
8345 self._cds, timeouts)
8346 if not compat.all(disk_results):
8347 # TODO: Should the instance still be started, even if some disks
8348 # failed to import (valid for local imports, too)?
8349 self.LogWarning("Some disks for instance %s on node %s were not"
8350 " imported successfully" % (instance, pnode_name))
8352 # Run rename script on newly imported instance
8353 assert iobj.name == instance
8354 feedback_fn("Running rename script for %s" % instance)
8355 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8356 self.source_instance_name,
8357 self.op.debug_level)
8359 self.LogWarning("Failed to run rename script for %s on node"
8360 " %s: %s" % (instance, pnode_name, result.fail_msg))
8363 # also checked in the prereq part
8364 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8368 iobj.admin_up = True
8369 self.cfg.Update(iobj, feedback_fn)
8370 logging.info("Starting instance %s on node %s", instance, pnode_name)
8371 feedback_fn("* starting instance...")
8372 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8373 result.Raise("Could not start instance")
8375 return list(iobj.all_nodes)
8378 class LUInstanceConsole(NoHooksLU):
8379 """Connect to an instance's console.
8381 This is somewhat special in that it returns the command line that
8382 you need to run on the master node in order to connect to the console.
8388 def ExpandNames(self):
8389 self._ExpandAndLockInstance()
8391 def CheckPrereq(self):
8392 """Check prerequisites.
8394 This checks that the instance is in the cluster.
8397 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8398 assert self.instance is not None, \
8399 "Cannot retrieve locked instance %s" % self.op.instance_name
8400 _CheckNodeOnline(self, self.instance.primary_node)
8402 def Exec(self, feedback_fn):
8403 """Connect to the console of an instance
8406 instance = self.instance
8407 node = instance.primary_node
8409 node_insts = self.rpc.call_instance_list([node],
8410 [instance.hypervisor])[node]
8411 node_insts.Raise("Can't get node information from %s" % node)
8413 if instance.name not in node_insts.payload:
8414 if instance.admin_up:
8415 state = constants.INSTST_ERRORDOWN
8417 state = constants.INSTST_ADMINDOWN
8418 raise errors.OpExecError("Instance %s is not running (state %s)" %
8419 (instance.name, state))
8421 logging.debug("Connecting to console of %s on %s", instance.name, node)
8423 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8426 def _GetInstanceConsole(cluster, instance):
8427 """Returns console information for an instance.
8429 @type cluster: L{objects.Cluster}
8430 @type instance: L{objects.Instance}
8434 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8435 # beparams and hvparams are passed separately, to avoid editing the
8436 # instance and then saving the defaults in the instance itself.
8437 hvparams = cluster.FillHV(instance)
8438 beparams = cluster.FillBE(instance)
8439 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8441 assert console.instance == instance.name
8442 assert console.Validate()
8444 return console.ToDict()
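# The dictionary returned here is consumed by the client-side console code;
# its exact contents depend on the hypervisor's GetInstanceConsole()
# implementation (for example an SSH command line, or host/port details for a
# graphical console).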
8447 class LUInstanceReplaceDisks(LogicalUnit):
8448 """Replace the disks of an instance.
8451 HPATH = "mirrors-replace"
8452 HTYPE = constants.HTYPE_INSTANCE
8455 def CheckArguments(self):
8456 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8459 def ExpandNames(self):
8460 self._ExpandAndLockInstance()
8462 if self.op.iallocator is not None:
8463 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8465 elif self.op.remote_node is not None:
8466 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8467 self.op.remote_node = remote_node
8469 # Warning: do not remove the locking of the new secondary here
8470 # unless DRBD8.AddChildren is changed to work in parallel;
8471 # currently it doesn't since parallel invocations of
8472 # FindUnusedMinor will conflict
8473 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8474 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8477 self.needed_locks[locking.LEVEL_NODE] = []
8478 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8480 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8481 self.op.iallocator, self.op.remote_node,
8482 self.op.disks, False, self.op.early_release)
8484 self.tasklets = [self.replacer]
8486 def DeclareLocks(self, level):
8487 # If we're not already locking all nodes in the set we have to declare the
8488 # instance's primary/secondary nodes.
8489 if (level == locking.LEVEL_NODE and
8490 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8491 self._LockInstancesNodes()
8493 def BuildHooksEnv(self):
8496 This runs on the master, the primary and all the secondaries.
8499 instance = self.replacer.instance
8501 "MODE": self.op.mode,
8502 "NEW_SECONDARY": self.op.remote_node,
8503 "OLD_SECONDARY": instance.secondary_nodes[0],
8505 env.update(_BuildInstanceHookEnvByObject(self, instance))
8508 def BuildHooksNodes(self):
8509 """Build hooks nodes.
8512 instance = self.replacer.instance
8514 self.cfg.GetMasterNode(),
8515 instance.primary_node,
8517 if self.op.remote_node is not None:
8518 nl.append(self.op.remote_node)
8522 class TLReplaceDisks(Tasklet):
8523 """Replaces disks for an instance.
8525 Note: Locking is not within the scope of this class.
8528 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8529 disks, delay_iallocator, early_release):
8530 """Initializes this class.
8533 Tasklet.__init__(self, lu)
8536 self.instance_name = instance_name
8538 self.iallocator_name = iallocator_name
8539 self.remote_node = remote_node
8541 self.delay_iallocator = delay_iallocator
8542 self.early_release = early_release
8545 self.instance = None
8546 self.new_node = None
8547 self.target_node = None
8548 self.other_node = None
8549 self.remote_node_info = None
8550 self.node_secondary_ip = None
8553 def CheckArguments(mode, remote_node, iallocator):
8554 """Helper function for users of this class.
8557 # check for valid parameter combination
8558 if mode == constants.REPLACE_DISK_CHG:
8559 if remote_node is None and iallocator is None:
8560 raise errors.OpPrereqError("When changing the secondary either an"
8561 " iallocator script must be used or the"
8562 " new node given", errors.ECODE_INVAL)
8564 if remote_node is not None and iallocator is not None:
8565 raise errors.OpPrereqError("Give either the iallocator or the new"
8566 " secondary, not both", errors.ECODE_INVAL)
8568 elif remote_node is not None or iallocator is not None:
8569 # Not replacing the secondary
8570 raise errors.OpPrereqError("The iallocator and new node options can"
8571 " only be used when changing the"
8572 " secondary node", errors.ECODE_INVAL)
8575 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8576 """Compute a new secondary node using an IAllocator.
8579 ial = IAllocator(lu.cfg, lu.rpc,
8580 mode=constants.IALLOCATOR_MODE_RELOC,
8582 relocate_from=relocate_from)
8584 ial.Run(iallocator_name)
8587 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8588 " %s" % (iallocator_name, ial.info),
8591 if len(ial.result) != ial.required_nodes:
8592 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8593 " of nodes (%s), required %s" %
8595 len(ial.result), ial.required_nodes),
8598 remote_node_name = ial.result[0]
8600 lu.LogInfo("Selected new secondary for instance '%s': %s",
8601 instance_name, remote_node_name)
8603 return remote_node_name
8605 def _FindFaultyDisks(self, node_name):
8606 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8609 def _CheckDisksActivated(self, instance):
8610 """Checks if the instance disks are activated.
8612 @param instance: The instance to check disks
8613 @return: True if they are activated, False otherwise
8616 nodes = instance.all_nodes
8618 for idx, dev in enumerate(instance.disks):
8620 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8621 self.cfg.SetDiskID(dev, node)
8623 result = self.rpc.call_blockdev_find(node, dev)
8627 elif result.fail_msg or not result.payload:
8633 def CheckPrereq(self):
8634 """Check prerequisites.
8636 This checks that the instance is in the cluster.
8639 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8640 assert instance is not None, \
8641 "Cannot retrieve locked instance %s" % self.instance_name
8643 if instance.disk_template != constants.DT_DRBD8:
8644 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8645 " instances", errors.ECODE_INVAL)
8647 if len(instance.secondary_nodes) != 1:
8648 raise errors.OpPrereqError("The instance has a strange layout,"
8649 " expected one secondary but found %d" %
8650 len(instance.secondary_nodes),
8653 if not self.delay_iallocator:
8654 self._CheckPrereq2()
8656 def _CheckPrereq2(self):
8657 """Check prerequisites, second part.
8659 This function should always be part of CheckPrereq. It was separated and is
8660 now called from Exec because during node evacuation iallocator was only
8661 called with an unmodified cluster model, not taking planned changes into account.
8665 instance = self.instance
8666 secondary_node = instance.secondary_nodes[0]
8668 if self.iallocator_name is None:
8669 remote_node = self.remote_node
8671 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8672 instance.name, instance.secondary_nodes)
8674 if remote_node is not None:
8675 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8676 assert self.remote_node_info is not None, \
8677 "Cannot retrieve locked node %s" % remote_node
8679 self.remote_node_info = None
8681 if remote_node == self.instance.primary_node:
8682 raise errors.OpPrereqError("The specified node is the primary node of"
8683 " the instance.", errors.ECODE_INVAL)
8685 if remote_node == secondary_node:
8686 raise errors.OpPrereqError("The specified node is already the"
8687 " secondary node of the instance.",
8690 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8691 constants.REPLACE_DISK_CHG):
8692 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8695 if self.mode == constants.REPLACE_DISK_AUTO:
8696 if not self._CheckDisksActivated(instance):
8697 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8698 " first" % self.instance_name,
8700 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8701 faulty_secondary = self._FindFaultyDisks(secondary_node)
8703 if faulty_primary and faulty_secondary:
8704 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8705 " one node and can not be repaired"
8706 " automatically" % self.instance_name,
8710 self.disks = faulty_primary
8711 self.target_node = instance.primary_node
8712 self.other_node = secondary_node
8713 check_nodes = [self.target_node, self.other_node]
8714 elif faulty_secondary:
8715 self.disks = faulty_secondary
8716 self.target_node = secondary_node
8717 self.other_node = instance.primary_node
8718 check_nodes = [self.target_node, self.other_node]
8724 # Non-automatic modes
8725 if self.mode == constants.REPLACE_DISK_PRI:
8726 self.target_node = instance.primary_node
8727 self.other_node = secondary_node
8728 check_nodes = [self.target_node, self.other_node]
8730 elif self.mode == constants.REPLACE_DISK_SEC:
8731 self.target_node = secondary_node
8732 self.other_node = instance.primary_node
8733 check_nodes = [self.target_node, self.other_node]
8735 elif self.mode == constants.REPLACE_DISK_CHG:
8736 self.new_node = remote_node
8737 self.other_node = instance.primary_node
8738 self.target_node = secondary_node
8739 check_nodes = [self.new_node, self.other_node]
8741 _CheckNodeNotDrained(self.lu, remote_node)
8742 _CheckNodeVmCapable(self.lu, remote_node)
8744 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8745 assert old_node_info is not None
8746 if old_node_info.offline and not self.early_release:
8747 # doesn't make sense to delay the release
8748 self.early_release = True
8749 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8750 " early-release mode", secondary_node)
8753 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8756 # If not specified, all disks should be replaced
8758 self.disks = range(len(self.instance.disks))
8760 for node in check_nodes:
8761 _CheckNodeOnline(self.lu, node)
8763 # Check whether disks are valid
8764 for disk_idx in self.disks:
8765 instance.FindDisk(disk_idx)
8767 # Get secondary node IP addresses
8770 for node_name in [self.target_node, self.other_node, self.new_node]:
8771 if node_name is not None:
8772 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8774 self.node_secondary_ip = node_2nd_ip
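# node_secondary_ip now maps every node involved in the operation to its
# secondary (replication) IP; it is used later when disconnecting and
# re-attaching the DRBD network in the secondary-replacement path.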
8776 def Exec(self, feedback_fn):
8777 """Execute disk replacement.
8779 This dispatches the disk replacement to the appropriate handler.
8782 if self.delay_iallocator:
8783 self._CheckPrereq2()
8786 feedback_fn("No disks need replacement")
8789 feedback_fn("Replacing disk(s) %s for %s" %
8790 (utils.CommaJoin(self.disks), self.instance.name))
8792 activate_disks = (not self.instance.admin_up)
8794 # Activate the instance disks if we're replacing them on a down instance
8796 _StartInstanceDisks(self.lu, self.instance, True)
8799 # Should we replace the secondary node?
8800 if self.new_node is not None:
8801 fn = self._ExecDrbd8Secondary
8803 fn = self._ExecDrbd8DiskOnly
8805 return fn(feedback_fn)
8808 # Deactivate the instance disks if we're replacing them on a
8811 _SafeShutdownInstanceDisks(self.lu, self.instance)
8813 def _CheckVolumeGroup(self, nodes):
8814 self.lu.LogInfo("Checking volume groups")
8816 vgname = self.cfg.GetVGName()
8818 # Make sure volume group exists on all involved nodes
8819 results = self.rpc.call_vg_list(nodes)
8821 raise errors.OpExecError("Can't list volume groups on the nodes")
8825 res.Raise("Error checking node %s" % node)
8826 if vgname not in res.payload:
8827 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8830 def _CheckDisksExistence(self, nodes):
8831 # Check disk existence
8832 for idx, dev in enumerate(self.instance.disks):
8833 if idx not in self.disks:
8837 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8838 self.cfg.SetDiskID(dev, node)
8840 result = self.rpc.call_blockdev_find(node, dev)
8842 msg = result.fail_msg
8843 if msg or not result.payload:
8845 msg = "disk not found"
8846 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8849 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8850 for idx, dev in enumerate(self.instance.disks):
8851 if idx not in self.disks:
8854 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8857 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8859 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8860 " replace disks for instance %s" %
8861 (node_name, self.instance.name))
8863 def _CreateNewStorage(self, node_name):
8864 vgname = self.cfg.GetVGName()
8867 for idx, dev in enumerate(self.instance.disks):
8868 if idx not in self.disks:
8871 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8873 self.cfg.SetDiskID(dev, node_name)
8875 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8876 names = _GenerateUniqueNames(self.lu, lv_names)
8878 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8879 logical_id=(vgname, names[0]))
8880 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8881 logical_id=(vgname, names[1]))
8883 new_lvs = [lv_data, lv_meta]
8884 old_lvs = dev.children
8885 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
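# iv_names maps the instance-visible disk name (e.g. "disk/0") to a tuple of
# (drbd device, old LVs, new LVs); it drives the detach/rename/attach sequence
# in _ExecDrbd8DiskOnly and the later removal of the old storage.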
8887 # we pass force_create=True to force the LVM creation
8888 for new_lv in new_lvs:
8889 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8890 _GetInstanceInfoText(self.instance), False)
8894 def _CheckDevices(self, node_name, iv_names):
8895 for name, (dev, _, _) in iv_names.iteritems():
8896 self.cfg.SetDiskID(dev, node_name)
8898 result = self.rpc.call_blockdev_find(node_name, dev)
8900 msg = result.fail_msg
8901 if msg or not result.payload:
8903 msg = "disk not found"
8904 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8907 if result.payload.is_degraded:
8908 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8910 def _RemoveOldStorage(self, node_name, iv_names):
8911 for name, (_, old_lvs, _) in iv_names.iteritems():
8912 self.lu.LogInfo("Remove logical volumes for %s" % name)
8915 self.cfg.SetDiskID(lv, node_name)
8917 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8919 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8920 hint="remove unused LVs manually")
8922 def _ReleaseNodeLock(self, node_name):
8923 """Releases the lock for a given node."""
8924 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8926 def _ExecDrbd8DiskOnly(self, feedback_fn):
8927 """Replace a disk on the primary or secondary for DRBD 8.
8929 The algorithm for replace is quite complicated:
8931 1. for each disk to be replaced:
8933 1. create new LVs on the target node with unique names
8934 1. detach old LVs from the drbd device
8935 1. rename old LVs to name_replaced.<time_t>
8936 1. rename new LVs to old LVs
8937 1. attach the new LVs (with the old names now) to the drbd device
8939 1. wait for sync across all devices
8941 1. for each modified disk:
8943 1. remove old LVs (which have the name name_replaced.<time_t>)
8945 Failures are not very well handled.
8950 # Step: check device activation
8951 self.lu.LogStep(1, steps_total, "Check device existence")
8952 self._CheckDisksExistence([self.other_node, self.target_node])
8953 self._CheckVolumeGroup([self.target_node, self.other_node])
8955 # Step: check other node consistency
8956 self.lu.LogStep(2, steps_total, "Check peer consistency")
8957 self._CheckDisksConsistency(self.other_node,
8958 self.other_node == self.instance.primary_node,
8961 # Step: create new storage
8962 self.lu.LogStep(3, steps_total, "Allocate new storage")
8963 iv_names = self._CreateNewStorage(self.target_node)
8965 # Step: for each lv, detach+rename*2+attach
8966 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8967 for dev, old_lvs, new_lvs in iv_names.itervalues():
8968 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8970 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8972 result.Raise("Can't detach drbd from local storage on node"
8973 " %s for device %s" % (self.target_node, dev.iv_name))
8975 #cfg.Update(instance)
8977 # ok, we created the new LVs, so now we know we have the needed
8978 # storage; as such, we proceed on the target node to rename
8979 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8980 # using the assumption that logical_id == physical_id (which in
8981 # turn is the unique_id on that node)
8983 # FIXME(iustin): use a better name for the replaced LVs
8984 temp_suffix = int(time.time())
8985 ren_fn = lambda d, suff: (d.physical_id[0],
8986 d.physical_id[1] + "_replaced-%s" % suff)
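# Illustrative example of the rename (VG and LV names made up): an old LV with
# physical_id ("xenvg", "abc.disk0_data") becomes
# ("xenvg", "abc.disk0_data_replaced-1400000000"), freeing the original name
# for the freshly created replacement LV.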
8988 # Build the rename list based on what LVs exist on the node
8989 rename_old_to_new = []
8990 for to_ren in old_lvs:
8991 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8992 if not result.fail_msg and result.payload:
8994 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8996 self.lu.LogInfo("Renaming the old LVs on the target node")
8997 result = self.rpc.call_blockdev_rename(self.target_node,
8999 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9001 # Now we rename the new LVs to the old LVs
9002 self.lu.LogInfo("Renaming the new LVs on the target node")
9003 rename_new_to_old = [(new, old.physical_id)
9004 for old, new in zip(old_lvs, new_lvs)]
9005 result = self.rpc.call_blockdev_rename(self.target_node,
9007 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9009 for old, new in zip(old_lvs, new_lvs):
9010 new.logical_id = old.logical_id
9011 self.cfg.SetDiskID(new, self.target_node)
9013 for disk in old_lvs:
9014 disk.logical_id = ren_fn(disk, temp_suffix)
9015 self.cfg.SetDiskID(disk, self.target_node)
9017 # Now that the new lvs have the old name, we can add them to the device
9018 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9019 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9021 msg = result.fail_msg
9023 for new_lv in new_lvs:
9024 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9027 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9028 hint=("cleanup manually the unused logical"
9030 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9032 dev.children = new_lvs
9034 self.cfg.Update(self.instance, feedback_fn)
9037 if self.early_release:
9038 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9040 self._RemoveOldStorage(self.target_node, iv_names)
9041 # WARNING: we release both node locks here, do not do other RPCs
9042 # than WaitForSync to the primary node
9043 self._ReleaseNodeLock([self.target_node, self.other_node])
9046 # This can fail as the old devices are degraded and _WaitForSync
9047 # computes a combined result over all disks, so we don't check its return value
9048 self.lu.LogStep(cstep, steps_total, "Sync devices")
9050 _WaitForSync(self.lu, self.instance)
9052 # Check all devices manually
9053 self._CheckDevices(self.instance.primary_node, iv_names)
9055 # Step: remove old storage
9056 if not self.early_release:
9057 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9059 self._RemoveOldStorage(self.target_node, iv_names)
9061 def _ExecDrbd8Secondary(self, feedback_fn):
9062 """Replace the secondary node for DRBD 8.
9064 The algorithm for replace is quite complicated:
9065 - for all disks of the instance:
9066 - create new LVs on the new node with same names
9067 - shutdown the drbd device on the old secondary
9068 - disconnect the drbd network on the primary
9069 - create the drbd device on the new secondary
9070 - network attach the drbd on the primary, using an artifice:
9071 the drbd code for Attach() will connect to the network if it
9072 finds a device which is connected to the good local disks but not network enabled
9074 - wait for sync across all devices
9075 - remove all disks from the old secondary
9077 Failures are not very well handled.
9082 # Step: check device activation
9083 self.lu.LogStep(1, steps_total, "Check device existence")
9084 self._CheckDisksExistence([self.instance.primary_node])
9085 self._CheckVolumeGroup([self.instance.primary_node])
9087 # Step: check other node consistency
9088 self.lu.LogStep(2, steps_total, "Check peer consistency")
9089 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9091 # Step: create new storage
9092 self.lu.LogStep(3, steps_total, "Allocate new storage")
9093 for idx, dev in enumerate(self.instance.disks):
9094 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9095 (self.new_node, idx))
9096 # we pass force_create=True to force LVM creation
9097 for new_lv in dev.children:
9098 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9099 _GetInstanceInfoText(self.instance), False)
9101 # Step 4: drbd minors and drbd setup changes
9102 # after this, we must manually remove the drbd minors on both the
9103 # error and the success paths
9104 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9105 minors = self.cfg.AllocateDRBDMinor([self.new_node
9106 for dev in self.instance.disks],
9108 logging.debug("Allocated minors %r", minors)
9111 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9112 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9113 (self.new_node, idx))
9114 # create new devices on new_node; note that we create two IDs:
9115 # one without port, so the drbd will be activated without
9116 # networking information on the new node at this stage, and one
9117 # with network, for the later activation in step 4
9118 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9119 if self.instance.primary_node == o_node1:
9122 assert self.instance.primary_node == o_node2, "Three-node instance?"
9125 new_alone_id = (self.instance.primary_node, self.new_node, None,
9126 p_minor, new_minor, o_secret)
9127 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9128 p_minor, new_minor, o_secret)
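# For DRBD8 disks the logical_id is the 6-tuple unpacked above:
# (node_A, node_B, port, minor_A, minor_B, secret). new_alone_id replaces the
# port with None so the device is brought up standalone on the new node, while
# new_net_id keeps the port for the network attach performed later on.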
9130 iv_names[idx] = (dev, dev.children, new_net_id)
9131 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9133 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9134 logical_id=new_alone_id,
9135 children=dev.children,
9138 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9139 _GetInstanceInfoText(self.instance), False)
9140 except errors.GenericError:
9141 self.cfg.ReleaseDRBDMinors(self.instance.name)
9144 # We have new devices, shutdown the drbd on the old secondary
9145 for idx, dev in enumerate(self.instance.disks):
9146 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9147 self.cfg.SetDiskID(dev, self.target_node)
9148 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9150 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9151 "node: %s" % (idx, msg),
9152 hint=("Please cleanup this device manually as"
9153 " soon as possible"))
9155 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9156 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9157 self.node_secondary_ip,
9158 self.instance.disks)\
9159 [self.instance.primary_node]
9161 msg = result.fail_msg
9163 # detaches didn't succeed (unlikely)
9164 self.cfg.ReleaseDRBDMinors(self.instance.name)
9165 raise errors.OpExecError("Can't detach the disks from the network on"
9166 " old node: %s" % (msg,))
9168 # if we managed to detach at least one, we update all the disks of
9169 # the instance to point to the new secondary
9170 self.lu.LogInfo("Updating instance configuration")
9171 for dev, _, new_logical_id in iv_names.itervalues():
9172 dev.logical_id = new_logical_id
9173 self.cfg.SetDiskID(dev, self.instance.primary_node)
9175 self.cfg.Update(self.instance, feedback_fn)
9177 # and now perform the drbd attach
9178 self.lu.LogInfo("Attaching primary drbds to new secondary"
9179 " (standalone => connected)")
9180 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9182 self.node_secondary_ip,
9183 self.instance.disks,
9186 for to_node, to_result in result.items():
9187 msg = to_result.fail_msg
9189 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9191 hint=("please do a gnt-instance info to see the"
9192 " status of disks"))
9194 if self.early_release:
9195 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9197 self._RemoveOldStorage(self.target_node, iv_names)
9198 # WARNING: we release all node locks here, do not do other RPCs
9199 # than WaitForSync to the primary node
9200 self._ReleaseNodeLock([self.instance.primary_node,
9205 # This can fail as the old devices are degraded and _WaitForSync
9206 # computes a combined result over all disks, so we don't check its return value
9207 self.lu.LogStep(cstep, steps_total, "Sync devices")
9209 _WaitForSync(self.lu, self.instance)
9211 # Check all devices manually
9212 self._CheckDevices(self.instance.primary_node, iv_names)
9214 # Step: remove old storage
9215 if not self.early_release:
9216 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9217 self._RemoveOldStorage(self.target_node, iv_names)
9220 class LURepairNodeStorage(NoHooksLU):
9221 """Repairs the volume group on a node.
9226 def CheckArguments(self):
9227 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9229 storage_type = self.op.storage_type
9231 if (constants.SO_FIX_CONSISTENCY not in
9232 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9233 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9234 " repaired" % storage_type,
9237 def ExpandNames(self):
9238 self.needed_locks = {
9239 locking.LEVEL_NODE: [self.op.node_name],
9242 def _CheckFaultyDisks(self, instance, node_name):
9243 """Ensure faulty disks abort the opcode or at least warn."""
9245 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9247 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9248 " node '%s'" % (instance.name, node_name),
9250 except errors.OpPrereqError, err:
9251 if self.op.ignore_consistency:
9252 self.proc.LogWarning(str(err.args[0]))
9256 def CheckPrereq(self):
9257 """Check prerequisites.
9260 # Check whether any instance on this node has faulty disks
9261 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9262 if not inst.admin_up:
9264 check_nodes = set(inst.all_nodes)
9265 check_nodes.discard(self.op.node_name)
9266 for inst_node_name in check_nodes:
9267 self._CheckFaultyDisks(inst, inst_node_name)
9269 def Exec(self, feedback_fn):
9270 feedback_fn("Repairing storage unit '%s' on %s ..." %
9271 (self.op.name, self.op.node_name))
9273 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9274 result = self.rpc.call_storage_execute(self.op.node_name,
9275 self.op.storage_type, st_args,
9277 constants.SO_FIX_CONSISTENCY)
9278 result.Raise("Failed to repair storage unit '%s' on %s" %
9279 (self.op.name, self.op.node_name))
9282 class LUNodeEvacStrategy(NoHooksLU):
9283 """Computes the node evacuation strategy.
9288 def CheckArguments(self):
9289 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9291 def ExpandNames(self):
9292 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9293 self.needed_locks = locks = {}
9294 if self.op.remote_node is None:
9295 locks[locking.LEVEL_NODE] = locking.ALL_SET
9297 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9298 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9300 def Exec(self, feedback_fn):
9301 if self.op.remote_node is not None:
9303 for node in self.op.nodes:
9304 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9307 if i.primary_node == self.op.remote_node:
9308 raise errors.OpPrereqError("Node %s is the primary node of"
9309 " instance %s, cannot use it as"
9311 (self.op.remote_node, i.name),
9313 result.append([i.name, self.op.remote_node])
9315 ial = IAllocator(self.cfg, self.rpc,
9316 mode=constants.IALLOCATOR_MODE_MEVAC,
9317 evac_nodes=self.op.nodes)
9318 ial.Run(self.op.iallocator, validate=True)
9320 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9326 class LUInstanceGrowDisk(LogicalUnit):
9327 """Grow a disk of an instance.
9331 HTYPE = constants.HTYPE_INSTANCE
9334 def ExpandNames(self):
9335 self._ExpandAndLockInstance()
9336 self.needed_locks[locking.LEVEL_NODE] = []
9337 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9339 def DeclareLocks(self, level):
9340 if level == locking.LEVEL_NODE:
9341 self._LockInstancesNodes()
9343 def BuildHooksEnv(self):
9346 This runs on the master, the primary and all the secondaries.
9350 "DISK": self.op.disk,
9351 "AMOUNT": self.op.amount,
9353 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9356 def BuildHooksNodes(self):
9357 """Build hooks nodes.
9360 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9363 def CheckPrereq(self):
9364 """Check prerequisites.
9366 This checks that the instance is in the cluster.
9369 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9370 assert instance is not None, \
9371 "Cannot retrieve locked instance %s" % self.op.instance_name
9372 nodenames = list(instance.all_nodes)
9373 for node in nodenames:
9374 _CheckNodeOnline(self, node)
9376 self.instance = instance
9378 if instance.disk_template not in constants.DTS_GROWABLE:
9379 raise errors.OpPrereqError("Instance's disk layout does not support"
9380 " growing.", errors.ECODE_INVAL)
9382 self.disk = instance.FindDisk(self.op.disk)
9384 if instance.disk_template not in (constants.DT_FILE,
9385 constants.DT_SHARED_FILE):
9386 # TODO: check the free disk space for file-based disks, when that feature is implemented
9388 _CheckNodesFreeDiskPerVG(self, nodenames,
9389 self.disk.ComputeGrowth(self.op.amount))
9391 def Exec(self, feedback_fn):
9392 """Execute disk grow.
9395 instance = self.instance
9398 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9400 raise errors.OpExecError("Cannot activate block device to grow")
9402 for node in instance.all_nodes:
9403 self.cfg.SetDiskID(disk, node)
9404 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9405 result.Raise("Grow request failed to node %s" % node)
9407 # TODO: Rewrite code to work properly
9408 # DRBD goes into sync mode for a short amount of time after executing the
9409 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9410 # calling "resize" in sync mode fails. Sleeping for a short amount of
9411 # time is a work-around.
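# (sketch of the work-around described above, assuming the "time" module is
# imported at the top of this file; the exact delay is an assumption)
time.sleep(5)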
9414 disk.RecordGrow(self.op.amount)
9415 self.cfg.Update(instance, feedback_fn)
9416 if self.op.wait_for_sync:
9417 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9419 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9420 " status.\nPlease check the instance.")
9421 if not instance.admin_up:
9422 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9423 elif not instance.admin_up:
9424 self.proc.LogWarning("Not shutting down the disk even if the instance is"
9425 " not supposed to be running because no wait for"
9426 " sync mode was requested.")
9429 class LUInstanceQueryData(NoHooksLU):
9430 """Query runtime instance data.
9435 def ExpandNames(self):
9436 self.needed_locks = {}
9438 # Use locking if requested or when non-static information is wanted
9439 if not (self.op.static or self.op.use_locking):
9440 self.LogWarning("Non-static data requested, locks need to be acquired")
9441 self.op.use_locking = True
9443 if self.op.instances or not self.op.use_locking:
9444 # Expand instance names right here
9445 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9447 # Will use acquired locks
9448 self.wanted_names = None
9450 if self.op.use_locking:
9451 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9453 if self.wanted_names is None:
9454 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9456 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9458 self.needed_locks[locking.LEVEL_NODE] = []
9459 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9460 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9462 def DeclareLocks(self, level):
9463 if self.op.use_locking and level == locking.LEVEL_NODE:
9464 self._LockInstancesNodes()
9466 def CheckPrereq(self):
9467 """Check prerequisites.
9469 This only checks the optional instance list against the existing names.
9472 if self.wanted_names is None:
9473 assert self.op.use_locking, "Locking was not used"
9474 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9476 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9477 for name in self.wanted_names]
9479 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9480 """Returns the status of a block device
9483 if self.op.static or not node:
9486 self.cfg.SetDiskID(dev, node)
9488 result = self.rpc.call_blockdev_find(node, dev)
9492 result.Raise("Can't compute disk status for %s" % instance_name)
9494 status = result.payload
9498 return (status.dev_path, status.major, status.minor,
9499 status.sync_percent, status.estimated_time,
9500 status.is_degraded, status.ldisk_status)
9502 def _ComputeDiskStatus(self, instance, snode, dev):
9503 """Compute block device status.
9506 if dev.dev_type in constants.LDS_DRBD:
9507 # we change the snode then (otherwise we use the one passed in)
9508 if dev.logical_id[0] == instance.primary_node:
9509 snode = dev.logical_id[1]
9511 snode = dev.logical_id[0]
9513 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9515 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9518 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9519 for child in dev.children]
9524 "iv_name": dev.iv_name,
9525 "dev_type": dev.dev_type,
9526 "logical_id": dev.logical_id,
9527 "physical_id": dev.physical_id,
9528 "pstatus": dev_pstatus,
9529 "sstatus": dev_sstatus,
9530 "children": dev_children,
9535 def Exec(self, feedback_fn):
9536 """Gather and return data"""
9539 cluster = self.cfg.GetClusterInfo()
9541 for instance in self.wanted_instances:
9542 if not self.op.static:
9543 remote_info = self.rpc.call_instance_info(instance.primary_node,
9545 instance.hypervisor)
9546 remote_info.Raise("Error checking node %s" % instance.primary_node)
9547 remote_info = remote_info.payload
9548 if remote_info and "state" in remote_info:
9551 remote_state = "down"
9554 if instance.admin_up:
9557 config_state = "down"
9559 disks = [self._ComputeDiskStatus(instance, None, device)
9560 for device in instance.disks]
9562 result[instance.name] = {
9563 "name": instance.name,
9564 "config_state": config_state,
9565 "run_state": remote_state,
9566 "pnode": instance.primary_node,
9567 "snodes": instance.secondary_nodes,
9569 # this happens to be the same format used for hooks
9570 "nics": _NICListToTuple(self, instance.nics),
9571 "disk_template": instance.disk_template,
9573 "hypervisor": instance.hypervisor,
9574 "network_port": instance.network_port,
9575 "hv_instance": instance.hvparams,
9576 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9577 "be_instance": instance.beparams,
9578 "be_actual": cluster.FillBE(instance),
9579 "os_instance": instance.osparams,
9580 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9581 "serial_no": instance.serial_no,
9582 "mtime": instance.mtime,
9583 "ctime": instance.ctime,
9584 "uuid": instance.uuid,
9590 class LUInstanceSetParams(LogicalUnit):
9591 """Modifies an instances's parameters.
9594 HPATH = "instance-modify"
9595 HTYPE = constants.HTYPE_INSTANCE
9598 def CheckArguments(self):
9599 if not (self.op.nics or self.op.disks or self.op.disk_template or
9600 self.op.hvparams or self.op.beparams or self.op.os_name):
9601 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9603 if self.op.hvparams:
9604 _CheckGlobalHvParams(self.op.hvparams)
9608 for disk_op, disk_dict in self.op.disks:
9609 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9610 if disk_op == constants.DDM_REMOVE:
9613 elif disk_op == constants.DDM_ADD:
9616 if not isinstance(disk_op, int):
9617 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9618 if not isinstance(disk_dict, dict):
9619 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9620 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9622 if disk_op == constants.DDM_ADD:
9623 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9624 if mode not in constants.DISK_ACCESS_SET:
9625 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9627 size = disk_dict.get(constants.IDISK_SIZE, None)
9629 raise errors.OpPrereqError("Required disk parameter size missing",
9633 except (TypeError, ValueError), err:
9634 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9635 str(err), errors.ECODE_INVAL)
9636 disk_dict[constants.IDISK_SIZE] = size
9638 # modification of disk
9639 if constants.IDISK_SIZE in disk_dict:
9640 raise errors.OpPrereqError("Disk size change not possible, use"
9641 " grow-disk", errors.ECODE_INVAL)
9643 if disk_addremove > 1:
9644 raise errors.OpPrereqError("Only one disk add or remove operation"
9645 " supported at a time", errors.ECODE_INVAL)
9647 if self.op.disks and self.op.disk_template is not None:
9648 raise errors.OpPrereqError("Disk template conversion and other disk"
9649 " changes not supported at the same time",
9652 if (self.op.disk_template and
9653 self.op.disk_template in constants.DTS_INT_MIRROR and
9654 self.op.remote_node is None):
9655 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9656 " one requires specifying a secondary node",
9661 for nic_op, nic_dict in self.op.nics:
9662 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9663 if nic_op == constants.DDM_REMOVE:
9666 elif nic_op == constants.DDM_ADD:
9669 if not isinstance(nic_op, int):
9670 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9671 if not isinstance(nic_dict, dict):
9672 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9673 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9675 # nic_dict should be a dict
9676 nic_ip = nic_dict.get(constants.INIC_IP, None)
9677 if nic_ip is not None:
9678 if nic_ip.lower() == constants.VALUE_NONE:
9679 nic_dict[constants.INIC_IP] = None
9681 if not netutils.IPAddress.IsValid(nic_ip):
9682 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9685 nic_bridge = nic_dict.get('bridge', None)
9686 nic_link = nic_dict.get(constants.INIC_LINK, None)
9687 if nic_bridge and nic_link:
9688 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9689 " at the same time", errors.ECODE_INVAL)
9690 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9691 nic_dict['bridge'] = None
9692 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9693 nic_dict[constants.INIC_LINK] = None
9695 if nic_op == constants.DDM_ADD:
9696 nic_mac = nic_dict.get(constants.INIC_MAC, None)
9698 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9700 if constants.INIC_MAC in nic_dict:
9701 nic_mac = nic_dict[constants.INIC_MAC]
9702 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9703 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9705 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9706 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9707 " modifying an existing nic",
9710 if nic_addremove > 1:
9711 raise errors.OpPrereqError("Only one NIC add or remove operation"
9712 " supported at a time", errors.ECODE_INVAL)
9714 def ExpandNames(self):
9715 self._ExpandAndLockInstance()
9716 self.needed_locks[locking.LEVEL_NODE] = []
9717 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9719 def DeclareLocks(self, level):
9720 if level == locking.LEVEL_NODE:
9721 self._LockInstancesNodes()
9722 if self.op.disk_template and self.op.remote_node:
9723 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9724 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9726 def BuildHooksEnv(self):
9729 This runs on the master, primary and secondaries.
9733 if constants.BE_MEMORY in self.be_new:
9734 args['memory'] = self.be_new[constants.BE_MEMORY]
9735 if constants.BE_VCPUS in self.be_new:
9736 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9737 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9738 # information at all.
9741 nic_override = dict(self.op.nics)
9742 for idx, nic in enumerate(self.instance.nics):
9743 if idx in nic_override:
9744 this_nic_override = nic_override[idx]
9746 this_nic_override = {}
9747 if constants.INIC_IP in this_nic_override:
9748 ip = this_nic_override[constants.INIC_IP]
9751 if constants.INIC_MAC in this_nic_override:
9752 mac = this_nic_override[constants.INIC_MAC]
9755 if idx in self.nic_pnew:
9756 nicparams = self.nic_pnew[idx]
9758 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9759 mode = nicparams[constants.NIC_MODE]
9760 link = nicparams[constants.NIC_LINK]
9761 args['nics'].append((ip, mac, mode, link))
9762 if constants.DDM_ADD in nic_override:
9763 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9764 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9765 nicparams = self.nic_pnew[constants.DDM_ADD]
9766 mode = nicparams[constants.NIC_MODE]
9767 link = nicparams[constants.NIC_LINK]
9768 args['nics'].append((ip, mac, mode, link))
9769 elif constants.DDM_REMOVE in nic_override:
9770 del args['nics'][-1]
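# (at this point args["nics"] reflects the NIC list as it will look after the
#  requested changes, so the hooks environment describes the post-modification
#  state of the instance)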
9772 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9773 if self.op.disk_template:
9774 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9778 def BuildHooksNodes(self):
9779 """Build hooks nodes.
9782 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9785 def CheckPrereq(self):
9786 """Check prerequisites.
9788 This only checks the instance list against the existing names.
9791 # checking the new params on the primary/secondary nodes
9793 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9794 cluster = self.cluster = self.cfg.GetClusterInfo()
9795 assert self.instance is not None, \
9796 "Cannot retrieve locked instance %s" % self.op.instance_name
9797 pnode = instance.primary_node
9798 nodelist = list(instance.all_nodes)
9801 if self.op.os_name and not self.op.force:
9802 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9803 self.op.force_variant)
9804 instance_os = self.op.os_name
9806 instance_os = instance.os
9808 if self.op.disk_template:
9809 if instance.disk_template == self.op.disk_template:
9810 raise errors.OpPrereqError("Instance already has disk template %s" %
9811 instance.disk_template, errors.ECODE_INVAL)
9813 if (instance.disk_template,
9814 self.op.disk_template) not in self._DISK_CONVERSIONS:
9815 raise errors.OpPrereqError("Unsupported disk template conversion from"
9816 " %s to %s" % (instance.disk_template,
9817 self.op.disk_template),
9819 _CheckInstanceDown(self, instance, "cannot change disk template")
9820 if self.op.disk_template in constants.DTS_INT_MIRROR:
9821 if self.op.remote_node == pnode:
9822 raise errors.OpPrereqError("Given new secondary node %s is the same"
9823 " as the primary node of the instance" %
9824 self.op.remote_node, errors.ECODE_STATE)
9825 _CheckNodeOnline(self, self.op.remote_node)
9826 _CheckNodeNotDrained(self, self.op.remote_node)
9827 # FIXME: here we assume that the old instance type is DT_PLAIN
9828 assert instance.disk_template == constants.DT_PLAIN
9829 disks = [{constants.IDISK_SIZE: d.size,
9830 constants.IDISK_VG: d.logical_id[0]}
9831 for d in instance.disks]
9832 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9833 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9835 # hvparams processing
9836 if self.op.hvparams:
9837 hv_type = instance.hypervisor
9838 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9839 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9840 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9843 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9844 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9845 self.hv_new = hv_new # the new actual values
9846 self.hv_inst = i_hvdict # the new dict (without defaults)
9848 self.hv_new = self.hv_inst = {}
9850 # beparams processing
9851 if self.op.beparams:
9852 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9854 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9855 be_new = cluster.SimpleFillBE(i_bedict)
9856 self.be_new = be_new # the new actual values
9857 self.be_inst = i_bedict # the new dict (without defaults)
9859 self.be_new = self.be_inst = {}
9861 # osparams processing
9862 if self.op.osparams:
9863 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9864 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9865 self.os_inst = i_osdict # the new dict (without defaults)
9871 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9872 mem_check_list = [pnode]
9873 if be_new[constants.BE_AUTO_BALANCE]:
9874 # either we changed auto_balance to yes or it was from before
9875 mem_check_list.extend(instance.secondary_nodes)
9876 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9877 instance.hypervisor)
9878 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9879 instance.hypervisor)
9880 pninfo = nodeinfo[pnode]
9881 msg = pninfo.fail_msg
9883 # Assume the primary node is unreachable and go ahead
9884 self.warn.append("Can't get info from primary node %s: %s" %
9886 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9887 self.warn.append("Node data from primary node %s doesn't contain"
9888 " free memory information" % pnode)
9889 elif instance_info.fail_msg:
9890 self.warn.append("Can't get instance runtime information: %s" %
9891 instance_info.fail_msg)
9893 if instance_info.payload:
9894 current_mem = int(instance_info.payload['memory'])
9896 # Assume instance not running
9897 # (there is a slight race condition here, but it's not very probable,
9898 # and we have no other way to check)
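# (assumed fallback matching the comment above: treat the instance as
#  currently using no memory)
current_mem = 0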
9900 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9901 pninfo.payload['memory_free'])
9903 raise errors.OpPrereqError("This change will prevent the instance"
9904 " from starting, due to %d MB of memory"
9905 " missing on its primary node" % miss_mem,
9908 if be_new[constants.BE_AUTO_BALANCE]:
9909 for node, nres in nodeinfo.items():
9910 if node not in instance.secondary_nodes:
9914 self.warn.append("Can't get info from secondary node %s: %s" %
9916 elif not isinstance(nres.payload.get('memory_free', None), int):
9917 self.warn.append("Secondary node %s didn't return free"
9918 " memory information" % node)
9919 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9920 self.warn.append("Not enough memory to failover instance to"
9921 " secondary node %s" % node)
9926 for nic_op, nic_dict in self.op.nics:
9927 if nic_op == constants.DDM_REMOVE:
9928 if not instance.nics:
9929 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9932 if nic_op != constants.DDM_ADD:
9934 if not instance.nics:
9935 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9936 " no NICs" % nic_op,
9938 if nic_op < 0 or nic_op >= len(instance.nics):
9939 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9941 (nic_op, len(instance.nics) - 1),
9943 old_nic_params = instance.nics[nic_op].nicparams
9944 old_nic_ip = instance.nics[nic_op].ip
9949 update_params_dict = dict([(key, nic_dict[key])
9950 for key in constants.NICS_PARAMETERS
9951 if key in nic_dict])
9953 if 'bridge' in nic_dict:
9954 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9956 new_nic_params = _GetUpdatedParams(old_nic_params,
9958 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9959 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9960 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9961 self.nic_pinst[nic_op] = new_nic_params
9962 self.nic_pnew[nic_op] = new_filled_nic_params
9963 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9965 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9966 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9967 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9969 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9971 self.warn.append(msg)
9973 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9974 if new_nic_mode == constants.NIC_MODE_ROUTED:
9975 if constants.INIC_IP in nic_dict:
9976 nic_ip = nic_dict[constants.INIC_IP]
9980 raise errors.OpPrereqError('Cannot set the nic ip to None'
9981 ' on a routed nic', errors.ECODE_INVAL)
9982 if constants.INIC_MAC in nic_dict:
9983 nic_mac = nic_dict[constants.INIC_MAC]
9985 raise errors.OpPrereqError('Cannot set the nic mac to None',
9987 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9988 # otherwise generate the mac
9989 nic_dict[constants.INIC_MAC] = \
9990 self.cfg.GenerateMAC(self.proc.GetECId())
9992 # or validate/reserve the current one
9994 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9995 except errors.ReservationError:
9996 raise errors.OpPrereqError("MAC address %s already in use"
9997 " in cluster" % nic_mac,
9998 errors.ECODE_NOTUNIQUE)
10001 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10002 raise errors.OpPrereqError("Disk operations not supported for"
10003 " diskless instances",
10004 errors.ECODE_INVAL)
10005 for disk_op, _ in self.op.disks:
10006 if disk_op == constants.DDM_REMOVE:
10007 if len(instance.disks) == 1:
10008 raise errors.OpPrereqError("Cannot remove the last disk of"
10009 " an instance", errors.ECODE_INVAL)
10010 _CheckInstanceDown(self, instance, "cannot remove disks")
10012 if (disk_op == constants.DDM_ADD and
10013 len(instance.disks) >= constants.MAX_DISKS):
10014 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10015 " add more" % constants.MAX_DISKS,
10016 errors.ECODE_STATE)
10017 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10019 if disk_op < 0 or disk_op >= len(instance.disks):
10020 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10022 (disk_op, len(instance.disks)),
10023 errors.ECODE_INVAL)
10027 def _ConvertPlainToDrbd(self, feedback_fn):
10028 """Converts an instance from plain to drbd.
10031 feedback_fn("Converting template to drbd")
10032 instance = self.instance
10033 pnode = instance.primary_node
10034 snode = self.op.remote_node
10036 # create a fake disk info for _GenerateDiskTemplate
10037 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode}
10038 for d in instance.disks]
10039 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10040 instance.name, pnode, [snode],
10041 disk_info, None, None, 0, feedback_fn)
10042 info = _GetInstanceInfoText(instance)
10043 feedback_fn("Creating aditional volumes...")
10044 # first, create the missing data and meta devices
10045 for disk in new_disks:
10046 # unfortunately this is... not too nice
10047 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10049 for child in disk.children:
10050 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10051 # at this stage, all new LVs have been created, we can rename the
10053 feedback_fn("Renaming original volumes...")
10054 rename_list = [(o, n.children[0].logical_id)
10055 for (o, n) in zip(instance.disks, new_disks)]
10056 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10057 result.Raise("Failed to rename original LVs")
10059 feedback_fn("Initializing DRBD devices...")
10060 # all child devices are in place, we can now create the DRBD devices
10061 for disk in new_disks:
10062 for node in [pnode, snode]:
10063 f_create = node == pnode
10064 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10066 # at this point, the instance has been modified
10067 instance.disk_template = constants.DT_DRBD8
10068 instance.disks = new_disks
10069 self.cfg.Update(instance, feedback_fn)
10071 # disks are created, waiting for sync
10072 disk_abort = not _WaitForSync(self, instance)
10074 raise errors.OpExecError("There are some degraded disks for"
10075 " this instance, please cleanup manually")
10077 def _ConvertDrbdToPlain(self, feedback_fn):
10078 """Converts an instance from drbd to plain.
10081 instance = self.instance
10082 assert len(instance.secondary_nodes) == 1
10083 pnode = instance.primary_node
10084 snode = instance.secondary_nodes[0]
10085 feedback_fn("Converting template to plain")
10087 old_disks = instance.disks
10088 new_disks = [d.children[0] for d in old_disks]
10090 # copy over size and mode
10091 for parent, child in zip(old_disks, new_disks):
10092 child.size = parent.size
10093 child.mode = parent.mode
10095 # update instance structure
10096 instance.disks = new_disks
10097 instance.disk_template = constants.DT_PLAIN
10098 self.cfg.Update(instance, feedback_fn)
10100 feedback_fn("Removing volumes on the secondary node...")
10101 for disk in old_disks:
10102 self.cfg.SetDiskID(disk, snode)
10103 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10105 self.LogWarning("Could not remove block device %s on node %s,"
10106 " continuing anyway: %s", disk.iv_name, snode, msg)
10108 feedback_fn("Removing unneeded volumes on the primary node...")
10109 for idx, disk in enumerate(old_disks):
10110 meta = disk.children[1]
10111 self.cfg.SetDiskID(meta, pnode)
10112 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10114 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10115 " continuing anyway: %s", idx, pnode, msg)
10117 def Exec(self, feedback_fn):
10118 """Modifies an instance.
10120 All parameters take effect only at the next restart of the instance.
10123 # Process here the warnings from CheckPrereq, as we don't have a
10124 # feedback_fn there.
10125 for warn in self.warn:
10126 feedback_fn("WARNING: %s" % warn)
10129 instance = self.instance
10131 for disk_op, disk_dict in self.op.disks:
10132 if disk_op == constants.DDM_REMOVE:
10133 # remove the last disk
10134 device = instance.disks.pop()
10135 device_idx = len(instance.disks)
10136 for node, disk in device.ComputeNodeTree(instance.primary_node):
10137 self.cfg.SetDiskID(disk, node)
10138 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10140 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10141 " continuing anyway", device_idx, node, msg)
10142 result.append(("disk/%d" % device_idx, "remove"))
10143 elif disk_op == constants.DDM_ADD:
10145 if instance.disk_template in (constants.DT_FILE,
10146 constants.DT_SHARED_FILE):
10147 file_driver, file_path = instance.disks[0].logical_id
10148 file_path = os.path.dirname(file_path)
10150 file_driver = file_path = None
10151 disk_idx_base = len(instance.disks)
10152 new_disk = _GenerateDiskTemplate(self,
10153 instance.disk_template,
10154 instance.name, instance.primary_node,
10155 instance.secondary_nodes,
10159 disk_idx_base, feedback_fn)[0]
10160 instance.disks.append(new_disk)
10161 info = _GetInstanceInfoText(instance)
10163 logging.info("Creating volume %s for instance %s",
10164 new_disk.iv_name, instance.name)
10165 # Note: this needs to be kept in sync with _CreateDisks
10167 for node in instance.all_nodes:
10168 f_create = node == instance.primary_node
10170 _CreateBlockDev(self, node, instance, new_disk,
10171 f_create, info, f_create)
10172 except errors.OpExecError, err:
10173 self.LogWarning("Failed to create volume %s (%s) on"
10175 new_disk.iv_name, new_disk, node, err)
10176 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10177 (new_disk.size, new_disk.mode)))
10179 # change a given disk
10180 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10181 result.append(("disk.mode/%d" % disk_op,
10182 disk_dict[constants.IDISK_MODE]))
10184 if self.op.disk_template:
10185 r_shut = _ShutdownInstanceDisks(self, instance)
10187 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10188 " proceed with disk template conversion")
10189 mode = (instance.disk_template, self.op.disk_template)
10191 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10193 self.cfg.ReleaseDRBDMinors(instance.name)
10195 result.append(("disk_template", self.op.disk_template))
10198 for nic_op, nic_dict in self.op.nics:
10199 if nic_op == constants.DDM_REMOVE:
10200 # remove the last nic
10201 del instance.nics[-1]
10202 result.append(("nic.%d" % len(instance.nics), "remove"))
10203 elif nic_op == constants.DDM_ADD:
10204 # mac and bridge should be set by now
10205 mac = nic_dict[constants.INIC_MAC]
10206 ip = nic_dict.get(constants.INIC_IP, None)
10207 nicparams = self.nic_pinst[constants.DDM_ADD]
10208 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10209 instance.nics.append(new_nic)
10210 result.append(("nic.%d" % (len(instance.nics) - 1),
10211 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10212 (new_nic.mac, new_nic.ip,
10213 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10214 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10217 for key in (constants.INIC_MAC, constants.INIC_IP):
10218 if key in nic_dict:
10219 setattr(instance.nics[nic_op], key, nic_dict[key])
10220 if nic_op in self.nic_pinst:
10221 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10222 for key, val in nic_dict.iteritems():
10223 result.append(("nic.%s/%d" % (key, nic_op), val))
10226 if self.op.hvparams:
10227 instance.hvparams = self.hv_inst
10228 for key, val in self.op.hvparams.iteritems():
10229 result.append(("hv/%s" % key, val))
10232 if self.op.beparams:
10233 instance.beparams = self.be_inst
10234 for key, val in self.op.beparams.iteritems():
10235 result.append(("be/%s" % key, val))
10238 if self.op.os_name:
10239 instance.os = self.op.os_name
10242 if self.op.osparams:
10243 instance.osparams = self.os_inst
10244 for key, val in self.op.osparams.iteritems():
10245 result.append(("os/%s" % key, val))
10247 self.cfg.Update(instance, feedback_fn)
10251 _DISK_CONVERSIONS = {
10252 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10253 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
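# (only the plain<->drbd8 conversions above are supported; any other pair is
#  rejected in CheckPrereq)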
10257 class LUBackupQuery(NoHooksLU):
10258 """Query the exports list
10263 def ExpandNames(self):
10264 self.needed_locks = {}
10265 self.share_locks[locking.LEVEL_NODE] = 1
10266 if not self.op.nodes:
10267 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10269 self.needed_locks[locking.LEVEL_NODE] = \
10270 _GetWantedNodes(self, self.op.nodes)
10272 def Exec(self, feedback_fn):
10273 """Compute the list of all the exported system images.
10276 @return: a dictionary with the structure node->(export-list)
10277 where export-list is a list of the instances exported on that node
10281 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10282 rpcresult = self.rpc.call_export_list(self.nodes)
10284 for node in rpcresult:
10285 if rpcresult[node].fail_msg:
10286 result[node] = False
10288 result[node] = rpcresult[node].payload
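# illustrative result shape (node names here are hypothetical):
#   {"node1.example.com": ["inst1.export1", ...], "node2.example.com": False}
# where False marks a node whose export list could not be retrieved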
10293 class LUBackupPrepare(NoHooksLU):
10294 """Prepares an instance for an export and returns useful information.
10299 def ExpandNames(self):
10300 self._ExpandAndLockInstance()
10302 def CheckPrereq(self):
10303 """Check prerequisites.
10306 instance_name = self.op.instance_name
10308 self.instance = self.cfg.GetInstanceInfo(instance_name)
10309 assert self.instance is not None, \
10310 "Cannot retrieve locked instance %s" % self.op.instance_name
10311 _CheckNodeOnline(self, self.instance.primary_node)
10313 self._cds = _GetClusterDomainSecret()
10315 def Exec(self, feedback_fn):
10316 """Prepares an instance for an export.
10319 instance = self.instance
10321 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10322 salt = utils.GenerateSecret(8)
10324 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10325 result = self.rpc.call_x509_cert_create(instance.primary_node,
10326 constants.RIE_CERT_VALIDITY)
10327 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10329 (name, cert_pem) = result.payload
10331 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10335 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10336 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10338 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10344 class LUBackupExport(LogicalUnit):
10345 """Export an instance to an image in the cluster.
10348 HPATH = "instance-export"
10349 HTYPE = constants.HTYPE_INSTANCE
10352 def CheckArguments(self):
10353 """Check the arguments.
10356 self.x509_key_name = self.op.x509_key_name
10357 self.dest_x509_ca_pem = self.op.destination_x509_ca
10359 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10360 if not self.x509_key_name:
10361 raise errors.OpPrereqError("Missing X509 key name for encryption",
10362 errors.ECODE_INVAL)
10364 if not self.dest_x509_ca_pem:
10365 raise errors.OpPrereqError("Missing destination X509 CA",
10366 errors.ECODE_INVAL)
10368 def ExpandNames(self):
10369 self._ExpandAndLockInstance()
10371 # Lock all nodes for local exports
10372 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10373 # FIXME: lock only instance primary and destination node
10375 # Sad but true, for now we have to lock all nodes, as we don't know where
10376 # the previous export might be, and in this LU we search for it and
10377 # remove it from its current node. In the future we could fix this by:
10378 # - making a tasklet to search (share-lock all), then create the
10379 # new one, then another tasklet to remove the old one afterwards
10380 # - removing the removal operation altogether
10381 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10383 def DeclareLocks(self, level):
10384 """Last minute lock declaration."""
10385 # All nodes are locked anyway, so nothing to do here.
10387 def BuildHooksEnv(self):
10388 """Build hooks env.
10390 This will run on the master, primary node and target node.
10394 "EXPORT_MODE": self.op.mode,
10395 "EXPORT_NODE": self.op.target_node,
10396 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10397 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10398 # TODO: Generic function for boolean env variables
10399 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10402 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10406 def BuildHooksNodes(self):
10407 """Build hooks nodes.
10410 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10412 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10413 nl.append(self.op.target_node)
10417 def CheckPrereq(self):
10418 """Check prerequisites.
10420 This checks that the instance and node names are valid.
10423 instance_name = self.op.instance_name
10425 self.instance = self.cfg.GetInstanceInfo(instance_name)
10426 assert self.instance is not None, \
10427 "Cannot retrieve locked instance %s" % self.op.instance_name
10428 _CheckNodeOnline(self, self.instance.primary_node)
10430 if (self.op.remove_instance and self.instance.admin_up and
10431 not self.op.shutdown):
10432 raise errors.OpPrereqError("Can not remove instance without shutting it"
10435 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10436 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10437 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10438 assert self.dst_node is not None
10440 _CheckNodeOnline(self, self.dst_node.name)
10441 _CheckNodeNotDrained(self, self.dst_node.name)
10444 self.dest_disk_info = None
10445 self.dest_x509_ca = None
10447 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10448 self.dst_node = None
10450 if len(self.op.target_node) != len(self.instance.disks):
10451 raise errors.OpPrereqError(("Received destination information for %s"
10452 " disks, but instance %s has %s disks") %
10453 (len(self.op.target_node), instance_name,
10454 len(self.instance.disks)),
10455 errors.ECODE_INVAL)
10457 cds = _GetClusterDomainSecret()
10459 # Check X509 key name
10461 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10462 except (TypeError, ValueError), err:
10463 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10465 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10466 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10467 errors.ECODE_INVAL)
10469 # Load and verify CA
10471 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10472 except OpenSSL.crypto.Error, err:
10473 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10474 (err, ), errors.ECODE_INVAL)
10476 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10477 if errcode is not None:
10478 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10479 (msg, ), errors.ECODE_INVAL)
10481 self.dest_x509_ca = cert
10483 # Verify target information
10485 for idx, disk_data in enumerate(self.op.target_node):
10487 (host, port, magic) = \
10488 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10489 except errors.GenericError, err:
10490 raise errors.OpPrereqError("Target info for disk %s: %s" %
10491 (idx, err), errors.ECODE_INVAL)
10493 disk_info.append((host, port, magic))
10495 assert len(disk_info) == len(self.op.target_node)
10496 self.dest_disk_info = disk_info
10499 raise errors.ProgrammerError("Unhandled export mode %r" %
10502 # instance disk type verification
10503 # TODO: Implement export support for file-based disks
10504 for disk in self.instance.disks:
10505 if disk.dev_type == constants.LD_FILE:
10506 raise errors.OpPrereqError("Export not supported for instances with"
10507 " file-based disks", errors.ECODE_INVAL)
10509 def _CleanupExports(self, feedback_fn):
10510 """Removes exports of current instance from all other nodes.
10512 If an instance in a cluster with nodes A..D was exported to node C, its
10513 exports will be removed from the nodes A, B and D.
10516 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10518 nodelist = self.cfg.GetNodeList()
10519 nodelist.remove(self.dst_node.name)
10521 # on one-node clusters nodelist will be empty after the removal
10522 # if we proceed the backup would be removed because OpBackupQuery
10523 # substitutes an empty list with the full cluster node list.
10524 iname = self.instance.name
10526 feedback_fn("Removing old exports for instance %s" % iname)
10527 exportlist = self.rpc.call_export_list(nodelist)
10528 for node in exportlist:
10529 if exportlist[node].fail_msg:
10531 if iname in exportlist[node].payload:
10532 msg = self.rpc.call_export_remove(node, iname).fail_msg
10534 self.LogWarning("Could not remove older export for instance %s"
10535 " on node %s: %s", iname, node, msg)
10537 def Exec(self, feedback_fn):
10538 """Export an instance to an image in the cluster.
10541 assert self.op.mode in constants.EXPORT_MODES
10543 instance = self.instance
10544 src_node = instance.primary_node
10546 if self.op.shutdown:
10547 # shutdown the instance, but not the disks
10548 feedback_fn("Shutting down instance %s" % instance.name)
10549 result = self.rpc.call_instance_shutdown(src_node, instance,
10550 self.op.shutdown_timeout)
10551 # TODO: Maybe ignore failures if ignore_remove_failures is set
10552 result.Raise("Could not shutdown instance %s on"
10553 " node %s" % (instance.name, src_node))
10555 # set the disks ID correctly since call_instance_start needs the
10556 # correct drbd minor to create the symlinks
10557 for disk in instance.disks:
10558 self.cfg.SetDiskID(disk, src_node)
10560 activate_disks = (not instance.admin_up)
10563 # Activate the instance disks if we're exporting a stopped instance
10564 feedback_fn("Activating disks for %s" % instance.name)
10565 _StartInstanceDisks(self, instance, None)
10568 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10571 helper.CreateSnapshots()
10573 if (self.op.shutdown and instance.admin_up and
10574 not self.op.remove_instance):
10575 assert not activate_disks
10576 feedback_fn("Starting instance %s" % instance.name)
10577 result = self.rpc.call_instance_start(src_node, instance, None, None)
10578 msg = result.fail_msg
10580 feedback_fn("Failed to start instance: %s" % msg)
10581 _ShutdownInstanceDisks(self, instance)
10582 raise errors.OpExecError("Could not start instance: %s" % msg)
10584 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10585 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10586 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10587 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10588 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10590 (key_name, _, _) = self.x509_key_name
10593 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10596 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10597 key_name, dest_ca_pem,
10602 # Check for backwards compatibility
10603 assert len(dresults) == len(instance.disks)
10604 assert compat.all(isinstance(i, bool) for i in dresults), \
10605 "Not all results are boolean: %r" % dresults
10609 feedback_fn("Deactivating disks for %s" % instance.name)
10610 _ShutdownInstanceDisks(self, instance)
10612 if not (compat.all(dresults) and fin_resu):
10615 failures.append("export finalization")
10616 if not compat.all(dresults):
10617 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10619 failures.append("disk export: disk(s) %s" % fdsk)
10621 raise errors.OpExecError("Export failed, errors in %s" %
10622 utils.CommaJoin(failures))
10624 # At this point, the export was successful, we can cleanup/finish
10626 # Remove instance if requested
10627 if self.op.remove_instance:
10628 feedback_fn("Removing instance %s" % instance.name)
10629 _RemoveInstance(self, feedback_fn, instance,
10630 self.op.ignore_remove_failures)
10632 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10633 self._CleanupExports(feedback_fn)
10635 return fin_resu, dresults
10638 class LUBackupRemove(NoHooksLU):
10639 """Remove exports related to the named instance.
10644 def ExpandNames(self):
10645 self.needed_locks = {}
10646 # We need all nodes to be locked in order for RemoveExport to work, but we
10647 # don't need to lock the instance itself, as nothing will happen to it (and
10648 # we can remove exports also for a removed instance)
10649 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10651 def Exec(self, feedback_fn):
10652 """Remove any export.
10655 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10656 # If the instance was not found we'll try with the name that was passed in.
10657 # This will only work if it was an FQDN, though.
10659 if not instance_name:
10661 instance_name = self.op.instance_name
10663 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10664 exportlist = self.rpc.call_export_list(locked_nodes)
10666 for node in exportlist:
10667 msg = exportlist[node].fail_msg
10669 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10671 if instance_name in exportlist[node].payload:
10673 result = self.rpc.call_export_remove(node, instance_name)
10674 msg = result.fail_msg
10676 logging.error("Could not remove export for instance %s"
10677 " on node %s: %s", instance_name, node, msg)
10679 if fqdn_warn and not found:
10680 feedback_fn("Export not found. If trying to remove an export belonging"
10681 " to a deleted instance please use its Fully Qualified"
10685 class LUGroupAdd(LogicalUnit):
10686 """Logical unit for creating node groups.
10689 HPATH = "group-add"
10690 HTYPE = constants.HTYPE_GROUP
10693 def ExpandNames(self):
10694 # We need the new group's UUID here so that we can create and acquire the
10695 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10696 # that it should not check whether the UUID exists in the configuration.
10697 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10698 self.needed_locks = {}
10699 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10701 def CheckPrereq(self):
10702 """Check prerequisites.
10704 This checks that the given group name is not an existing node group
10709 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10710 except errors.OpPrereqError:
10713 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10714 " node group (UUID: %s)" %
10715 (self.op.group_name, existing_uuid),
10716 errors.ECODE_EXISTS)
10718 if self.op.ndparams:
10719 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10721 def BuildHooksEnv(self):
10722 """Build hooks env.
10726 "GROUP_NAME": self.op.group_name,
10729 def BuildHooksNodes(self):
10730 """Build hooks nodes.
10733 mn = self.cfg.GetMasterNode()
10734 return ([mn], [mn])
10736 def Exec(self, feedback_fn):
10737 """Add the node group to the cluster.
10740 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10741 uuid=self.group_uuid,
10742 alloc_policy=self.op.alloc_policy,
10743 ndparams=self.op.ndparams)
10745 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10746 del self.remove_locks[locking.LEVEL_NODEGROUP]
10749 class LUGroupAssignNodes(NoHooksLU):
10750 """Logical unit for assigning nodes to groups.
10755 def ExpandNames(self):
10756 # These raise errors.OpPrereqError on their own:
10757 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10758 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10760 # We want to lock all the affected nodes and groups. We have readily
10761 # available the list of nodes, and the *destination* group. To gather the
10762 # list of "source" groups, we need to fetch node information.
10763 self.node_data = self.cfg.GetAllNodesInfo()
10764 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10765 affected_groups.add(self.group_uuid)
10767 self.needed_locks = {
10768 locking.LEVEL_NODEGROUP: list(affected_groups),
10769 locking.LEVEL_NODE: self.op.nodes,
10772 def CheckPrereq(self):
10773 """Check prerequisites.
10776 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10777 instance_data = self.cfg.GetAllInstancesInfo()
10779 if self.group is None:
10780 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10781 (self.op.group_name, self.group_uuid))
10783 (new_splits, previous_splits) = \
10784 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10785 for node in self.op.nodes],
10786 self.node_data, instance_data)
10789 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10791 if not self.op.force:
10792 raise errors.OpExecError("The following instances get split by this"
10793 " change and --force was not given: %s" %
10796 self.LogWarning("This operation will split the following instances: %s",
10799 if previous_splits:
10800 self.LogWarning("In addition, these already-split instances continue"
10801 " to be spit across groups: %s",
10802 utils.CommaJoin(utils.NiceSort(previous_splits)))
10804 def Exec(self, feedback_fn):
10805 """Assign nodes to a new group.
10808 for node in self.op.nodes:
10809 self.node_data[node].group = self.group_uuid
10811 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10814 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10815 """Check for split instances after a node assignment.
10817 This method considers a series of node assignments as an atomic operation,
10818 and returns information about split instances after applying the set of
10821 In particular, it returns information about newly split instances, and
10822 instances that were already split, and remain so after the change.
10824 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10827 @type changes: list of (node_name, new_group_uuid) pairs.
10828 @param changes: list of node assignments to consider.
10829 @param node_data: a dict with data for all nodes
10830 @param instance_data: a dict with all instances to consider
10831 @rtype: a two-tuple
10832 @return: a list of instances that were previously okay and become split as a
10833 consequence of this change, and a list of instances that were previously
10834 split and that this change does not fix.
10837 changed_nodes = dict((node, group) for node, group in changes
10838 if node_data[node].group != group)
10840 all_split_instances = set()
10841 previously_split_instances = set()
10843 def InstanceNodes(instance):
10844 return [instance.primary_node] + list(instance.secondary_nodes)
10846 for inst in instance_data.values():
10847 if inst.disk_template not in constants.DTS_INT_MIRROR:
10850 instance_nodes = InstanceNodes(inst)
10852 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10853 previously_split_instances.add(inst.name)
10855 if len(set(changed_nodes.get(node, node_data[node].group)
10856 for node in instance_nodes)) > 1:
10857 all_split_instances.add(inst.name)
10859 return (list(all_split_instances - previously_split_instances),
10860 list(previously_split_instances & all_split_instances))
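# Illustrative (hypothetical) use of the helper above: moving "node2" into group
# "uuid-B" while its DRBD peer "node1" stays in "uuid-A" makes any instance
# mirrored across the two nodes show up in the first (newly split) list, while
# instances that already spanned two groups before the change end up in the
# second list.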
10863 class _GroupQuery(_QueryBase):
10864 FIELDS = query.GROUP_FIELDS
10866 def ExpandNames(self, lu):
10867 lu.needed_locks = {}
10869 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10870 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10873 self.wanted = [name_to_uuid[name]
10874 for name in utils.NiceSort(name_to_uuid.keys())]
10876 # Accept names to be either names or UUIDs.
10879 all_uuid = frozenset(self._all_groups.keys())
10881 for name in self.names:
10882 if name in all_uuid:
10883 self.wanted.append(name)
10884 elif name in name_to_uuid:
10885 self.wanted.append(name_to_uuid[name])
10887 missing.append(name)
10890 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10891 errors.ECODE_NOENT)
10893 def DeclareLocks(self, lu, level):
10896 def _GetQueryData(self, lu):
10897 """Computes the list of node groups and their attributes.
10900 do_nodes = query.GQ_NODE in self.requested_data
10901 do_instances = query.GQ_INST in self.requested_data
10903 group_to_nodes = None
10904 group_to_instances = None
10906 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10907 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10908 # latter GetAllInstancesInfo() is not enough, for we have to go through
10909 # instance->node. Hence, we will need to process nodes even if we only need
10910 # instance information.
10911 if do_nodes or do_instances:
10912 all_nodes = lu.cfg.GetAllNodesInfo()
10913 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10916 for node in all_nodes.values():
10917 if node.group in group_to_nodes:
10918 group_to_nodes[node.group].append(node.name)
10919 node_to_group[node.name] = node.group
10922 all_instances = lu.cfg.GetAllInstancesInfo()
10923 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10925 for instance in all_instances.values():
10926 node = instance.primary_node
10927 if node in node_to_group:
10928 group_to_instances[node_to_group[node]].append(instance.name)
10931 # Do not pass on node information if it was not requested.
10932 group_to_nodes = None
10934 return query.GroupQueryData([self._all_groups[uuid]
10935 for uuid in self.wanted],
10936 group_to_nodes, group_to_instances)
10939 class LUGroupQuery(NoHooksLU):
10940 """Logical unit for querying node groups.
10945 def CheckArguments(self):
10946 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10947 self.op.output_fields, False)
10949 def ExpandNames(self):
10950 self.gq.ExpandNames(self)
10952 def Exec(self, feedback_fn):
10953 return self.gq.OldStyleQuery(self)
10956 class LUGroupSetParams(LogicalUnit):
10957 """Modifies the parameters of a node group.
10960 HPATH = "group-modify"
10961 HTYPE = constants.HTYPE_GROUP
10964 def CheckArguments(self):
10967 self.op.alloc_policy,
10970 if all_changes.count(None) == len(all_changes):
10971 raise errors.OpPrereqError("Please pass at least one modification",
10972 errors.ECODE_INVAL)
10974 def ExpandNames(self):
10975 # This raises errors.OpPrereqError on its own:
10976 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10978 self.needed_locks = {
10979 locking.LEVEL_NODEGROUP: [self.group_uuid],
10982 def CheckPrereq(self):
10983 """Check prerequisites.
10986 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10988 if self.group is None:
10989 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10990 (self.op.group_name, self.group_uuid))
10992 if self.op.ndparams:
10993 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10994 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10995 self.new_ndparams = new_ndparams
10997 def BuildHooksEnv(self):
10998 """Build hooks env.
11002 "GROUP_NAME": self.op.group_name,
11003 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11006 def BuildHooksNodes(self):
11007 """Build hooks nodes.
11010 mn = self.cfg.GetMasterNode()
11011 return ([mn], [mn])
11013 def Exec(self, feedback_fn):
11014 """Modifies the node group.
11019 if self.op.ndparams:
11020 self.group.ndparams = self.new_ndparams
11021 result.append(("ndparams", str(self.group.ndparams)))
11023 if self.op.alloc_policy:
11024 self.group.alloc_policy = self.op.alloc_policy
11026 self.cfg.Update(self.group, feedback_fn)
11031 class LUGroupRemove(LogicalUnit):
11032 HPATH = "group-remove"
11033 HTYPE = constants.HTYPE_GROUP
11036 def ExpandNames(self):
11037 # This raises errors.OpPrereqError on its own:
11038 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11039 self.needed_locks = {
11040 locking.LEVEL_NODEGROUP: [self.group_uuid],
11043 def CheckPrereq(self):
11044 """Check prerequisites.
11046 This checks that the given group name exists as a node group, that it is
11047 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
11051 # Verify that the group is empty.
11052 group_nodes = [node.name
11053 for node in self.cfg.GetAllNodesInfo().values()
11054 if node.group == self.group_uuid]
11057 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11059 (self.op.group_name,
11060 utils.CommaJoin(utils.NiceSort(group_nodes))),
11061 errors.ECODE_STATE)
11063 # Verify the cluster would not be left group-less.
11064 if len(self.cfg.GetNodeGroupList()) == 1:
11065 raise errors.OpPrereqError("Group '%s' is the only group,"
11066 " cannot be removed" %
11067 self.op.group_name,
11068 errors.ECODE_STATE)
11070 def BuildHooksEnv(self):
11071 """Build hooks env.
11075 "GROUP_NAME": self.op.group_name,
11078 def BuildHooksNodes(self):
11079 """Build hooks nodes.
11082 mn = self.cfg.GetMasterNode()
11083 return ([mn], [mn])
11085 def Exec(self, feedback_fn):
11086 """Remove the node group.
11090 self.cfg.RemoveNodeGroup(self.group_uuid)
11091 except errors.ConfigurationError:
11092 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11093 (self.op.group_name, self.group_uuid))
11095 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11098 class LUGroupRename(LogicalUnit):
11099 HPATH = "group-rename"
11100 HTYPE = constants.HTYPE_GROUP
11103 def ExpandNames(self):
11104 # This raises errors.OpPrereqError on its own:
11105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11107 self.needed_locks = {
11108 locking.LEVEL_NODEGROUP: [self.group_uuid],
11111 def CheckPrereq(self):
11112 """Check prerequisites.
11114 Ensures requested new name is not yet used.
11118 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11119 except errors.OpPrereqError:
11122 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11123 " node group (UUID: %s)" %
11124 (self.op.new_name, new_name_uuid),
11125 errors.ECODE_EXISTS)
11127 def BuildHooksEnv(self):
11128 """Build hooks env.
11132 "OLD_NAME": self.op.group_name,
11133 "NEW_NAME": self.op.new_name,
11136 def BuildHooksNodes(self):
11137 """Build hooks nodes.
11140 mn = self.cfg.GetMasterNode()
11142 all_nodes = self.cfg.GetAllNodesInfo()
11143 all_nodes.pop(mn, None)
11146 run_nodes.extend(node.name for node in all_nodes.values()
11147 if node.group == self.group_uuid)
11149 return (run_nodes, run_nodes)
11151 def Exec(self, feedback_fn):
11152 """Rename the node group.
11155 group = self.cfg.GetNodeGroup(self.group_uuid)
11158 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11159 (self.op.group_name, self.group_uuid))
11161 group.name = self.op.new_name
11162 self.cfg.Update(group, feedback_fn)
11164 return self.op.new_name
11167 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11168 """Generic tags LU.
11170 This is an abstract class which is the parent of all the other tags LUs.
11173 def ExpandNames(self):
11174 self.group_uuid = None
11175 self.needed_locks = {}
11176 if self.op.kind == constants.TAG_NODE:
11177 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11178 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11179 elif self.op.kind == constants.TAG_INSTANCE:
11180 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11181 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11182 elif self.op.kind == constants.TAG_NODEGROUP:
11183 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11185 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11186 # not possible to acquire the BGL based on opcode parameters)
11188 def CheckPrereq(self):
    """Check prerequisites.

    """
11192 if self.op.kind == constants.TAG_CLUSTER:
11193 self.target = self.cfg.GetClusterInfo()
11194 elif self.op.kind == constants.TAG_NODE:
11195 self.target = self.cfg.GetNodeInfo(self.op.name)
11196 elif self.op.kind == constants.TAG_INSTANCE:
11197 self.target = self.cfg.GetInstanceInfo(self.op.name)
11198 elif self.op.kind == constants.TAG_NODEGROUP:
11199 self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11202 str(self.op.kind), errors.ECODE_INVAL)
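# Note (illustrative, not part of the original code): once TagsLU.CheckPrereq
# has run, subclasses only need to work with self.target, which is a taggable
# object.  LUTagsGet below is the simplest example:
#
#   def Exec(self, feedback_fn):
#     return list(self.target.GetTags())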
11205 class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
11211 def ExpandNames(self):
11212 TagsLU.ExpandNames(self)
11214 # Share locks as this is only a read operation
11215 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11217 def Exec(self, feedback_fn):
    """Returns the tag list.

    """
11221 return list(self.target.GetTags())
11224 class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
11230 def ExpandNames(self):
11231 self.needed_locks = {}
11233 def CheckPrereq(self):
11234 """Check prerequisites.
    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
11241 except re.error, err:
11242 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11243 (self.op.pattern, err), errors.ECODE_INVAL)
11245 def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
11251 ilist = cfg.GetAllInstancesInfo().values()
11252 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11253 nlist = cfg.GetAllNodesInfo().values()
11254 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11255 tgts.extend(("/nodegroup/%s" % n.name, n)
11256 for n in cfg.GetAllNodeGroupsInfo().values())
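    # Each match is reported as a (path, tag) pair; the path prefix tells the
    # caller which kind of object carried the tag, e.g. (made-up values):
    #   ("/instances/inst1.example.com", "web")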
    results = []
    for path, target in tgts:
11259 for tag in target.GetTags():
11260 if self.re.search(tag):
          results.append((path, tag))
    return results
11265 class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
11271 def CheckPrereq(self):
11272 """Check prerequisites.
    This checks the type and length of the tag name and value.

    """
11277 TagsLU.CheckPrereq(self)
11278 for tag in self.op.tags:
11279 objects.TaggableObject.ValidateTag(tag)
11281 def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
11287 self.target.AddTag(tag)
11288 except errors.TagError, err:
11289 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11290 self.cfg.Update(self.target, feedback_fn)
11293 class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
11299 def CheckPrereq(self):
11300 """Check prerequisites.
    This checks that we have the given tag.

    """
11305 TagsLU.CheckPrereq(self)
11306 for tag in self.op.tags:
11307 objects.TaggableObject.ValidateTag(tag)
11308 del_tags = frozenset(self.op.tags)
11309 cur_tags = self.target.GetTags()
11311 diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11314 raise errors.OpPrereqError("Tag(s) %s not found" %
11315 (utils.CommaJoin(diff_names), ),
11316 errors.ECODE_NOENT)
11318 def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
11322 for tag in self.op.tags:
11323 self.target.RemoveTag(tag)
11324 self.cfg.Update(self.target, feedback_fn)
11327 class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
11336 def ExpandNames(self):
11337 """Expand names and set required locks.
    This expands the node list, if any.

    """
11342 self.needed_locks = {}
11343 if self.op.on_nodes:
11344 # _GetWantedNodes can be used here, but is not always appropriate to use
11345 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11346 # more information.
11347 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11348 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11350 def _TestDelay(self):
    """Do the actual sleep.

    """
11354 if self.op.on_master:
11355 if not utils.TestDelay(self.op.duration):
11356 raise errors.OpExecError("Error during master delay test")
11357 if self.op.on_nodes:
11358 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11359 for node, node_result in result.items():
11360 node_result.Raise("Failure during rpc call to node %s" % node)
11362 def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
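  # Note (illustrative): with repeat=3 the loop above logs iterations 0/2,
  # 1/2 and 2/2, i.e. the delay itself runs self.op.repeat times.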
11375 class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
11381 # Must be lower than default timeout for WaitForJobChange to see whether it
11382 # notices changed jobs
11383 _CLIENT_CONNECT_TIMEOUT = 20.0
11384 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
11388 """Opens a Unix socket and waits for another program to connect.
    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")
11403 logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)
        # Send details to client
        cb(tmpsock)
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)
    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
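  # Sketch of the handshake implemented above (descriptive only): the socket
  # path is handed to the client through the callback, the client connects
  # within _CLIENT_CONNECT_TIMEOUT, the temporary directory is removed, and
  # the LU then waits up to _CLIENT_CONFIRM_TIMEOUT for the client to close
  # its end as confirmation.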
11436 def _SendNotification(self, test, arg, sockname):
11437 """Sends a notification to the client.
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
11446 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11448 def _Notify(self, prereq, test, arg):
11449 """Notifies the client of a test.
11452 @param prereq: Whether this is a prereq-phase test
11454 @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
11467 def CheckArguments(self):
11468 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11469 self.expandnames_calls = 0
11471 def ExpandNames(self):
11472 checkargs_calls = getattr(self, "checkargs_calls", 0)
11473 if checkargs_calls < 1:
11474 raise errors.ProgrammerError("CheckArguments was not called")
11476 self.expandnames_calls += 1
11478 if self.op.notify_waitlock:
11479 self._Notify(True, constants.JQT_EXPANDNAMES, None)
11481 self.LogInfo("Expanding names")
11483 # Get lock on master node (just to get a lock, not for a particular reason)
11484 self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
11488 def Exec(self, feedback_fn):
11489 if self.expandnames_calls < 1:
11490 raise errors.ProgrammerError("ExpandNames was not called")
11492 if self.op.notify_exec:
11493 self._Notify(False, constants.JQT_EXEC, None)
11495 self.LogInfo("Executing")
11497 if self.op.log_messages:
11498 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11499 for idx, msg in enumerate(self.op.log_messages):
11500 self.LogInfo("Sending log message %s", idx + 1)
11501 feedback_fn(constants.JQT_MSGPREFIX + msg)
11502 # Report how many test messages have been sent
11503 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
11511 class IAllocator(object):
11512 """IAllocator framework.
  An IAllocator instance has four sets of attributes:
11515 - cfg that is needed to query the cluster
11516 - input data (all members of the _KEYS class attribute are required)
11517 - four buffer attributes (in|out_data|text), that represent the
11518 input (to the external script) in text and data structure format,
11519 and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
11525 # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
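  # Illustrative only (made-up values): an allocation request is built by
  # passing every key in _ALLO_KEYS as a keyword argument, e.g.:
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", mem_size=512, disks=[],
  #                    disk_template=constants.DT_PLAIN, os="dummy-os",
  #                    tags=[], nics=[], vcpus=1, hypervisor=None)
  #   ial.Run("hail")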
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
11541 self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.mem_size = self.disks = self.disk_template = None
11545 self.os = self.tags = self.nics = self.vcpus = None
11546 self.hypervisor = None
11547 self.relocate_from = None
11549 self.evac_nodes = None
11551 self.required_nodes = None
11552 # init result fields
11553 self.success = self.info = self.result = None
11554 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11555 keyset = self._ALLO_KEYS
11556 fn = self._AddNewInstance
11557 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11558 keyset = self._RELO_KEYS
11559 fn = self._AddRelocateInstance
11560 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11561 keyset = self._EVAC_KEYS
11562 fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11565 " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
11568 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11569 " IAllocator" % key)
11570 setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
11574 raise errors.ProgrammerError("Missing input parameter '%s' to"
11575 " IAllocator" % key)
11576 self._BuildInputData(fn)
11578 def _ComputeClusterData(self):
11579 """Compute the generic allocator input data.
    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    data = {
      "version": constants.IALLOCATOR_VERSION,
11589 "cluster_name": cfg.GetClusterName(),
11590 "cluster_tags": list(cluster_info.GetTags()),
11591 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
11594 ninfo = cfg.GetAllNodesInfo()
11595 iinfo = cfg.GetAllInstancesInfo().values()
11596 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11599 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11601 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11602 hypervisor_name = self.hypervisor
11603 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11604 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11605 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11606 hypervisor_name = cluster_info.enabled_hypervisors[0]
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
11614 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11616 config_ndata = self._ComputeBasicNodeData(ninfo)
11617 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11618 i_list, config_ndata)
11619 assert len(data["nodes"]) == len(ninfo), \
11620 "Incomplete node data computed"
11622 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11624 self.in_data = data
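  # For reference, the dict built above carries the keys "version",
  # "cluster_name", "cluster_tags", "enabled_hypervisors", "nodegroups",
  # "nodes" and "instances"; _BuildInputData later adds the per-request
  # "request" entry before serializing everything to self.in_text.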
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data."""
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng
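  # The resulting mapping is keyed by group UUID, e.g. (illustrative values):
  #   {"uuid-1234": {"name": "default", "alloc_policy": "preferred"}}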
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)
    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
11651 "tags": list(ninfo.GetTags()),
11652 "primary_ip": ninfo.primary_ip,
11653 "secondary_ip": ninfo.secondary_ip,
11654 "offline": ninfo.offline,
11655 "drained": ninfo.drained,
11656 "master_candidate": ninfo.master_candidate,
11657 "group": ninfo.group,
11658 "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }
11662 node_results[ninfo.name] = pnr
11664 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config
    """
11674 # make a copy of the current dict
11675 node_results = dict(node_results)
11676 for nname, nresult in node_data.items():
11677 assert nname in node_results, "Missing basic data for node %s" % nname
11678 ninfo = node_cfg[nname]
11680 if not (ninfo.offline or ninfo.drained):
11681 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
11686 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11687 'vg_size', 'vg_free', 'cpu_total']:
11688 if attr not in remote_info:
11689 raise errors.OpExecError("Node '%s' didn't return attribute"
11690 " '%s'" % (nname, attr))
11691 if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
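        # Worked example for the adjustment below (illustrative numbers): a
        # primary instance with BE_MEMORY=1024 that currently uses only 512 MB
        # makes this node report 512 MB less free memory, so the allocator
        # treats the difference as reserved even though it is not in use yet.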
11695 # compute memory used by primary instances
11696 i_p_mem = i_p_up_mem = 0
11697 for iinfo, beinfo in i_list:
11698 if iinfo.primary_node == nname:
11699 i_p_mem += beinfo[constants.BE_MEMORY]
11700 if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11704 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11705 remote_info['memory_free'] -= max(0, i_mem_diff)
            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]
        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
11713 "reserved_memory": remote_info['memory_dom0'],
11714 "free_memory": remote_info['memory_free'],
11715 "total_disk": remote_info['vg_size'],
11716 "free_disk": remote_info['vg_free'],
11717 "total_cpus": remote_info['cpu_total'],
11718 "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
11722 node_results[nname] = pnr_dyn
11724 return node_results
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data."""
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
11735 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
11741 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11742 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11743 nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
11746 "admin_up": iinfo.admin_up,
11747 "vcpus": beinfo[constants.BE_VCPUS],
11748 "memory": beinfo[constants.BE_MEMORY],
11750 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
11753 constants.IDISK_MODE: dsk.mode}
11754 for dsk in iinfo.disks],
11755 "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
11760 instance_data[iinfo.name] = pir
11762 return instance_data
11764 def _AddNewInstance(self):
11765 """Add new instance data to allocator structure.
11767 This in combination with _AllocatorGetClusterData will create the
11768 correct structure needed as input for the allocator.
    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11776 if self.disk_template in constants.DTS_INT_MIRROR:
11777 self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "os": self.os,
      "tags": self.tags,
      "disk_template": self.disk_template,
11785 "vcpus": self.vcpus,
11786 "memory": self.mem_size,
11787 "disks": self.disks,
11788 "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
11794 def _AddRelocateInstance(self):
11795 """Add relocate instance data to allocator structure.
11797 This in combination with _IAllocatorGetClusterData will create the
11798 correct structure needed as input for the allocator.
    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
11805 if instance is None:
11806 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11807 " IAllocator" % self.name)
11809 if instance.disk_template not in constants.DTS_MIRRORED:
11810 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11811 errors.ECODE_INVAL)
11813 if instance.disk_template in constants.DTS_INT_MIRROR and \
11814 len(instance.secondary_nodes) != 1:
11815 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11816 errors.ECODE_STATE)
11818 self.required_nodes = 1
11819 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11820 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
    request = {
      "name": self.name,
      "disk_space_total": disk_space,
11825 "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
11830 def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes,
      }
    return request
11839 def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
11847 self.in_data["request"] = request
11849 self.in_text = serializer.Dump(self.in_data)
11851 def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
11856 call_fn = self.rpc.call_iallocator_runner
11858 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11859 result.Raise("Failure while running the iallocator script")
11861 self.out_text = result.payload
    if validate:
      self._ValidateResult()
11865 def _ValidateResult(self):
11866 """Process the allocator results.
11868 This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
11874 except Exception, err:
11875 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11877 if not isinstance(rdict, dict):
11878 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11880 # TODO: remove backwards compatiblity in later versions
11881 if "nodes" in rdict and "result" not in rdict:
11882 rdict["result"] = rdict["nodes"]
11885 for key in "success", "info", "result":
11886 if key not in rdict:
11887 raise errors.OpExecError("Can't parse iallocator results:"
11888 " missing key '%s'" % key)
11889 setattr(self, key, rdict[key])
11891 if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
11895 if self.mode == constants.IALLOCATOR_MODE_RELOC:
11896 assert self.relocate_from is not None
11897 assert self.required_nodes == 1
11899 node2group = dict((name, ndata["group"])
11900 for (name, ndata) in self.in_data["nodes"].items())
11902 fn = compat.partial(self._NodesToGroups, node2group,
11903 self.in_data["nodegroups"])
11905 request_groups = fn(self.relocate_from)
11906 result_groups = fn(rdict["result"])
11908 if result_groups != request_groups:
11909 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11910 " differ from original groups (%s)" %
11911 (utils.CommaJoin(result_groups),
11912 utils.CommaJoin(request_groups)))
11914 self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
11918 """Returns a list of unique group names for a list of nodes.
11920 @type node2group: dict
11921 @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)
11947 return sorted(result)
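  # Illustrative example (made-up data):
  #   IAllocator._NodesToGroups({"node1": "uuid-a"},
  #                             {"uuid-a": {"name": "default"}},
  #                             ["node1", "unknown-node"]) == ["default"]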
11950 class LUTestAllocator(NoHooksLU):
11951 """Run allocator tests.
  This LU runs the allocator tests.

  """
11956 def CheckPrereq(self):
11957 """Check prerequisites.
    This checks the opcode parameters depending on the direction and mode test.

    """
11962 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11963 for attr in ["mem_size", "disks", "disk_template",
11964 "os", "tags", "nics", "vcpus"]:
11965 if not hasattr(self.op, attr):
11966 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11967 attr, errors.ECODE_INVAL)
11968 iname = self.cfg.ExpandInstanceName(self.op.name)
11969 if iname is not None:
11970 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11971 iname, errors.ECODE_EXISTS)
11972 if not isinstance(self.op.nics, list):
11973 raise errors.OpPrereqError("Invalid parameter 'nics'",
11974 errors.ECODE_INVAL)
11975 if not isinstance(self.op.disks, list):
11976 raise errors.OpPrereqError("Invalid parameter 'disks'",
11977 errors.ECODE_INVAL)
11978 for row in self.op.disks:
11979 if (not isinstance(row, dict) or
11980 "size" not in row or
11981 not isinstance(row["size"], int) or
11982 "mode" not in row or
11983 row["mode"] not in ['r', 'w']):
11984 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11985 " parameter", errors.ECODE_INVAL)
11986 if self.op.hypervisor is None:
11987 self.op.hypervisor = self.cfg.GetHypervisorType()
11988 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11989 fname = _ExpandInstanceName(self.cfg, self.op.name)
11990 self.op.name = fname
11991 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11992 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11993 if not hasattr(self.op, "evac_nodes"):
11994 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11995 " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11998 self.op.mode, errors.ECODE_INVAL)
12000 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12001 if self.op.allocator is None:
12002 raise errors.OpPrereqError("Missing allocator name",
12003 errors.ECODE_INVAL)
12004 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12005 raise errors.OpPrereqError("Wrong allocator test '%s'" %
12006 self.op.direction, errors.ECODE_INVAL)
12008 def Exec(self, feedback_fn):
    """Run the allocator test.

    """
12012 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
12017 disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
12025 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12026 ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
12031 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12032 ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
12037 " LUTestAllocator.Exec", self.op.mode)
12039 if self.op.direction == constants.IALLOCATOR_DIR_IN:
12040 result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
12047 #: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }
12055 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12058 def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12068 errors.ECODE_INVAL)
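# Illustrative only: code handling a query opcode would pick the right
# implementation class through the helper above, e.g.:
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   # an unknown resource name raises OpPrereqError instead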