code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - implement BuildHooksNodes
  87     - redefine HPATH and HTYPE
  88     - optionally redefine their run requirements:
  89         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  90
  91   Note that all commands require root permissions.
  92
  93   @ivar dry_run_result: the value (if any) that will be returned to the caller
  94       in dry-run mode (signalled by opcode dry_run parameter)
  95
  96   """
  97   HPATH = None
  98   HTYPE = None
  99   REQ_BGL = True
 100
 101   def __init__(self, processor, op, context, rpc):
 102     """Constructor for LogicalUnit.
 103
 104     This needs to be overridden in derived classes in order to check op
 105     validity.
 106
 107     """
 108     self.proc = processor
 109     self.op = op
 110     self.cfg = context.cfg
 111     self.context = context
 112     self.rpc = rpc
 113     # Dicts used to declare locking needs to mcpu
 114     self.needed_locks = None
 115     self.acquired_locks = {}
 116     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 117     self.add_locks = {}
 118     self.remove_locks = {}
 119     # Used to force good behavior when calling helper functions
 120     self.recalculate_locks = {}
 121     self.__ssh = None
 122     # logging
 123     self.Log = processor.Log # pylint: disable-msg=C0103
 124     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 125     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 126     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 127     # support for dry-run
 128     self.dry_run_result = None
 129     # support for generic debug attribute
 130     if (not hasattr(self.op, "debug_level") or
 131         not isinstance(self.op.debug_level, int)):
 132       self.op.debug_level = 0
 133
 134     # Tasklets
 135     self.tasklets = None
 136
 137     # Validate opcode parameters and set defaults
 138     self.op.Validate(True)
 139
 140     self.CheckArguments()
 141
 142   def __GetSSH(self):
 143     """Returns the SshRunner object
 144
 145     """
 146     if not self.__ssh:
 147       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 148     return self.__ssh
 149
 150   ssh = property(fget=__GetSSH)
 151
 152   def CheckArguments(self):
 153     """Check syntactic validity for the opcode arguments.
 154
 155     This method is for doing a simple syntactic check and ensure
 156     validity of opcode parameters, without any cluster-related
 157     checks. While the same can be accomplished in ExpandNames and/or
 158     CheckPrereq, doing these separate is better because:
 159
 160       - ExpandNames is left as as purely a lock-related function
 161       - CheckPrereq is run after we have acquired locks (and possible
 162         waited for them)
 163
 164     The function is allowed to change the self.op attribute so that
 165     later methods can no longer worry about missing parameters.
 166
 167     """
 168     pass
 169
 170   def ExpandNames(self):
 171     """Expand names for this LU.
 172
 173     This method is called before starting to execute the opcode, and it should
 174     update all the parameters of the opcode to their canonical form (e.g. a
 175     short node name must be fully expanded after this method has successfully
 176     completed). This way locking, hooks, logging, etc. can work correctly.
 177
 178     LUs which implement this method must also populate the self.needed_locks
 179     member, as a dict with lock levels as keys, and a list of needed lock names
 180     as values. Rules:
 181
 182       - use an empty dict if you don't need any lock
 183       - if you don't need any lock at a particular level omit that level
 184       - don't put anything for the BGL level
 185       - if you want all locks at a level use locking.ALL_SET as a value
 186
 187     If you need to share locks (rather than acquire them exclusively) at one
 188     level you can modify self.share_locks, setting a true value (usually 1) for
 189     that level. By default locks are not shared.
 190
 191     This function can also define a list of tasklets, which then will be
 192     executed in order instead of the usual LU-level CheckPrereq and Exec
 193     functions, if those are not defined by the LU.
 194
 195     Examples::
 196
 197       # Acquire all nodes and one instance
 198       self.needed_locks = {
 199         locking.LEVEL_NODE: locking.ALL_SET,
 200         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 201       }
 202       # Acquire just two nodes
 203       self.needed_locks = {
 204         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 205       }
 206       # Acquire no locks
 207       self.needed_locks = {} # No, you can't leave it to the default value None
 208
 209     """
 210     # The implementation of this method is mandatory only if the new LU is
 211     # concurrent, so that old LUs don't need to be changed all at the same
 212     # time.
 213     if self.REQ_BGL:
 214       self.needed_locks = {} # Exclusive LUs don't need locks.
 215     else:
 216       raise NotImplementedError
 217
 218   def DeclareLocks(self, level):
 219     """Declare LU locking needs for a level
 220
 221     While most LUs can just declare their locking needs at ExpandNames time,
 222     sometimes there's the need to calculate some locks after having acquired
 223     the ones before. This function is called just before acquiring locks at a
 224     particular level, but after acquiring the ones at lower levels, and permits
 225     such calculations. It can be used to modify self.needed_locks, and by
 226     default it does nothing.
 227
 228     This function is only called if you have something already set in
 229     self.needed_locks for the level.
 230
 231     @param level: Locking level which is going to be locked
 232     @type level: member of ganeti.locking.LEVELS
 233
 234     """
 235
 236   def CheckPrereq(self):
 237     """Check prerequisites for this LU.
 238
 239     This method should check that the prerequisites for the execution
 240     of this LU are fulfilled. It can do internode communication, but
 241     it should be idempotent - no cluster or system changes are
 242     allowed.
 243
 244     The method should raise errors.OpPrereqError in case something is
 245     not fulfilled. Its return value is ignored.
 246
 247     This method should also update all the parameters of the opcode to
 248     their canonical form if it hasn't been done by ExpandNames before.
 249
 250     """
 251     if self.tasklets is not None:
 252       for (idx, tl) in enumerate(self.tasklets):
 253         logging.debug("Checking prerequisites for tasklet %s/%s",
 254                       idx + 1, len(self.tasklets))
 255         tl.CheckPrereq()
 256     else:
 257       pass
 258
 259   def Exec(self, feedback_fn):
 260     """Execute the LU.
 261
 262     This method should implement the actual work. It should raise
 263     errors.OpExecError for failures that are somewhat dealt with in
 264     code, or expected.
 265
 266     """
 267     if self.tasklets is not None:
 268       for (idx, tl) in enumerate(self.tasklets):
 269         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 270         tl.Exec(feedback_fn)
 271     else:
 272       raise NotImplementedError
 273
 274   def BuildHooksEnv(self):
 275     """Build hooks environment for this LU.
 276
 277     @rtype: dict
 278     @return: Dictionary containing the environment that will be used for
 279       running the hooks for this LU. The keys of the dict must not be prefixed
 280       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 281       will extend the environment with additional variables. If no environment
 282       should be defined, an empty dictionary should be returned (not C{None}).
 283     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 284       will not be called.
 285
 286     """
 287     raise NotImplementedError
 288
 289   def BuildHooksNodes(self):
 290     """Build list of nodes to run LU's hooks.
 291
 292     @rtype: tuple; (list, list)
 293     @return: Tuple containing a list of node names on which the hook
 294       should run before the execution and a list of node names on which the
 295       hook should run after the execution. No nodes should be returned as an
 296       empty list (and not None).
 297     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 298       will not be called.
 299
 300     """
 301     raise NotImplementedError
 302
 303   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 304     """Notify the LU about the results of its hooks.
 305
 306     This method is called every time a hooks phase is executed, and notifies
 307     the Logical Unit about the hooks' result. The LU can then use it to alter
 308     its result based on the hooks.  By default the method does nothing and the
 309     previous result is passed back unchanged but any LU can define it if it
 310     wants to use the local cluster hook-scripts somehow.
 311
 312     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 313         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 314     @param hook_results: the results of the multi-node hooks rpc call
 315     @param feedback_fn: function used send feedback back to the caller
 316     @param lu_result: the previous Exec result this LU had, or None
 317         in the PRE phase
 318     @return: the new Exec result, based on the previous result
 319         and hook results
 320
 321     """
 322     # API must be kept, thus we ignore the unused argument and could
 323     # be a function warnings
 324     # pylint: disable-msg=W0613,R0201
 325     return lu_result
 326
 327   def _ExpandAndLockInstance(self):
 328     """Helper function to expand and lock an instance.
 329
 330     Many LUs that work on an instance take its name in self.op.instance_name
 331     and need to expand it and then declare the expanded name for locking. This
 332     function does it, and then updates self.op.instance_name to the expanded
 333     name. It also initializes needed_locks as a dict, if this hasn't been done
 334     before.
 335
 336     """
 337     if self.needed_locks is None:
 338       self.needed_locks = {}
 339     else:
 340       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 341         "_ExpandAndLockInstance called with instance-level locks set"
 342     self.op.instance_name = _ExpandInstanceName(self.cfg,
 343                                                 self.op.instance_name)
 344     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 345
 346   def _LockInstancesNodes(self, primary_only=False):
 347     """Helper function to declare instances' nodes for locking.
 348
 349     This function should be called after locking one or more instances to lock
 350     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 351     with all primary or secondary nodes for instances already locked and
 352     present in self.needed_locks[locking.LEVEL_INSTANCE].
 353
 354     It should be called from DeclareLocks, and for safety only works if
 355     self.recalculate_locks[locking.LEVEL_NODE] is set.
 356
 357     In the future it may grow parameters to just lock some instance's nodes, or
 358     to just lock primaries or secondary nodes, if needed.
 359
 360     If should be called in DeclareLocks in a way similar to::
 361
 362       if level == locking.LEVEL_NODE:
 363         self._LockInstancesNodes()
 364
 365     @type primary_only: boolean
 366     @param primary_only: only lock primary nodes of locked instances
 367
 368     """
 369     assert locking.LEVEL_NODE in self.recalculate_locks, \
 370       "_LockInstancesNodes helper function called with no nodes to recalculate"
 371
 372     # TODO: check if we're really been called with the instance locks held
 373
 374     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 375     # future we might want to have different behaviors depending on the value
 376     # of self.recalculate_locks[locking.LEVEL_NODE]
 377     wanted_nodes = []
 378     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 379       instance = self.context.cfg.GetInstanceInfo(instance_name)
 380       wanted_nodes.append(instance.primary_node)
 381       if not primary_only:
 382         wanted_nodes.extend(instance.secondary_nodes)
 383
 384     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 385       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 386     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 387       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 388
 389     del self.recalculate_locks[locking.LEVEL_NODE]
 390
 391
 392 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 393   """Simple LU which runs no hooks.
 394
 395   This LU is intended as a parent for other LogicalUnits which will
 396   run no hooks, in order to reduce duplicate code.
 397
 398   """
 399   HPATH = None
 400   HTYPE = None
 401
 402   def BuildHooksEnv(self):
 403     """Empty BuildHooksEnv for NoHooksLu.
 404
 405     This just raises an error.
 406
 407     """
 408     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 409
 410   def BuildHooksNodes(self):
 411     """Empty BuildHooksNodes for NoHooksLU.
 412
 413     """
 414     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 415
 416
 417 class Tasklet:
 418   """Tasklet base class.
 419
 420   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 421   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 422   tasklets know nothing about locks.
 423
 424   Subclasses must follow these rules:
 425     - Implement CheckPrereq
 426     - Implement Exec
 427
 428   """
 429   def __init__(self, lu):
 430     self.lu = lu
 431
 432     # Shortcuts
 433     self.cfg = lu.cfg
 434     self.rpc = lu.rpc
 435
 436   def CheckPrereq(self):
 437     """Check prerequisites for this tasklets.
 438
 439     This method should check whether the prerequisites for the execution of
 440     this tasklet are fulfilled. It can do internode communication, but it
 441     should be idempotent - no cluster or system changes are allowed.
 442
 443     The method should raise errors.OpPrereqError in case something is not
 444     fulfilled. Its return value is ignored.
 445
 446     This method should also update all parameters to their canonical form if it
 447     hasn't been done before.
 448
 449     """
 450     pass
 451
 452   def Exec(self, feedback_fn):
 453     """Execute the tasklet.
 454
 455     This method should implement the actual work. It should raise
 456     errors.OpExecError for failures that are somewhat dealt with in code, or
 457     expected.
 458
 459     """
 460     raise NotImplementedError
 461
 462
 463 class _QueryBase:
 464   """Base for query utility classes.
 465
 466   """
 467   #: Attribute holding field definitions
 468   FIELDS = None
 469
 470   def __init__(self, filter_, fields, use_locking):
 471     """Initializes this class.
 472
 473     """
 474     self.use_locking = use_locking
 475
 476     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 477                              namefield="name")
 478     self.requested_data = self.query.RequestedData()
 479     self.names = self.query.RequestedNames()
 480
 481     # Sort only if no names were requested
 482     self.sort_by_name = not self.names
 483
 484     self.do_locking = None
 485     self.wanted = None
 486
 487   def _GetNames(self, lu, all_names, lock_level):
 488     """Helper function to determine names asked for in the query.
 489
 490     """
 491     if self.do_locking:
 492       names = lu.acquired_locks[lock_level]
 493     else:
 494       names = all_names
 495
 496     if self.wanted == locking.ALL_SET:
 497       assert not self.names
 498       # caller didn't specify names, so ordering is not important
 499       return utils.NiceSort(names)
 500
 501     # caller specified names and we must keep the same order
 502     assert self.names
 503     assert not self.do_locking or lu.acquired_locks[lock_level]
 504
 505     missing = set(self.wanted).difference(names)
 506     if missing:
 507       raise errors.OpExecError("Some items were removed before retrieving"
 508                                " their data: %s" % missing)
 509
 510     # Return expanded names
 511     return self.wanted
 512
 513   def ExpandNames(self, lu):
 514     """Expand names for this query.
 515
 516     See L{LogicalUnit.ExpandNames}.
 517
 518     """
 519     raise NotImplementedError()
 520
 521   def DeclareLocks(self, lu, level):
 522     """Declare locks for this query.
 523
 524     See L{LogicalUnit.DeclareLocks}.
 525
 526     """
 527     raise NotImplementedError()
 528
 529   def _GetQueryData(self, lu):
 530     """Collects all data for this query.
 531
 532     @return: Query data object
 533
 534     """
 535     raise NotImplementedError()
 536
 537   def NewStyleQuery(self, lu):
 538     """Collect data and execute query.
 539
 540     """
 541     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 542                                   sort_by_name=self.sort_by_name)
 543
 544   def OldStyleQuery(self, lu):
 545     """Collect data and execute query.
 546
 547     """
 548     return self.query.OldStyleQuery(self._GetQueryData(lu),
 549                                     sort_by_name=self.sort_by_name)
 550
 551
 552 def _GetWantedNodes(lu, nodes):
 553   """Returns list of checked and expanded node names.
 554
 555   @type lu: L{LogicalUnit}
 556   @param lu: the logical unit on whose behalf we execute
 557   @type nodes: list
 558   @param nodes: list of node names or None for all nodes
 559   @rtype: list
 560   @return: the list of nodes, sorted
 561   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 562
 563   """
 564   if nodes:
 565     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 566
 567   return utils.NiceSort(lu.cfg.GetNodeList())
 568
 569
 570 def _GetWantedInstances(lu, instances):
 571   """Returns list of checked and expanded instance names.
 572
 573   @type lu: L{LogicalUnit}
 574   @param lu: the logical unit on whose behalf we execute
 575   @type instances: list
 576   @param instances: list of instance names or None for all instances
 577   @rtype: list
 578   @return: the list of instances, sorted
 579   @raise errors.OpPrereqError: if the instances parameter is wrong type
 580   @raise errors.OpPrereqError: if any of the passed instances is not found
 581
 582   """
 583   if instances:
 584     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 585   else:
 586     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 587   return wanted
 588
 589
 590 def _GetUpdatedParams(old_params, update_dict,
 591                       use_default=True, use_none=False):
 592   """Return the new version of a parameter dictionary.
 593
 594   @type old_params: dict
 595   @param old_params: old parameters
 596   @type update_dict: dict
 597   @param update_dict: dict containing new parameter values, or
 598       constants.VALUE_DEFAULT to reset the parameter to its default
 599       value
 600   @param use_default: boolean
 601   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 602       values as 'to be deleted' values
 603   @param use_none: boolean
 604   @type use_none: whether to recognise C{None} values as 'to be
 605       deleted' values
 606   @rtype: dict
 607   @return: the new parameter dictionary
 608
 609   """
 610   params_copy = copy.deepcopy(old_params)
 611   for key, val in update_dict.iteritems():
 612     if ((use_default and val == constants.VALUE_DEFAULT) or
 613         (use_none and val is None)):
 614       try:
 615         del params_copy[key]
 616       except KeyError:
 617         pass
 618     else:
 619       params_copy[key] = val
 620   return params_copy
 621
 622
 623 def _RunPostHook(lu, node_name):
 624   """Runs the post-hook for an opcode on a single node.
 625
 626   """
 627   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 628   try:
 629     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 630   except:
 631     # pylint: disable-msg=W0702
 632     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 633
 634
 635 def _CheckOutputFields(static, dynamic, selected):
 636   """Checks whether all selected fields are valid.
 637
 638   @type static: L{utils.FieldSet}
 639   @param static: static fields set
 640   @type dynamic: L{utils.FieldSet}
 641   @param dynamic: dynamic fields set
 642
 643   """
 644   f = utils.FieldSet()
 645   f.Extend(static)
 646   f.Extend(dynamic)
 647
 648   delta = f.NonMatching(selected)
 649   if delta:
 650     raise errors.OpPrereqError("Unknown output fields selected: %s"
 651                                % ",".join(delta), errors.ECODE_INVAL)
 652
 653
 654 def _CheckGlobalHvParams(params):
 655   """Validates that given hypervisor params are not global ones.
 656
 657   This will ensure that instances don't get customised versions of
 658   global params.
 659
 660   """
 661   used_globals = constants.HVC_GLOBALS.intersection(params)
 662   if used_globals:
 663     msg = ("The following hypervisor parameters are global and cannot"
 664            " be customized at instance level, please modify them at"
 665            " cluster level: %s" % utils.CommaJoin(used_globals))
 666     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 667
 668
 669 def _CheckNodeOnline(lu, node, msg=None):
 670   """Ensure that a given node is online.
 671
 672   @param lu: the LU on behalf of which we make the check
 673   @param node: the node to check
 674   @param msg: if passed, should be a message to replace the default one
 675   @raise errors.OpPrereqError: if the node is offline
 676
 677   """
 678   if msg is None:
 679     msg = "Can't use offline node"
 680   if lu.cfg.GetNodeInfo(node).offline:
 681     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 682
 683
 684 def _CheckNodeNotDrained(lu, node):
 685   """Ensure that a given node is not drained.
 686
 687   @param lu: the LU on behalf of which we make the check
 688   @param node: the node to check
 689   @raise errors.OpPrereqError: if the node is drained
 690
 691   """
 692   if lu.cfg.GetNodeInfo(node).drained:
 693     raise errors.OpPrereqError("Can't use drained node %s" % node,
 694                                errors.ECODE_STATE)
 695
 696
 697 def _CheckNodeVmCapable(lu, node):
 698   """Ensure that a given node is vm capable.
 699
 700   @param lu: the LU on behalf of which we make the check
 701   @param node: the node to check
 702   @raise errors.OpPrereqError: if the node is not vm capable
 703
 704   """
 705   if not lu.cfg.GetNodeInfo(node).vm_capable:
 706     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 707                                errors.ECODE_STATE)
 708
 709
 710 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 711   """Ensure that a node supports a given OS.
 712
 713   @param lu: the LU on behalf of which we make the check
 714   @param node: the node to check
 715   @param os_name: the OS to query about
 716   @param force_variant: whether to ignore variant errors
 717   @raise errors.OpPrereqError: if the node is not supporting the OS
 718
 719   """
 720   result = lu.rpc.call_os_get(node, os_name)
 721   result.Raise("OS '%s' not in supported OS list for node %s" %
 722                (os_name, node),
 723                prereq=True, ecode=errors.ECODE_INVAL)
 724   if not force_variant:
 725     _CheckOSVariant(result.payload, os_name)
 726
 727
 728 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 729   """Ensure that a node has the given secondary ip.
 730
 731   @type lu: L{LogicalUnit}
 732   @param lu: the LU on behalf of which we make the check
 733   @type node: string
 734   @param node: the node to check
 735   @type secondary_ip: string
 736   @param secondary_ip: the ip to check
 737   @type prereq: boolean
 738   @param prereq: whether to throw a prerequisite or an execute error
 739   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 740   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 741
 742   """
 743   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 744   result.Raise("Failure checking secondary ip on node %s" % node,
 745                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 746   if not result.payload:
 747     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 748            " please fix and re-run this command" % secondary_ip)
 749     if prereq:
 750       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 751     else:
 752       raise errors.OpExecError(msg)
 753
 754
 755 def _GetClusterDomainSecret():
 756   """Reads the cluster domain secret.
 757
 758   """
 759   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 760                                strict=True)
 761
 762
 763 def _CheckInstanceDown(lu, instance, reason):
 764   """Ensure that an instance is not running."""
 765   if instance.admin_up:
 766     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 767                                (instance.name, reason), errors.ECODE_STATE)
 768
 769   pnode = instance.primary_node
 770   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 771   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 772               prereq=True, ecode=errors.ECODE_ENVIRON)
 773
 774   if instance.name in ins_l.payload:
 775     raise errors.OpPrereqError("Instance %s is running, %s" %
 776                                (instance.name, reason), errors.ECODE_STATE)
 777
 778
 779 def _ExpandItemName(fn, name, kind):
 780   """Expand an item name.
 781
 782   @param fn: the function to use for expansion
 783   @param name: requested item name
 784   @param kind: text description ('Node' or 'Instance')
 785   @return: the resolved (full) name
 786   @raise errors.OpPrereqError: if the item is not found
 787
 788   """
 789   full_name = fn(name)
 790   if full_name is None:
 791     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 792                                errors.ECODE_NOENT)
 793   return full_name
 794
 795
 796 def _ExpandNodeName(cfg, name):
 797   """Wrapper over L{_ExpandItemName} for nodes."""
 798   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 799
 800
 801 def _ExpandInstanceName(cfg, name):
 802   """Wrapper over L{_ExpandItemName} for instance."""
 803   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 804
 805
 806 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 807                           memory, vcpus, nics, disk_template, disks,
 808                           bep, hvp, hypervisor_name):
 809   """Builds instance related env variables for hooks
 810
 811   This builds the hook environment from individual variables.
 812
 813   @type name: string
 814   @param name: the name of the instance
 815   @type primary_node: string
 816   @param primary_node: the name of the instance's primary node
 817   @type secondary_nodes: list
 818   @param secondary_nodes: list of secondary nodes as strings
 819   @type os_type: string
 820   @param os_type: the name of the instance's OS
 821   @type status: boolean
 822   @param status: the should_run status of the instance
 823   @type memory: string
 824   @param memory: the memory size of the instance
 825   @type vcpus: string
 826   @param vcpus: the count of VCPUs the instance has
 827   @type nics: list
 828   @param nics: list of tuples (ip, mac, mode, link) representing
 829       the NICs the instance has
 830   @type disk_template: string
 831   @param disk_template: the disk template of the instance
 832   @type disks: list
 833   @param disks: the list of (size, mode) pairs
 834   @type bep: dict
 835   @param bep: the backend parameters for the instance
 836   @type hvp: dict
 837   @param hvp: the hypervisor parameters for the instance
 838   @type hypervisor_name: string
 839   @param hypervisor_name: the hypervisor for the instance
 840   @rtype: dict
 841   @return: the hook environment for this instance
 842
 843   """
 844   if status:
 845     str_status = "up"
 846   else:
 847     str_status = "down"
 848   env = {
 849     "OP_TARGET": name,
 850     "INSTANCE_NAME": name,
 851     "INSTANCE_PRIMARY": primary_node,
 852     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 853     "INSTANCE_OS_TYPE": os_type,
 854     "INSTANCE_STATUS": str_status,
 855     "INSTANCE_MEMORY": memory,
 856     "INSTANCE_VCPUS": vcpus,
 857     "INSTANCE_DISK_TEMPLATE": disk_template,
 858     "INSTANCE_HYPERVISOR": hypervisor_name,
 859   }
 860
 861   if nics:
 862     nic_count = len(nics)
 863     for idx, (ip, mac, mode, link) in enumerate(nics):
 864       if ip is None:
 865         ip = ""
 866       env["INSTANCE_NIC%d_IP" % idx] = ip
 867       env["INSTANCE_NIC%d_MAC" % idx] = mac
 868       env["INSTANCE_NIC%d_MODE" % idx] = mode
 869       env["INSTANCE_NIC%d_LINK" % idx] = link
 870       if mode == constants.NIC_MODE_BRIDGED:
 871         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 872   else:
 873     nic_count = 0
 874
 875   env["INSTANCE_NIC_COUNT"] = nic_count
 876
 877   if disks:
 878     disk_count = len(disks)
 879     for idx, (size, mode) in enumerate(disks):
 880       env["INSTANCE_DISK%d_SIZE" % idx] = size
 881       env["INSTANCE_DISK%d_MODE" % idx] = mode
 882   else:
 883     disk_count = 0
 884
 885   env["INSTANCE_DISK_COUNT"] = disk_count
 886
 887   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 888     for key, value in source.items():
 889       env["INSTANCE_%s_%s" % (kind, key)] = value
 890
 891   return env
 892
 893
 894 def _NICListToTuple(lu, nics):
 895   """Build a list of nic information tuples.
 896
 897   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 898   value in LUInstanceQueryData.
 899
 900   @type lu:  L{LogicalUnit}
 901   @param lu: the logical unit on whose behalf we execute
 902   @type nics: list of L{objects.NIC}
 903   @param nics: list of nics to convert to hooks tuples
 904
 905   """
 906   hooks_nics = []
 907   cluster = lu.cfg.GetClusterInfo()
 908   for nic in nics:
 909     ip = nic.ip
 910     mac = nic.mac
 911     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 912     mode = filled_params[constants.NIC_MODE]
 913     link = filled_params[constants.NIC_LINK]
 914     hooks_nics.append((ip, mac, mode, link))
 915   return hooks_nics
 916
 917
 918 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 919   """Builds instance related env variables for hooks from an object.
 920
 921   @type lu: L{LogicalUnit}
 922   @param lu: the logical unit on whose behalf we execute
 923   @type instance: L{objects.Instance}
 924   @param instance: the instance for which we should build the
 925       environment
 926   @type override: dict
 927   @param override: dictionary with key/values that will override
 928       our values
 929   @rtype: dict
 930   @return: the hook environment dictionary
 931
 932   """
 933   cluster = lu.cfg.GetClusterInfo()
 934   bep = cluster.FillBE(instance)
 935   hvp = cluster.FillHV(instance)
 936   args = {
 937     'name': instance.name,
 938     'primary_node': instance.primary_node,
 939     'secondary_nodes': instance.secondary_nodes,
 940     'os_type': instance.os,
 941     'status': instance.admin_up,
 942     'memory': bep[constants.BE_MEMORY],
 943     'vcpus': bep[constants.BE_VCPUS],
 944     'nics': _NICListToTuple(lu, instance.nics),
 945     'disk_template': instance.disk_template,
 946     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 947     'bep': bep,
 948     'hvp': hvp,
 949     'hypervisor_name': instance.hypervisor,
 950   }
 951   if override:
 952     args.update(override)
 953   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 954
 955
 956 def _AdjustCandidatePool(lu, exceptions):
 957   """Adjust the candidate pool after node operations.
 958
 959   """
 960   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 961   if mod_list:
 962     lu.LogInfo("Promoted nodes to master candidate role: %s",
 963                utils.CommaJoin(node.name for node in mod_list))
 964     for name in mod_list:
 965       lu.context.ReaddNode(name)
 966   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 967   if mc_now > mc_max:
 968     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 969                (mc_now, mc_max))
 970
 971
 972 def _DecideSelfPromotion(lu, exceptions=None):
 973   """Decide whether I should promote myself as a master candidate.
 974
 975   """
 976   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 977   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 978   # the new node will increase mc_max with one, so:
 979   mc_should = min(mc_should + 1, cp_size)
 980   return mc_now < mc_should
 981
 982
 983 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 984   """Check that the brigdes needed by a list of nics exist.
 985
 986   """
 987   cluster = lu.cfg.GetClusterInfo()
 988   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 989   brlist = [params[constants.NIC_LINK] for params in paramslist
 990             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 991   if brlist:
 992     result = lu.rpc.call_bridges_exist(target_node, brlist)
 993     result.Raise("Error checking bridges on destination node '%s'" %
 994                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 995
 996
 997 def _CheckInstanceBridgesExist(lu, instance, node=None):
 998   """Check that the brigdes needed by an instance exist.
 999
1000   """
1001   if node is None:
1002     node = instance.primary_node
1003   _CheckNicsBridgesExist(lu, instance.nics, node)
1004
1005
1006 def _CheckOSVariant(os_obj, name):
1007   """Check whether an OS name conforms to the os variants specification.
1008
1009   @type os_obj: L{objects.OS}
1010   @param os_obj: OS object to check
1011   @type name: string
1012   @param name: OS name passed by the user, to check for validity
1013
1014   """
1015   if not os_obj.supported_variants:
1016     return
1017   variant = objects.OS.GetVariant(name)
1018   if not variant:
1019     raise errors.OpPrereqError("OS name must include a variant",
1020                                errors.ECODE_INVAL)
1021
1022   if variant not in os_obj.supported_variants:
1023     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1024
1025
1026 def _GetNodeInstancesInner(cfg, fn):
1027   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1028
1029
1030 def _GetNodeInstances(cfg, node_name):
1031   """Returns a list of all primary and secondary instances on a node.
1032
1033   """
1034
1035   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1036
1037
1038 def _GetNodePrimaryInstances(cfg, node_name):
1039   """Returns primary instances on a node.
1040
1041   """
1042   return _GetNodeInstancesInner(cfg,
1043                                 lambda inst: node_name == inst.primary_node)
1044
1045
1046 def _GetNodeSecondaryInstances(cfg, node_name):
1047   """Returns secondary instances on a node.
1048
1049   """
1050   return _GetNodeInstancesInner(cfg,
1051                                 lambda inst: node_name in inst.secondary_nodes)
1052
1053
1054 def _GetStorageTypeArgs(cfg, storage_type):
1055   """Returns the arguments for a storage type.
1056
1057   """
1058   # Special case for file storage
1059   if storage_type == constants.ST_FILE:
1060     # storage.FileStorage wants a list of storage directories
1061     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1062
1063   return []
1064
1065
1066 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1067   faulty = []
1068
1069   for dev in instance.disks:
1070     cfg.SetDiskID(dev, node_name)
1071
1072   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1073   result.Raise("Failed to get disk status from node %s" % node_name,
1074                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075
1076   for idx, bdev_status in enumerate(result.payload):
1077     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1078       faulty.append(idx)
1079
1080   return faulty
1081
1082
1083 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1084   """Check the sanity of iallocator and node arguments and use the
1085   cluster-wide iallocator if appropriate.
1086
1087   Check that at most one of (iallocator, node) is specified. If none is
1088   specified, then the LU's opcode's iallocator slot is filled with the
1089   cluster-wide default iallocator.
1090
1091   @type iallocator_slot: string
1092   @param iallocator_slot: the name of the opcode iallocator slot
1093   @type node_slot: string
1094   @param node_slot: the name of the opcode target node slot
1095
1096   """
1097   node = getattr(lu.op, node_slot, None)
1098   iallocator = getattr(lu.op, iallocator_slot, None)
1099
1100   if node is not None and iallocator is not None:
1101     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1102                                errors.ECODE_INVAL)
1103   elif node is None and iallocator is None:
1104     default_iallocator = lu.cfg.GetDefaultIAllocator()
1105     if default_iallocator:
1106       setattr(lu.op, iallocator_slot, default_iallocator)
1107     else:
1108       raise errors.OpPrereqError("No iallocator or node given and no"
1109                                  " cluster-wide default iallocator found."
1110                                  " Please specify either an iallocator or a"
1111                                  " node, or set a cluster-wide default"
1112                                  " iallocator.")
1113
1114
1115 class LUClusterPostInit(LogicalUnit):
1116   """Logical unit for running hooks after cluster initialization.
1117
1118   """
1119   HPATH = "cluster-init"
1120   HTYPE = constants.HTYPE_CLUSTER
1121
1122   def BuildHooksEnv(self):
1123     """Build hooks env.
1124
1125     """
1126     return {
1127       "OP_TARGET": self.cfg.GetClusterName(),
1128       }
1129
1130   def BuildHooksNodes(self):
1131     """Build hooks nodes.
1132
1133     """
1134     return ([], [self.cfg.GetMasterNode()])
1135
1136   def Exec(self, feedback_fn):
1137     """Nothing to do.
1138
1139     """
1140     return True
1141
1142
1143 class LUClusterDestroy(LogicalUnit):
1144   """Logical unit for destroying the cluster.
1145
1146   """
1147   HPATH = "cluster-destroy"
1148   HTYPE = constants.HTYPE_CLUSTER
1149
1150   def BuildHooksEnv(self):
1151     """Build hooks env.
1152
1153     """
1154     return {
1155       "OP_TARGET": self.cfg.GetClusterName(),
1156       }
1157
1158   def BuildHooksNodes(self):
1159     """Build hooks nodes.
1160
1161     """
1162     return ([], [])
1163
1164   def CheckPrereq(self):
1165     """Check prerequisites.
1166
1167     This checks whether the cluster is empty.
1168
1169     Any errors are signaled by raising errors.OpPrereqError.
1170
1171     """
1172     master = self.cfg.GetMasterNode()
1173
1174     nodelist = self.cfg.GetNodeList()
1175     if len(nodelist) != 1 or nodelist[0] != master:
1176       raise errors.OpPrereqError("There are still %d node(s) in"
1177                                  " this cluster." % (len(nodelist) - 1),
1178                                  errors.ECODE_INVAL)
1179     instancelist = self.cfg.GetInstanceList()
1180     if instancelist:
1181       raise errors.OpPrereqError("There are still %d instance(s) in"
1182                                  " this cluster." % len(instancelist),
1183                                  errors.ECODE_INVAL)
1184
1185   def Exec(self, feedback_fn):
1186     """Destroys the cluster.
1187
1188     """
1189     master = self.cfg.GetMasterNode()
1190
1191     # Run post hooks on master node before it's removed
1192     _RunPostHook(self, master)
1193
1194     result = self.rpc.call_node_stop_master(master, False)
1195     result.Raise("Could not disable the master role")
1196
1197     return master
1198
1199
1200 def _VerifyCertificate(filename):
1201   """Verifies a certificate for LUClusterVerify.
1202
1203   @type filename: string
1204   @param filename: Path to PEM file
1205
1206   """
1207   try:
1208     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1209                                            utils.ReadFile(filename))
1210   except Exception, err: # pylint: disable-msg=W0703
1211     return (LUClusterVerify.ETYPE_ERROR,
1212             "Failed to load X509 certificate %s: %s" % (filename, err))
1213
1214   (errcode, msg) = \
1215     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1216                                 constants.SSL_CERT_EXPIRATION_ERROR)
1217
1218   if msg:
1219     fnamemsg = "While verifying %s: %s" % (filename, msg)
1220   else:
1221     fnamemsg = None
1222
1223   if errcode is None:
1224     return (None, fnamemsg)
1225   elif errcode == utils.CERT_WARNING:
1226     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1227   elif errcode == utils.CERT_ERROR:
1228     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1229
1230   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1231
1232
1233 class LUClusterVerify(LogicalUnit):
1234   """Verifies the cluster status.
1235
1236   """
1237   HPATH = "cluster-verify"
1238   HTYPE = constants.HTYPE_CLUSTER
1239   REQ_BGL = False
1240
1241   TCLUSTER = "cluster"
1242   TNODE = "node"
1243   TINSTANCE = "instance"
1244
1245   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1246   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1247   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1248   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1249   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1250   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1251   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1252   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1253   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1254   ENODEDRBD = (TNODE, "ENODEDRBD")
1255   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1256   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1257   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1258   ENODEHV = (TNODE, "ENODEHV")
1259   ENODELVM = (TNODE, "ENODELVM")
1260   ENODEN1 = (TNODE, "ENODEN1")
1261   ENODENET = (TNODE, "ENODENET")
1262   ENODEOS = (TNODE, "ENODEOS")
1263   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1264   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1265   ENODERPC = (TNODE, "ENODERPC")
1266   ENODESSH = (TNODE, "ENODESSH")
1267   ENODEVERSION = (TNODE, "ENODEVERSION")
1268   ENODESETUP = (TNODE, "ENODESETUP")
1269   ENODETIME = (TNODE, "ENODETIME")
1270   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1271
1272   ETYPE_FIELD = "code"
1273   ETYPE_ERROR = "ERROR"
1274   ETYPE_WARNING = "WARNING"
1275
1276   _HOOKS_INDENT_RE = re.compile("^", re.M)
1277
1278   class NodeImage(object):
1279     """A class representing the logical and physical status of a node.
1280
1281     @type name: string
1282     @ivar name: the node name to which this object refers
1283     @ivar volumes: a structure as returned from
1284         L{ganeti.backend.GetVolumeList} (runtime)
1285     @ivar instances: a list of running instances (runtime)
1286     @ivar pinst: list of configured primary instances (config)
1287     @ivar sinst: list of configured secondary instances (config)
1288     @ivar sbp: dictionary of {primary-node: list of instances} for all
1289         instances for which this node is secondary (config)
1290     @ivar mfree: free memory, as reported by hypervisor (runtime)
1291     @ivar dfree: free disk, as reported by the node (runtime)
1292     @ivar offline: the offline status (config)
1293     @type rpc_fail: boolean
1294     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1295         not whether the individual keys were correct) (runtime)
1296     @type lvm_fail: boolean
1297     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1298     @type hyp_fail: boolean
1299     @ivar hyp_fail: whether the RPC call didn't return the instance list
1300     @type ghost: boolean
1301     @ivar ghost: whether this is a known node or not (config)
1302     @type os_fail: boolean
1303     @ivar os_fail: whether the RPC call didn't return valid OS data
1304     @type oslist: list
1305     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1306     @type vm_capable: boolean
1307     @ivar vm_capable: whether the node can host instances
1308
1309     """
1310     def __init__(self, offline=False, name=None, vm_capable=True):
1311       self.name = name
1312       self.volumes = {}
1313       self.instances = []
1314       self.pinst = []
1315       self.sinst = []
1316       self.sbp = {}
1317       self.mfree = 0
1318       self.dfree = 0
1319       self.offline = offline
1320       self.vm_capable = vm_capable
1321       self.rpc_fail = False
1322       self.lvm_fail = False
1323       self.hyp_fail = False
1324       self.ghost = False
1325       self.os_fail = False
1326       self.oslist = {}
1327
1328   def ExpandNames(self):
1329     self.needed_locks = {
1330       locking.LEVEL_NODE: locking.ALL_SET,
1331       locking.LEVEL_INSTANCE: locking.ALL_SET,
1332     }
1333     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1334
1335   def _Error(self, ecode, item, msg, *args, **kwargs):
1336     """Format an error message.
1337
1338     Based on the opcode's error_codes parameter, either format a
1339     parseable error code, or a simpler error string.
1340
1341     This must be called only from Exec and functions called from Exec.
1342
1343     """
1344     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1345     itype, etxt = ecode
1346     # first complete the msg
1347     if args:
1348       msg = msg % args
1349     # then format the whole message
1350     if self.op.error_codes:
1351       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1352     else:
1353       if item:
1354         item = " " + item
1355       else:
1356         item = ""
1357       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1358     # and finally report it via the feedback_fn
1359     self._feedback_fn("  - %s" % msg)
1360
1361   def _ErrorIf(self, cond, *args, **kwargs):
1362     """Log an error message if the passed condition is True.
1363
1364     """
1365     cond = bool(cond) or self.op.debug_simulate_errors
1366     if cond:
1367       self._Error(*args, **kwargs)
1368     # do not mark the operation as failed for WARN cases only
1369     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1370       self.bad = self.bad or cond
1371
1372   def _VerifyNode(self, ninfo, nresult):
1373     """Perform some basic validation on data returned from a node.
1374
1375       - check the result data structure is well formed and has all the
1376         mandatory fields
1377       - check ganeti version
1378
1379     @type ninfo: L{objects.Node}
1380     @param ninfo: the node to check
1381     @param nresult: the results from the node
1382     @rtype: boolean
1383     @return: whether overall this call was successful (and we can expect
1384          reasonable values in the respose)
1385
1386     """
1387     node = ninfo.name
1388     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1389
1390     # main result, nresult should be a non-empty dict
1391     test = not nresult or not isinstance(nresult, dict)
1392     _ErrorIf(test, self.ENODERPC, node,
1393                   "unable to verify node: no data returned")
1394     if test:
1395       return False
1396
1397     # compares ganeti version
1398     local_version = constants.PROTOCOL_VERSION
1399     remote_version = nresult.get("version", None)
1400     test = not (remote_version and
1401                 isinstance(remote_version, (list, tuple)) and
1402                 len(remote_version) == 2)
1403     _ErrorIf(test, self.ENODERPC, node,
1404              "connection to node returned invalid data")
1405     if test:
1406       return False
1407
1408     test = local_version != remote_version[0]
1409     _ErrorIf(test, self.ENODEVERSION, node,
1410              "incompatible protocol versions: master %s,"
1411              " node %s", local_version, remote_version[0])
1412     if test:
1413       return False
1414
1415     # node seems compatible, we can actually try to look into its results
1416
1417     # full package version
1418     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1419                   self.ENODEVERSION, node,
1420                   "software version mismatch: master %s, node %s",
1421                   constants.RELEASE_VERSION, remote_version[1],
1422                   code=self.ETYPE_WARNING)
1423
1424     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1425     if ninfo.vm_capable and isinstance(hyp_result, dict):
1426       for hv_name, hv_result in hyp_result.iteritems():
1427         test = hv_result is not None
1428         _ErrorIf(test, self.ENODEHV, node,
1429                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1430
1431     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1432     if ninfo.vm_capable and isinstance(hvp_result, list):
1433       for item, hv_name, hv_result in hvp_result:
1434         _ErrorIf(True, self.ENODEHV, node,
1435                  "hypervisor %s parameter verify failure (source %s): %s",
1436                  hv_name, item, hv_result)
1437
1438     test = nresult.get(constants.NV_NODESETUP,
1439                            ["Missing NODESETUP results"])
1440     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1441              "; ".join(test))
1442
1443     return True
1444
1445   def _VerifyNodeTime(self, ninfo, nresult,
1446                       nvinfo_starttime, nvinfo_endtime):
1447     """Check the node time.
1448
1449     @type ninfo: L{objects.Node}
1450     @param ninfo: the node to check
1451     @param nresult: the remote results for the node
1452     @param nvinfo_starttime: the start time of the RPC call
1453     @param nvinfo_endtime: the end time of the RPC call
1454
1455     """
1456     node = ninfo.name
1457     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1458
1459     ntime = nresult.get(constants.NV_TIME, None)
1460     try:
1461       ntime_merged = utils.MergeTime(ntime)
1462     except (ValueError, TypeError):
1463       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1464       return
1465
1466     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1467       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1468     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1469       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1470     else:
1471       ntime_diff = None
1472
1473     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1474              "Node time diverges by at least %s from master node time",
1475              ntime_diff)
1476
1477   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1478     """Check the node time.
1479
1480     @type ninfo: L{objects.Node}
1481     @param ninfo: the node to check
1482     @param nresult: the remote results for the node
1483     @param vg_name: the configured VG name
1484
1485     """
1486     if vg_name is None:
1487       return
1488
1489     node = ninfo.name
1490     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1491
1492     # checks vg existence and size > 20G
1493     vglist = nresult.get(constants.NV_VGLIST, None)
1494     test = not vglist
1495     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1496     if not test:
1497       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1498                                             constants.MIN_VG_SIZE)
1499       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1500
1501     # check pv names
1502     pvlist = nresult.get(constants.NV_PVLIST, None)
1503     test = pvlist is None
1504     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1505     if not test:
1506       # check that ':' is not present in PV names, since it's a
1507       # special character for lvcreate (denotes the range of PEs to
1508       # use on the PV)
1509       for _, pvname, owner_vg in pvlist:
1510         test = ":" in pvname
1511         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1512                  " '%s' of VG '%s'", pvname, owner_vg)
1513
1514   def _VerifyNodeNetwork(self, ninfo, nresult):
1515     """Check the node time.
1516
1517     @type ninfo: L{objects.Node}
1518     @param ninfo: the node to check
1519     @param nresult: the remote results for the node
1520
1521     """
1522     node = ninfo.name
1523     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1524
1525     test = constants.NV_NODELIST not in nresult
1526     _ErrorIf(test, self.ENODESSH, node,
1527              "node hasn't returned node ssh connectivity data")
1528     if not test:
1529       if nresult[constants.NV_NODELIST]:
1530         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1531           _ErrorIf(True, self.ENODESSH, node,
1532                    "ssh communication with node '%s': %s", a_node, a_msg)
1533
1534     test = constants.NV_NODENETTEST not in nresult
1535     _ErrorIf(test, self.ENODENET, node,
1536              "node hasn't returned node tcp connectivity data")
1537     if not test:
1538       if nresult[constants.NV_NODENETTEST]:
1539         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1540         for anode in nlist:
1541           _ErrorIf(True, self.ENODENET, node,
1542                    "tcp communication with node '%s': %s",
1543                    anode, nresult[constants.NV_NODENETTEST][anode])
1544
1545     test = constants.NV_MASTERIP not in nresult
1546     _ErrorIf(test, self.ENODENET, node,
1547              "node hasn't returned node master IP reachability data")
1548     if not test:
1549       if not nresult[constants.NV_MASTERIP]:
1550         if node == self.master_node:
1551           msg = "the master node cannot reach the master IP (not configured?)"
1552         else:
1553           msg = "cannot reach the master IP"
1554         _ErrorIf(True, self.ENODENET, node, msg)
1555
1556   def _VerifyInstance(self, instance, instanceconfig, node_image,
1557                       diskstatus):
1558     """Verify an instance.
1559
1560     This function checks to see if the required block devices are
1561     available on the instance's node.
1562
1563     """
1564     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1565     node_current = instanceconfig.primary_node
1566
1567     node_vol_should = {}
1568     instanceconfig.MapLVsByNode(node_vol_should)
1569
1570     for node in node_vol_should:
1571       n_img = node_image[node]
1572       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1573         # ignore missing volumes on offline or broken nodes
1574         continue
1575       for volume in node_vol_should[node]:
1576         test = volume not in n_img.volumes
1577         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1578                  "volume %s missing on node %s", volume, node)
1579
1580     if instanceconfig.admin_up:
1581       pri_img = node_image[node_current]
1582       test = instance not in pri_img.instances and not pri_img.offline
1583       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1584                "instance not running on its primary node %s",
1585                node_current)
1586
1587     for node, n_img in node_image.items():
1588       if node != node_current:
1589         test = instance in n_img.instances
1590         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1591                  "instance should not run on node %s", node)
1592
1593     diskdata = [(nname, success, status, idx)
1594                 for (nname, disks) in diskstatus.items()
1595                 for idx, (success, status) in enumerate(disks)]
1596
1597     for nname, success, bdev_status, idx in diskdata:
1598       # the 'ghost node' construction in Exec() ensures that we have a
1599       # node here
1600       snode = node_image[nname]
1601       bad_snode = snode.ghost or snode.offline
1602       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1603                self.EINSTANCEFAULTYDISK, instance,
1604                "couldn't retrieve status for disk/%s on %s: %s",
1605                idx, nname, bdev_status)
1606       _ErrorIf((instanceconfig.admin_up and success and
1607                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1608                self.EINSTANCEFAULTYDISK, instance,
1609                "disk/%s on %s is faulty", idx, nname)
1610
1611   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1612     """Verify if there are any unknown volumes in the cluster.
1613
1614     The .os, .swap and backup volumes are ignored. All other volumes are
1615     reported as unknown.
1616
1617     @type reserved: L{ganeti.utils.FieldSet}
1618     @param reserved: a FieldSet of reserved volume names
1619
1620     """
1621     for node, n_img in node_image.items():
1622       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1623         # skip non-healthy nodes
1624         continue
1625       for volume in n_img.volumes:
1626         test = ((node not in node_vol_should or
1627                 volume not in node_vol_should[node]) and
1628                 not reserved.Matches(volume))
1629         self._ErrorIf(test, self.ENODEORPHANLV, node,
1630                       "volume %s is unknown", volume)
1631
1632   def _VerifyOrphanInstances(self, instancelist, node_image):
1633     """Verify the list of running instances.
1634
1635     This checks what instances are running but unknown to the cluster.
1636
1637     """
1638     for node, n_img in node_image.items():
1639       for o_inst in n_img.instances:
1640         test = o_inst not in instancelist
1641         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1642                       "instance %s on node %s should not exist", o_inst, node)
1643
1644   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1645     """Verify N+1 Memory Resilience.
1646
1647     Check that if one single node dies we can still start all the
1648     instances it was primary for.
1649
1650     """
1651     cluster_info = self.cfg.GetClusterInfo()
1652     for node, n_img in node_image.items():
1653       # This code checks that every node which is now listed as
1654       # secondary has enough memory to host all instances it is
1655       # supposed to should a single other node in the cluster fail.
1656       # FIXME: not ready for failover to an arbitrary node
1657       # FIXME: does not support file-backed instances
1658       # WARNING: we currently take into account down instances as well
1659       # as up ones, considering that even if they're down someone
1660       # might want to start them even in the event of a node failure.
1661       if n_img.offline:
1662         # we're skipping offline nodes from the N+1 warning, since
1663         # most likely we don't have good memory infromation from them;
1664         # we already list instances living on such nodes, and that's
1665         # enough warning
1666         continue
1667       for prinode, instances in n_img.sbp.items():
1668         needed_mem = 0
1669         for instance in instances:
1670           bep = cluster_info.FillBE(instance_cfg[instance])
1671           if bep[constants.BE_AUTO_BALANCE]:
1672             needed_mem += bep[constants.BE_MEMORY]
1673         test = n_img.mfree < needed_mem
1674         self._ErrorIf(test, self.ENODEN1, node,
1675                       "not enough memory to accomodate instance failovers"
1676                       " should node %s fail", prinode)
1677
1678   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1679                        master_files):
1680     """Verifies and computes the node required file checksums.
1681
1682     @type ninfo: L{objects.Node}
1683     @param ninfo: the node to check
1684     @param nresult: the remote results for the node
1685     @param file_list: required list of files
1686     @param local_cksum: dictionary of local files and their checksums
1687     @param master_files: list of files that only masters should have
1688
1689     """
1690     node = ninfo.name
1691     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1692
1693     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1694     test = not isinstance(remote_cksum, dict)
1695     _ErrorIf(test, self.ENODEFILECHECK, node,
1696              "node hasn't returned file checksum data")
1697     if test:
1698       return
1699
1700     for file_name in file_list:
1701       node_is_mc = ninfo.master_candidate
1702       must_have = (file_name not in master_files) or node_is_mc
1703       # missing
1704       test1 = file_name not in remote_cksum
1705       # invalid checksum
1706       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1707       # existing and good
1708       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1709       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1710                "file '%s' missing", file_name)
1711       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1712                "file '%s' has wrong checksum", file_name)
1713       # not candidate and this is not a must-have file
1714       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1715                "file '%s' should not exist on non master"
1716                " candidates (and the file is outdated)", file_name)
1717       # all good, except non-master/non-must have combination
1718       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1719                "file '%s' should not exist"
1720                " on non master candidates", file_name)
1721
1722   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1723                       drbd_map):
1724     """Verifies and the node DRBD status.
1725
1726     @type ninfo: L{objects.Node}
1727     @param ninfo: the node to check
1728     @param nresult: the remote results for the node
1729     @param instanceinfo: the dict of instances
1730     @param drbd_helper: the configured DRBD usermode helper
1731     @param drbd_map: the DRBD map as returned by
1732         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1733
1734     """
1735     node = ninfo.name
1736     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1737
1738     if drbd_helper:
1739       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1740       test = (helper_result == None)
1741       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1742                "no drbd usermode helper returned")
1743       if helper_result:
1744         status, payload = helper_result
1745         test = not status
1746         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1747                  "drbd usermode helper check unsuccessful: %s", payload)
1748         test = status and (payload != drbd_helper)
1749         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1750                  "wrong drbd usermode helper: %s", payload)
1751
1752     # compute the DRBD minors
1753     node_drbd = {}
1754     for minor, instance in drbd_map[node].items():
1755       test = instance not in instanceinfo
1756       _ErrorIf(test, self.ECLUSTERCFG, None,
1757                "ghost instance '%s' in temporary DRBD map", instance)
1758         # ghost instance should not be running, but otherwise we
1759         # don't give double warnings (both ghost instance and
1760         # unallocated minor in use)
1761       if test:
1762         node_drbd[minor] = (instance, False)
1763       else:
1764         instance = instanceinfo[instance]
1765         node_drbd[minor] = (instance.name, instance.admin_up)
1766
1767     # and now check them
1768     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1769     test = not isinstance(used_minors, (tuple, list))
1770     _ErrorIf(test, self.ENODEDRBD, node,
1771              "cannot parse drbd status file: %s", str(used_minors))
1772     if test:
1773       # we cannot check drbd status
1774       return
1775
1776     for minor, (iname, must_exist) in node_drbd.items():
1777       test = minor not in used_minors and must_exist
1778       _ErrorIf(test, self.ENODEDRBD, node,
1779                "drbd minor %d of instance %s is not active", minor, iname)
1780     for minor in used_minors:
1781       test = minor not in node_drbd
1782       _ErrorIf(test, self.ENODEDRBD, node,
1783                "unallocated drbd minor %d is in use", minor)
1784
1785   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1786     """Builds the node OS structures.
1787
1788     @type ninfo: L{objects.Node}
1789     @param ninfo: the node to check
1790     @param nresult: the remote results for the node
1791     @param nimg: the node image object
1792
1793     """
1794     node = ninfo.name
1795     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1796
1797     remote_os = nresult.get(constants.NV_OSLIST, None)
1798     test = (not isinstance(remote_os, list) or
1799             not compat.all(isinstance(v, list) and len(v) == 7
1800                            for v in remote_os))
1801
1802     _ErrorIf(test, self.ENODEOS, node,
1803              "node hasn't returned valid OS data")
1804
1805     nimg.os_fail = test
1806
1807     if test:
1808       return
1809
1810     os_dict = {}
1811
1812     for (name, os_path, status, diagnose,
1813          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1814
1815       if name not in os_dict:
1816         os_dict[name] = []
1817
1818       # parameters is a list of lists instead of list of tuples due to
1819       # JSON lacking a real tuple type, fix it:
1820       parameters = [tuple(v) for v in parameters]
1821       os_dict[name].append((os_path, status, diagnose,
1822                             set(variants), set(parameters), set(api_ver)))
1823
1824     nimg.oslist = os_dict
1825
1826   def _VerifyNodeOS(self, ninfo, nimg, base):
1827     """Verifies the node OS list.
1828
1829     @type ninfo: L{objects.Node}
1830     @param ninfo: the node to check
1831     @param nimg: the node image object
1832     @param base: the 'template' node we match against (e.g. from the master)
1833
1834     """
1835     node = ninfo.name
1836     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1837
1838     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1839
1840     for os_name, os_data in nimg.oslist.items():
1841       assert os_data, "Empty OS status for OS %s?!" % os_name
1842       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1843       _ErrorIf(not f_status, self.ENODEOS, node,
1844                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1845       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1846                "OS '%s' has multiple entries (first one shadows the rest): %s",
1847                os_name, utils.CommaJoin([v[0] for v in os_data]))
1848       # this will catched in backend too
1849       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1850                and not f_var, self.ENODEOS, node,
1851                "OS %s with API at least %d does not declare any variant",
1852                os_name, constants.OS_API_V15)
1853       # comparisons with the 'base' image
1854       test = os_name not in base.oslist
1855       _ErrorIf(test, self.ENODEOS, node,
1856                "Extra OS %s not present on reference node (%s)",
1857                os_name, base.name)
1858       if test:
1859         continue
1860       assert base.oslist[os_name], "Base node has empty OS status?"
1861       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1862       if not b_status:
1863         # base OS is invalid, skipping
1864         continue
1865       for kind, a, b in [("API version", f_api, b_api),
1866                          ("variants list", f_var, b_var),
1867                          ("parameters", f_param, b_param)]:
1868         _ErrorIf(a != b, self.ENODEOS, node,
1869                  "OS %s %s differs from reference node %s: %s vs. %s",
1870                  kind, os_name, base.name,
1871                  utils.CommaJoin(a), utils.CommaJoin(b))
1872
1873     # check any missing OSes
1874     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1875     _ErrorIf(missing, self.ENODEOS, node,
1876              "OSes present on reference node %s but missing on this node: %s",
1877              base.name, utils.CommaJoin(missing))
1878
1879   def _VerifyOob(self, ninfo, nresult):
1880     """Verifies out of band functionality of a node.
1881
1882     @type ninfo: L{objects.Node}
1883     @param ninfo: the node to check
1884     @param nresult: the remote results for the node
1885
1886     """
1887     node = ninfo.name
1888     # We just have to verify the paths on master and/or master candidates
1889     # as the oob helper is invoked on the master
1890     if ((ninfo.master_candidate or ninfo.master_capable) and
1891         constants.NV_OOB_PATHS in nresult):
1892       for path_result in nresult[constants.NV_OOB_PATHS]:
1893         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1894
1895   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1896     """Verifies and updates the node volume data.
1897
1898     This function will update a L{NodeImage}'s internal structures
1899     with data from the remote call.
1900
1901     @type ninfo: L{objects.Node}
1902     @param ninfo: the node to check
1903     @param nresult: the remote results for the node
1904     @param nimg: the node image object
1905     @param vg_name: the configured VG name
1906
1907     """
1908     node = ninfo.name
1909     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1910
1911     nimg.lvm_fail = True
1912     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1913     if vg_name is None:
1914       pass
1915     elif isinstance(lvdata, basestring):
1916       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1917                utils.SafeEncode(lvdata))
1918     elif not isinstance(lvdata, dict):
1919       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1920     else:
1921       nimg.volumes = lvdata
1922       nimg.lvm_fail = False
1923
1924   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1925     """Verifies and updates the node instance list.
1926
1927     If the listing was successful, then updates this node's instance
1928     list. Otherwise, it marks the RPC call as failed for the instance
1929     list key.
1930
1931     @type ninfo: L{objects.Node}
1932     @param ninfo: the node to check
1933     @param nresult: the remote results for the node
1934     @param nimg: the node image object
1935
1936     """
1937     idata = nresult.get(constants.NV_INSTANCELIST, None)
1938     test = not isinstance(idata, list)
1939     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1940                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1941     if test:
1942       nimg.hyp_fail = True
1943     else:
1944       nimg.instances = idata
1945
1946   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1947     """Verifies and computes a node information map
1948
1949     @type ninfo: L{objects.Node}
1950     @param ninfo: the node to check
1951     @param nresult: the remote results for the node
1952     @param nimg: the node image object
1953     @param vg_name: the configured VG name
1954
1955     """
1956     node = ninfo.name
1957     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1958
1959     # try to read free memory (from the hypervisor)
1960     hv_info = nresult.get(constants.NV_HVINFO, None)
1961     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1962     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1963     if not test:
1964       try:
1965         nimg.mfree = int(hv_info["memory_free"])
1966       except (ValueError, TypeError):
1967         _ErrorIf(True, self.ENODERPC, node,
1968                  "node returned invalid nodeinfo, check hypervisor")
1969
1970     # FIXME: devise a free space model for file based instances as well
1971     if vg_name is not None:
1972       test = (constants.NV_VGLIST not in nresult or
1973               vg_name not in nresult[constants.NV_VGLIST])
1974       _ErrorIf(test, self.ENODELVM, node,
1975                "node didn't return data for the volume group '%s'"
1976                " - it is either missing or broken", vg_name)
1977       if not test:
1978         try:
1979           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1980         except (ValueError, TypeError):
1981           _ErrorIf(True, self.ENODERPC, node,
1982                    "node returned invalid LVM info, check LVM status")
1983
1984   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1985     """Gets per-disk status information for all instances.
1986
1987     @type nodelist: list of strings
1988     @param nodelist: Node names
1989     @type node_image: dict of (name, L{objects.Node})
1990     @param node_image: Node objects
1991     @type instanceinfo: dict of (name, L{objects.Instance})
1992     @param instanceinfo: Instance objects
1993     @rtype: {instance: {node: [(succes, payload)]}}
1994     @return: a dictionary of per-instance dictionaries with nodes as
1995         keys and disk information as values; the disk information is a
1996         list of tuples (success, payload)
1997
1998     """
1999     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2000
2001     node_disks = {}
2002     node_disks_devonly = {}
2003     diskless_instances = set()
2004     diskless = constants.DT_DISKLESS
2005
2006     for nname in nodelist:
2007       node_instances = list(itertools.chain(node_image[nname].pinst,
2008                                             node_image[nname].sinst))
2009       diskless_instances.update(inst for inst in node_instances
2010                                 if instanceinfo[inst].disk_template == diskless)
2011       disks = [(inst, disk)
2012                for inst in node_instances
2013                for disk in instanceinfo[inst].disks]
2014
2015       if not disks:
2016         # No need to collect data
2017         continue
2018
2019       node_disks[nname] = disks
2020
2021       # Creating copies as SetDiskID below will modify the objects and that can
2022       # lead to incorrect data returned from nodes
2023       devonly = [dev.Copy() for (_, dev) in disks]
2024
2025       for dev in devonly:
2026         self.cfg.SetDiskID(dev, nname)
2027
2028       node_disks_devonly[nname] = devonly
2029
2030     assert len(node_disks) == len(node_disks_devonly)
2031
2032     # Collect data from all nodes with disks
2033     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2034                                                           node_disks_devonly)
2035
2036     assert len(result) == len(node_disks)
2037
2038     instdisk = {}
2039
2040     for (nname, nres) in result.items():
2041       disks = node_disks[nname]
2042
2043       if nres.offline:
2044         # No data from this node
2045         data = len(disks) * [(False, "node offline")]
2046       else:
2047         msg = nres.fail_msg
2048         _ErrorIf(msg, self.ENODERPC, nname,
2049                  "while getting disk information: %s", msg)
2050         if msg:
2051           # No data from this node
2052           data = len(disks) * [(False, msg)]
2053         else:
2054           data = []
2055           for idx, i in enumerate(nres.payload):
2056             if isinstance(i, (tuple, list)) and len(i) == 2:
2057               data.append(i)
2058             else:
2059               logging.warning("Invalid result from node %s, entry %d: %s",
2060                               nname, idx, i)
2061               data.append((False, "Invalid result from the remote node"))
2062
2063       for ((inst, _), status) in zip(disks, data):
2064         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2065
2066     # Add empty entries for diskless instances.
2067     for inst in diskless_instances:
2068       assert inst not in instdisk
2069       instdisk[inst] = {}
2070
2071     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2072                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2073                       compat.all(isinstance(s, (tuple, list)) and
2074                                  len(s) == 2 for s in statuses)
2075                       for inst, nnames in instdisk.items()
2076                       for nname, statuses in nnames.items())
2077     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2078
2079     return instdisk
2080
2081   def _VerifyHVP(self, hvp_data):
2082     """Verifies locally the syntax of the hypervisor parameters.
2083
2084     """
2085     for item, hv_name, hv_params in hvp_data:
2086       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2087              (item, hv_name))
2088       try:
2089         hv_class = hypervisor.GetHypervisor(hv_name)
2090         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2091         hv_class.CheckParameterSyntax(hv_params)
2092       except errors.GenericError, err:
2093         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2094
2095   def BuildHooksEnv(self):
2096     """Build hooks env.
2097
2098     Cluster-Verify hooks just ran in the post phase and their failure makes
2099     the output be logged in the verify output and the verification to fail.
2100
2101     """
2102     cfg = self.cfg
2103
2104     env = {
2105       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2106       }
2107
2108     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2109                for node in cfg.GetAllNodesInfo().values())
2110
2111     return env
2112
2113   def BuildHooksNodes(self):
2114     """Build hooks nodes.
2115
2116     """
2117     return ([], self.cfg.GetNodeList())
2118
2119   def Exec(self, feedback_fn):
2120     """Verify integrity of cluster, performing various test on nodes.
2121
2122     """
2123     # This method has too many local variables. pylint: disable-msg=R0914
2124     self.bad = False
2125     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2126     verbose = self.op.verbose
2127     self._feedback_fn = feedback_fn
2128     feedback_fn("* Verifying global settings")
2129     for msg in self.cfg.VerifyConfig():
2130       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2131
2132     # Check the cluster certificates
2133     for cert_filename in constants.ALL_CERT_FILES:
2134       (errcode, msg) = _VerifyCertificate(cert_filename)
2135       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2136
2137     vg_name = self.cfg.GetVGName()
2138     drbd_helper = self.cfg.GetDRBDHelper()
2139     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2140     cluster = self.cfg.GetClusterInfo()
2141     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2142     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2143     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2144     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2145     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2146                         for iname in instancelist)
2147     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2148     i_non_redundant = [] # Non redundant instances
2149     i_non_a_balanced = [] # Non auto-balanced instances
2150     n_offline = 0 # Count of offline nodes
2151     n_drained = 0 # Count of nodes being drained
2152     node_vol_should = {}
2153
2154     # FIXME: verify OS list
2155     # do local checksums
2156     master_files = [constants.CLUSTER_CONF_FILE]
2157     master_node = self.master_node = self.cfg.GetMasterNode()
2158     master_ip = self.cfg.GetMasterIP()
2159
2160     file_names = ssconf.SimpleStore().GetFileList()
2161     file_names.extend(constants.ALL_CERT_FILES)
2162     file_names.extend(master_files)
2163     if cluster.modify_etc_hosts:
2164       file_names.append(constants.ETC_HOSTS)
2165
2166     local_checksums = utils.FingerprintFiles(file_names)
2167
2168     # Compute the set of hypervisor parameters
2169     hvp_data = []
2170     for hv_name in hypervisors:
2171       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2172     for os_name, os_hvp in cluster.os_hvp.items():
2173       for hv_name, hv_params in os_hvp.items():
2174         if not hv_params:
2175           continue
2176         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2177         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2178     # TODO: collapse identical parameter values in a single one
2179     for instance in instanceinfo.values():
2180       if not instance.hvparams:
2181         continue
2182       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2183                        cluster.FillHV(instance)))
2184     # and verify them locally
2185     self._VerifyHVP(hvp_data)
2186
2187     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2188     node_verify_param = {
2189       constants.NV_FILELIST: file_names,
2190       constants.NV_NODELIST: [node.name for node in nodeinfo
2191                               if not node.offline],
2192       constants.NV_HYPERVISOR: hypervisors,
2193       constants.NV_HVPARAMS: hvp_data,
2194       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2195                                   node.secondary_ip) for node in nodeinfo
2196                                  if not node.offline],
2197       constants.NV_INSTANCELIST: hypervisors,
2198       constants.NV_VERSION: None,
2199       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2200       constants.NV_NODESETUP: None,
2201       constants.NV_TIME: None,
2202       constants.NV_MASTERIP: (master_node, master_ip),
2203       constants.NV_OSLIST: None,
2204       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2205       }
2206
2207     if vg_name is not None:
2208       node_verify_param[constants.NV_VGLIST] = None
2209       node_verify_param[constants.NV_LVLIST] = vg_name
2210       node_verify_param[constants.NV_PVLIST] = [vg_name]
2211       node_verify_param[constants.NV_DRBDLIST] = None
2212
2213     if drbd_helper:
2214       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2215
2216     # Build our expected cluster state
2217     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2218                                                  name=node.name,
2219                                                  vm_capable=node.vm_capable))
2220                       for node in nodeinfo)
2221
2222     # Gather OOB paths
2223     oob_paths = []
2224     for node in nodeinfo:
2225       path = _SupportsOob(self.cfg, node)
2226       if path and path not in oob_paths:
2227         oob_paths.append(path)
2228
2229     if oob_paths:
2230       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2231
2232     for instance in instancelist:
2233       inst_config = instanceinfo[instance]
2234
2235       for nname in inst_config.all_nodes:
2236         if nname not in node_image:
2237           # ghost node
2238           gnode = self.NodeImage(name=nname)
2239           gnode.ghost = True
2240           node_image[nname] = gnode
2241
2242       inst_config.MapLVsByNode(node_vol_should)
2243
2244       pnode = inst_config.primary_node
2245       node_image[pnode].pinst.append(instance)
2246
2247       for snode in inst_config.secondary_nodes:
2248         nimg = node_image[snode]
2249         nimg.sinst.append(instance)
2250         if pnode not in nimg.sbp:
2251           nimg.sbp[pnode] = []
2252         nimg.sbp[pnode].append(instance)
2253
2254     # At this point, we have the in-memory data structures complete,
2255     # except for the runtime information, which we'll gather next
2256
2257     # Due to the way our RPC system works, exact response times cannot be
2258     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2259     # time before and after executing the request, we can at least have a time
2260     # window.
2261     nvinfo_starttime = time.time()
2262     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2263                                            self.cfg.GetClusterName())
2264     nvinfo_endtime = time.time()
2265
2266     all_drbd_map = self.cfg.ComputeDRBDMap()
2267
2268     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2269     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2270
2271     feedback_fn("* Verifying node status")
2272
2273     refos_img = None
2274
2275     for node_i in nodeinfo:
2276       node = node_i.name
2277       nimg = node_image[node]
2278
2279       if node_i.offline:
2280         if verbose:
2281           feedback_fn("* Skipping offline node %s" % (node,))
2282         n_offline += 1
2283         continue
2284
2285       if node == master_node:
2286         ntype = "master"
2287       elif node_i.master_candidate:
2288         ntype = "master candidate"
2289       elif node_i.drained:
2290         ntype = "drained"
2291         n_drained += 1
2292       else:
2293         ntype = "regular"
2294       if verbose:
2295         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2296
2297       msg = all_nvinfo[node].fail_msg
2298       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2299       if msg:
2300         nimg.rpc_fail = True
2301         continue
2302
2303       nresult = all_nvinfo[node].payload
2304
2305       nimg.call_ok = self._VerifyNode(node_i, nresult)
2306       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2307       self._VerifyNodeNetwork(node_i, nresult)
2308       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2309                             master_files)
2310
2311       self._VerifyOob(node_i, nresult)
2312
2313       if nimg.vm_capable:
2314         self._VerifyNodeLVM(node_i, nresult, vg_name)
2315         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2316                              all_drbd_map)
2317
2318         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2319         self._UpdateNodeInstances(node_i, nresult, nimg)
2320         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2321         self._UpdateNodeOS(node_i, nresult, nimg)
2322         if not nimg.os_fail:
2323           if refos_img is None:
2324             refos_img = nimg
2325           self._VerifyNodeOS(node_i, nimg, refos_img)
2326
2327     feedback_fn("* Verifying instance status")
2328     for instance in instancelist:
2329       if verbose:
2330         feedback_fn("* Verifying instance %s" % instance)
2331       inst_config = instanceinfo[instance]
2332       self._VerifyInstance(instance, inst_config, node_image,
2333                            instdisk[instance])
2334       inst_nodes_offline = []
2335
2336       pnode = inst_config.primary_node
2337       pnode_img = node_image[pnode]
2338       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2339                self.ENODERPC, pnode, "instance %s, connection to"
2340                " primary node failed", instance)
2341
2342       _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2343                "instance lives on offline node %s", inst_config.primary_node)
2344
2345       # If the instance is non-redundant we cannot survive losing its primary
2346       # node, so we are not N+1 compliant. On the other hand we have no disk
2347       # templates with more than one secondary so that situation is not well
2348       # supported either.
2349       # FIXME: does not support file-backed instances
2350       if not inst_config.secondary_nodes:
2351         i_non_redundant.append(instance)
2352
2353       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2354                instance, "instance has multiple secondary nodes: %s",
2355                utils.CommaJoin(inst_config.secondary_nodes),
2356                code=self.ETYPE_WARNING)
2357
2358       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2359         pnode = inst_config.primary_node
2360         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2361         instance_groups = {}
2362
2363         for node in instance_nodes:
2364           instance_groups.setdefault(nodeinfo_byname[node].group,
2365                                      []).append(node)
2366
2367         pretty_list = [
2368           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2369           # Sort so that we always list the primary node first.
2370           for group, nodes in sorted(instance_groups.items(),
2371                                      key=lambda (_, nodes): pnode in nodes,
2372                                      reverse=True)]
2373
2374         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2375                       instance, "instance has primary and secondary nodes in"
2376                       " different groups: %s", utils.CommaJoin(pretty_list),
2377                       code=self.ETYPE_WARNING)
2378
2379       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2380         i_non_a_balanced.append(instance)
2381
2382       for snode in inst_config.secondary_nodes:
2383         s_img = node_image[snode]
2384         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2385                  "instance %s, connection to secondary node failed", instance)
2386
2387         if s_img.offline:
2388           inst_nodes_offline.append(snode)
2389
2390       # warn that the instance lives on offline nodes
2391       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2392                "instance has offline secondary node(s) %s",
2393                utils.CommaJoin(inst_nodes_offline))
2394       # ... or ghost/non-vm_capable nodes
2395       for node in inst_config.all_nodes:
2396         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2397                  "instance lives on ghost node %s", node)
2398         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2399                  instance, "instance lives on non-vm_capable node %s", node)
2400
2401     feedback_fn("* Verifying orphan volumes")
2402     reserved = utils.FieldSet(*cluster.reserved_lvs)
2403     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2404
2405     feedback_fn("* Verifying orphan instances")
2406     self._VerifyOrphanInstances(instancelist, node_image)
2407
2408     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2409       feedback_fn("* Verifying N+1 Memory redundancy")
2410       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2411
2412     feedback_fn("* Other Notes")
2413     if i_non_redundant:
2414       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2415                   % len(i_non_redundant))
2416
2417     if i_non_a_balanced:
2418       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2419                   % len(i_non_a_balanced))
2420
2421     if n_offline:
2422       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2423
2424     if n_drained:
2425       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2426
2427     return not self.bad
2428
2429   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2430     """Analyze the post-hooks' result
2431
2432     This method analyses the hook result, handles it, and sends some
2433     nicely-formatted feedback back to the user.
2434
2435     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2436         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2437     @param hooks_results: the results of the multi-node hooks rpc call
2438     @param feedback_fn: function used send feedback back to the caller
2439     @param lu_result: previous Exec result
2440     @return: the new Exec result, based on the previous result
2441         and hook results
2442
2443     """
2444     # We only really run POST phase hooks, and are only interested in
2445     # their results
2446     if phase == constants.HOOKS_PHASE_POST:
2447       # Used to change hooks' output to proper indentation
2448       feedback_fn("* Hooks Results")
2449       assert hooks_results, "invalid result from hooks"
2450
2451       for node_name in hooks_results:
2452         res = hooks_results[node_name]
2453         msg = res.fail_msg
2454         test = msg and not res.offline
2455         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2456                       "Communication failure in hooks execution: %s", msg)
2457         if res.offline or msg:
2458           # No need to investigate payload if node is offline or gave an error.
2459           # override manually lu_result here as _ErrorIf only
2460           # overrides self.bad
2461           lu_result = 1
2462           continue
2463         for script, hkr, output in res.payload:
2464           test = hkr == constants.HKR_FAIL
2465           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2466                         "Script %s failed, output:", script)
2467           if test:
2468             output = self._HOOKS_INDENT_RE.sub('      ', output)
2469             feedback_fn("%s" % output)
2470             lu_result = 0
2471
2472       return lu_result
2473
2474
2475 class LUClusterVerifyDisks(NoHooksLU):
2476   """Verifies the cluster disks status.
2477
2478   """
2479   REQ_BGL = False
2480
2481   def ExpandNames(self):
2482     self.needed_locks = {
2483       locking.LEVEL_NODE: locking.ALL_SET,
2484       locking.LEVEL_INSTANCE: locking.ALL_SET,
2485     }
2486     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2487
2488   def Exec(self, feedback_fn):
2489     """Verify integrity of cluster disks.
2490
2491     @rtype: tuple of three items
2492     @return: a tuple of (dict of node-to-node_error, list of instances
2493         which need activate-disks, dict of instance: (node, volume) for
2494         missing volumes
2495
2496     """
2497     result = res_nodes, res_instances, res_missing = {}, [], {}
2498
2499     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2500     instances = self.cfg.GetAllInstancesInfo().values()
2501
2502     nv_dict = {}
2503     for inst in instances:
2504       inst_lvs = {}
2505       if not inst.admin_up:
2506         continue
2507       inst.MapLVsByNode(inst_lvs)
2508       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2509       for node, vol_list in inst_lvs.iteritems():
2510         for vol in vol_list:
2511           nv_dict[(node, vol)] = inst
2512
2513     if not nv_dict:
2514       return result
2515
2516     node_lvs = self.rpc.call_lv_list(nodes, [])
2517     for node, node_res in node_lvs.items():
2518       if node_res.offline:
2519         continue
2520       msg = node_res.fail_msg
2521       if msg:
2522         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2523         res_nodes[node] = msg
2524         continue
2525
2526       lvs = node_res.payload
2527       for lv_name, (_, _, lv_online) in lvs.items():
2528         inst = nv_dict.pop((node, lv_name), None)
2529         if (not lv_online and inst is not None
2530             and inst.name not in res_instances):
2531           res_instances.append(inst.name)
2532
2533     # any leftover items in nv_dict are missing LVs, let's arrange the
2534     # data better
2535     for key, inst in nv_dict.iteritems():
2536       if inst.name not in res_missing:
2537         res_missing[inst.name] = []
2538       res_missing[inst.name].append(key)
2539
2540     return result
2541
2542
2543 class LUClusterRepairDiskSizes(NoHooksLU):
2544   """Verifies the cluster disks sizes.
2545
2546   """
2547   REQ_BGL = False
2548
2549   def ExpandNames(self):
2550     if self.op.instances:
2551       self.wanted_names = []
2552       for name in self.op.instances:
2553         full_name = _ExpandInstanceName(self.cfg, name)
2554         self.wanted_names.append(full_name)
2555       self.needed_locks = {
2556         locking.LEVEL_NODE: [],
2557         locking.LEVEL_INSTANCE: self.wanted_names,
2558         }
2559       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2560     else:
2561       self.wanted_names = None
2562       self.needed_locks = {
2563         locking.LEVEL_NODE: locking.ALL_SET,
2564         locking.LEVEL_INSTANCE: locking.ALL_SET,
2565         }
2566     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2567
2568   def DeclareLocks(self, level):
2569     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2570       self._LockInstancesNodes(primary_only=True)
2571
2572   def CheckPrereq(self):
2573     """Check prerequisites.
2574
2575     This only checks the optional instance list against the existing names.
2576
2577     """
2578     if self.wanted_names is None:
2579       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2580
2581     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2582                              in self.wanted_names]
2583
2584   def _EnsureChildSizes(self, disk):
2585     """Ensure children of the disk have the needed disk size.
2586
2587     This is valid mainly for DRBD8 and fixes an issue where the
2588     children have smaller disk size.
2589
2590     @param disk: an L{ganeti.objects.Disk} object
2591
2592     """
2593     if disk.dev_type == constants.LD_DRBD8:
2594       assert disk.children, "Empty children for DRBD8?"
2595       fchild = disk.children[0]
2596       mismatch = fchild.size < disk.size
2597       if mismatch:
2598         self.LogInfo("Child disk has size %d, parent %d, fixing",
2599                      fchild.size, disk.size)
2600         fchild.size = disk.size
2601
2602       # and we recurse on this child only, not on the metadev
2603       return self._EnsureChildSizes(fchild) or mismatch
2604     else:
2605       return False
2606
2607   def Exec(self, feedback_fn):
2608     """Verify the size of cluster disks.
2609
2610     """
2611     # TODO: check child disks too
2612     # TODO: check differences in size between primary/secondary nodes
2613     per_node_disks = {}
2614     for instance in self.wanted_instances:
2615       pnode = instance.primary_node
2616       if pnode not in per_node_disks:
2617         per_node_disks[pnode] = []
2618       for idx, disk in enumerate(instance.disks):
2619         per_node_disks[pnode].append((instance, idx, disk))
2620
2621     changed = []
2622     for node, dskl in per_node_disks.items():
2623       newl = [v[2].Copy() for v in dskl]
2624       for dsk in newl:
2625         self.cfg.SetDiskID(dsk, node)
2626       result = self.rpc.call_blockdev_getsize(node, newl)
2627       if result.fail_msg:
2628         self.LogWarning("Failure in blockdev_getsize call to node"
2629                         " %s, ignoring", node)
2630         continue
2631       if len(result.payload) != len(dskl):
2632         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2633                         " result.payload=%s", node, len(dskl), result.payload)
2634         self.LogWarning("Invalid result from node %s, ignoring node results",
2635                         node)
2636         continue
2637       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2638         if size is None:
2639           self.LogWarning("Disk %d of instance %s did not return size"
2640                           " information, ignoring", idx, instance.name)
2641           continue
2642         if not isinstance(size, (int, long)):
2643           self.LogWarning("Disk %d of instance %s did not return valid"
2644                           " size information, ignoring", idx, instance.name)
2645           continue
2646         size = size >> 20
2647         if size != disk.size:
2648           self.LogInfo("Disk %d of instance %s has mismatched size,"
2649                        " correcting: recorded %d, actual %d", idx,
2650                        instance.name, disk.size, size)
2651           disk.size = size
2652           self.cfg.Update(instance, feedback_fn)
2653           changed.append((instance.name, idx, size))
2654         if self._EnsureChildSizes(disk):
2655           self.cfg.Update(instance, feedback_fn)
2656           changed.append((instance.name, idx, disk.size))
2657     return changed
2658
2659
2660 class LUClusterRename(LogicalUnit):
2661   """Rename the cluster.
2662
2663   """
2664   HPATH = "cluster-rename"
2665   HTYPE = constants.HTYPE_CLUSTER
2666
2667   def BuildHooksEnv(self):
2668     """Build hooks env.
2669
2670     """
2671     return {
2672       "OP_TARGET": self.cfg.GetClusterName(),
2673       "NEW_NAME": self.op.name,
2674       }
2675
2676   def BuildHooksNodes(self):
2677     """Build hooks nodes.
2678
2679     """
2680     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2681
2682   def CheckPrereq(self):
2683     """Verify that the passed name is a valid one.
2684
2685     """
2686     hostname = netutils.GetHostname(name=self.op.name,
2687                                     family=self.cfg.GetPrimaryIPFamily())
2688
2689     new_name = hostname.name
2690     self.ip = new_ip = hostname.ip
2691     old_name = self.cfg.GetClusterName()
2692     old_ip = self.cfg.GetMasterIP()
2693     if new_name == old_name and new_ip == old_ip:
2694       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2695                                  " cluster has changed",
2696                                  errors.ECODE_INVAL)
2697     if new_ip != old_ip:
2698       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2699         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2700                                    " reachable on the network" %
2701                                    new_ip, errors.ECODE_NOTUNIQUE)
2702
2703     self.op.name = new_name
2704
2705   def Exec(self, feedback_fn):
2706     """Rename the cluster.
2707
2708     """
2709     clustername = self.op.name
2710     ip = self.ip
2711
2712     # shutdown the master IP
2713     master = self.cfg.GetMasterNode()
2714     result = self.rpc.call_node_stop_master(master, False)
2715     result.Raise("Could not disable the master role")
2716
2717     try:
2718       cluster = self.cfg.GetClusterInfo()
2719       cluster.cluster_name = clustername
2720       cluster.master_ip = ip
2721       self.cfg.Update(cluster, feedback_fn)
2722
2723       # update the known hosts file
2724       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2725       node_list = self.cfg.GetOnlineNodeList()
2726       try:
2727         node_list.remove(master)
2728       except ValueError:
2729         pass
2730       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2731     finally:
2732       result = self.rpc.call_node_start_master(master, False, False)
2733       msg = result.fail_msg
2734       if msg:
2735         self.LogWarning("Could not re-enable the master role on"
2736                         " the master, please restart manually: %s", msg)
2737
2738     return clustername
2739
2740
2741 class LUClusterSetParams(LogicalUnit):
2742   """Change the parameters of the cluster.
2743
2744   """
2745   HPATH = "cluster-modify"
2746   HTYPE = constants.HTYPE_CLUSTER
2747   REQ_BGL = False
2748
2749   def CheckArguments(self):
2750     """Check parameters
2751
2752     """
2753     if self.op.uid_pool:
2754       uidpool.CheckUidPool(self.op.uid_pool)
2755
2756     if self.op.add_uids:
2757       uidpool.CheckUidPool(self.op.add_uids)
2758
2759     if self.op.remove_uids:
2760       uidpool.CheckUidPool(self.op.remove_uids)
2761
2762   def ExpandNames(self):
2763     # FIXME: in the future maybe other cluster params won't require checking on
2764     # all nodes to be modified.
2765     self.needed_locks = {
2766       locking.LEVEL_NODE: locking.ALL_SET,
2767     }
2768     self.share_locks[locking.LEVEL_NODE] = 1
2769
2770   def BuildHooksEnv(self):
2771     """Build hooks env.
2772
2773     """
2774     return {
2775       "OP_TARGET": self.cfg.GetClusterName(),
2776       "NEW_VG_NAME": self.op.vg_name,
2777       }
2778
2779   def BuildHooksNodes(self):
2780     """Build hooks nodes.
2781
2782     """
2783     mn = self.cfg.GetMasterNode()
2784     return ([mn], [mn])
2785
2786   def CheckPrereq(self):
2787     """Check prerequisites.
2788
2789     This checks whether the given params don't conflict and
2790     if the given volume group is valid.
2791
2792     """
2793     if self.op.vg_name is not None and not self.op.vg_name:
2794       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2795         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2796                                    " instances exist", errors.ECODE_INVAL)
2797
2798     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2799       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2800         raise errors.OpPrereqError("Cannot disable drbd helper while"
2801                                    " drbd-based instances exist",
2802                                    errors.ECODE_INVAL)
2803
2804     node_list = self.acquired_locks[locking.LEVEL_NODE]
2805
2806     # if vg_name not None, checks given volume group on all nodes
2807     if self.op.vg_name:
2808       vglist = self.rpc.call_vg_list(node_list)
2809       for node in node_list:
2810         msg = vglist[node].fail_msg
2811         if msg:
2812           # ignoring down node
2813           self.LogWarning("Error while gathering data on node %s"
2814                           " (ignoring node): %s", node, msg)
2815           continue
2816         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2817                                               self.op.vg_name,
2818                                               constants.MIN_VG_SIZE)
2819         if vgstatus:
2820           raise errors.OpPrereqError("Error on node '%s': %s" %
2821                                      (node, vgstatus), errors.ECODE_ENVIRON)
2822
2823     if self.op.drbd_helper:
2824       # checks given drbd helper on all nodes
2825       helpers = self.rpc.call_drbd_helper(node_list)
2826       for node in node_list:
2827         ninfo = self.cfg.GetNodeInfo(node)
2828         if ninfo.offline:
2829           self.LogInfo("Not checking drbd helper on offline node %s", node)
2830           continue
2831         msg = helpers[node].fail_msg
2832         if msg:
2833           raise errors.OpPrereqError("Error checking drbd helper on node"
2834                                      " '%s': %s" % (node, msg),
2835                                      errors.ECODE_ENVIRON)
2836         node_helper = helpers[node].payload
2837         if node_helper != self.op.drbd_helper:
2838           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2839                                      (node, node_helper), errors.ECODE_ENVIRON)
2840
2841     self.cluster = cluster = self.cfg.GetClusterInfo()
2842     # validate params changes
2843     if self.op.beparams:
2844       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2845       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2846
2847     if self.op.ndparams:
2848       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2849       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2850
2851     if self.op.nicparams:
2852       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2853       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2854       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2855       nic_errors = []
2856
2857       # check all instances for consistency
2858       for instance in self.cfg.GetAllInstancesInfo().values():
2859         for nic_idx, nic in enumerate(instance.nics):
2860           params_copy = copy.deepcopy(nic.nicparams)
2861           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2862
2863           # check parameter syntax
2864           try:
2865             objects.NIC.CheckParameterSyntax(params_filled)
2866           except errors.ConfigurationError, err:
2867             nic_errors.append("Instance %s, nic/%d: %s" %
2868                               (instance.name, nic_idx, err))
2869
2870           # if we're moving instances to routed, check that they have an ip
2871           target_mode = params_filled[constants.NIC_MODE]
2872           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2873             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2874                               (instance.name, nic_idx))
2875       if nic_errors:
2876         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2877                                    "\n".join(nic_errors))
2878
2879     # hypervisor list/parameters
2880     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2881     if self.op.hvparams:
2882       for hv_name, hv_dict in self.op.hvparams.items():
2883         if hv_name not in self.new_hvparams:
2884           self.new_hvparams[hv_name] = hv_dict
2885         else:
2886           self.new_hvparams[hv_name].update(hv_dict)
2887
2888     # os hypervisor parameters
2889     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2890     if self.op.os_hvp:
2891       for os_name, hvs in self.op.os_hvp.items():
2892         if os_name not in self.new_os_hvp:
2893           self.new_os_hvp[os_name] = hvs
2894         else:
2895           for hv_name, hv_dict in hvs.items():
2896             if hv_name not in self.new_os_hvp[os_name]:
2897               self.new_os_hvp[os_name][hv_name] = hv_dict
2898             else:
2899               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2900
2901     # os parameters
2902     self.new_osp = objects.FillDict(cluster.osparams, {})
2903     if self.op.osparams:
2904       for os_name, osp in self.op.osparams.items():
2905         if os_name not in self.new_osp:
2906           self.new_osp[os_name] = {}
2907
2908         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2909                                                   use_none=True)
2910
2911         if not self.new_osp[os_name]:
2912           # we removed all parameters
2913           del self.new_osp[os_name]
2914         else:
2915           # check the parameter validity (remote check)
2916           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2917                          os_name, self.new_osp[os_name])
2918
2919     # changes to the hypervisor list
2920     if self.op.enabled_hypervisors is not None:
2921       self.hv_list = self.op.enabled_hypervisors
2922       for hv in self.hv_list:
2923         # if the hypervisor doesn't already exist in the cluster
2924         # hvparams, we initialize it to empty, and then (in both
2925         # cases) we make sure to fill the defaults, as we might not
2926         # have a complete defaults list if the hypervisor wasn't
2927         # enabled before
2928         if hv not in new_hvp:
2929           new_hvp[hv] = {}
2930         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2931         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2932     else:
2933       self.hv_list = cluster.enabled_hypervisors
2934
2935     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2936       # either the enabled list has changed, or the parameters have, validate
2937       for hv_name, hv_params in self.new_hvparams.items():
2938         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2939             (self.op.enabled_hypervisors and
2940              hv_name in self.op.enabled_hypervisors)):
2941           # either this is a new hypervisor, or its parameters have changed
2942           hv_class = hypervisor.GetHypervisor(hv_name)
2943           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2944           hv_class.CheckParameterSyntax(hv_params)
2945           _CheckHVParams(self, node_list, hv_name, hv_params)
2946
2947     if self.op.os_hvp:
2948       # no need to check any newly-enabled hypervisors, since the
2949       # defaults have already been checked in the above code-block
2950       for os_name, os_hvp in self.new_os_hvp.items():
2951         for hv_name, hv_params in os_hvp.items():
2952           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2953           # we need to fill in the new os_hvp on top of the actual hv_p
2954           cluster_defaults = self.new_hvparams.get(hv_name, {})
2955           new_osp = objects.FillDict(cluster_defaults, hv_params)
2956           hv_class = hypervisor.GetHypervisor(hv_name)
2957           hv_class.CheckParameterSyntax(new_osp)
2958           _CheckHVParams(self, node_list, hv_name, new_osp)
2959
2960     if self.op.default_iallocator:
2961       alloc_script = utils.FindFile(self.op.default_iallocator,
2962                                     constants.IALLOCATOR_SEARCH_PATH,
2963                                     os.path.isfile)
2964       if alloc_script is None:
2965         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2966                                    " specified" % self.op.default_iallocator,
2967                                    errors.ECODE_INVAL)
2968
2969   def Exec(self, feedback_fn):
2970     """Change the parameters of the cluster.
2971
2972     """
2973     if self.op.vg_name is not None:
2974       new_volume = self.op.vg_name
2975       if not new_volume:
2976         new_volume = None
2977       if new_volume != self.cfg.GetVGName():
2978         self.cfg.SetVGName(new_volume)
2979       else:
2980         feedback_fn("Cluster LVM configuration already in desired"
2981                     " state, not changing")
2982     if self.op.drbd_helper is not None:
2983       new_helper = self.op.drbd_helper
2984       if not new_helper:
2985         new_helper = None
2986       if new_helper != self.cfg.GetDRBDHelper():
2987         self.cfg.SetDRBDHelper(new_helper)
2988       else:
2989         feedback_fn("Cluster DRBD helper already in desired state,"
2990                     " not changing")
2991     if self.op.hvparams:
2992       self.cluster.hvparams = self.new_hvparams
2993     if self.op.os_hvp:
2994       self.cluster.os_hvp = self.new_os_hvp
2995     if self.op.enabled_hypervisors is not None:
2996       self.cluster.hvparams = self.new_hvparams
2997       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2998     if self.op.beparams:
2999       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3000     if self.op.nicparams:
3001       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3002     if self.op.osparams:
3003       self.cluster.osparams = self.new_osp
3004     if self.op.ndparams:
3005       self.cluster.ndparams = self.new_ndparams
3006
3007     if self.op.candidate_pool_size is not None:
3008       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3009       # we need to update the pool size here, otherwise the save will fail
3010       _AdjustCandidatePool(self, [])
3011
3012     if self.op.maintain_node_health is not None:
3013       self.cluster.maintain_node_health = self.op.maintain_node_health
3014
3015     if self.op.prealloc_wipe_disks is not None:
3016       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3017
3018     if self.op.add_uids is not None:
3019       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3020
3021     if self.op.remove_uids is not None:
3022       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3023
3024     if self.op.uid_pool is not None:
3025       self.cluster.uid_pool = self.op.uid_pool
3026
3027     if self.op.default_iallocator is not None:
3028       self.cluster.default_iallocator = self.op.default_iallocator
3029
3030     if self.op.reserved_lvs is not None:
3031       self.cluster.reserved_lvs = self.op.reserved_lvs
3032
3033     def helper_os(aname, mods, desc):
3034       desc += " OS list"
3035       lst = getattr(self.cluster, aname)
3036       for key, val in mods:
3037         if key == constants.DDM_ADD:
3038           if val in lst:
3039             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3040           else:
3041             lst.append(val)
3042         elif key == constants.DDM_REMOVE:
3043           if val in lst:
3044             lst.remove(val)
3045           else:
3046             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3047         else:
3048           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3049
3050     if self.op.hidden_os:
3051       helper_os("hidden_os", self.op.hidden_os, "hidden")
3052
3053     if self.op.blacklisted_os:
3054       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3055
3056     if self.op.master_netdev:
3057       master = self.cfg.GetMasterNode()
3058       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3059                   self.cluster.master_netdev)
3060       result = self.rpc.call_node_stop_master(master, False)
3061       result.Raise("Could not disable the master ip")
3062       feedback_fn("Changing master_netdev from %s to %s" %
3063                   (self.cluster.master_netdev, self.op.master_netdev))
3064       self.cluster.master_netdev = self.op.master_netdev
3065
3066     self.cfg.Update(self.cluster, feedback_fn)
3067
3068     if self.op.master_netdev:
3069       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3070                   self.op.master_netdev)
3071       result = self.rpc.call_node_start_master(master, False, False)
3072       if result.fail_msg:
3073         self.LogWarning("Could not re-enable the master ip on"
3074                         " the master, please restart manually: %s",
3075                         result.fail_msg)
3076
3077
3078 def _UploadHelper(lu, nodes, fname):
3079   """Helper for uploading a file and showing warnings.
3080
3081   """
3082   if os.path.exists(fname):
3083     result = lu.rpc.call_upload_file(nodes, fname)
3084     for to_node, to_result in result.items():
3085       msg = to_result.fail_msg
3086       if msg:
3087         msg = ("Copy of file %s to node %s failed: %s" %
3088                (fname, to_node, msg))
3089         lu.proc.LogWarning(msg)
3090
3091
3092 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3093   """Distribute additional files which are part of the cluster configuration.
3094
3095   ConfigWriter takes care of distributing the config and ssconf files, but
3096   there are more files which should be distributed to all nodes. This function
3097   makes sure those are copied.
3098
3099   @param lu: calling logical unit
3100   @param additional_nodes: list of nodes not in the config to distribute to
3101   @type additional_vm: boolean
3102   @param additional_vm: whether the additional nodes are vm-capable or not
3103
3104   """
3105   # 1. Gather target nodes
3106   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3107   dist_nodes = lu.cfg.GetOnlineNodeList()
3108   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3109   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3110   if additional_nodes is not None:
3111     dist_nodes.extend(additional_nodes)
3112     if additional_vm:
3113       vm_nodes.extend(additional_nodes)
3114   if myself.name in dist_nodes:
3115     dist_nodes.remove(myself.name)
3116   if myself.name in vm_nodes:
3117     vm_nodes.remove(myself.name)
3118
3119   # 2. Gather files to distribute
3120   dist_files = set([constants.ETC_HOSTS,
3121                     constants.SSH_KNOWN_HOSTS_FILE,
3122                     constants.RAPI_CERT_FILE,
3123                     constants.RAPI_USERS_FILE,
3124                     constants.CONFD_HMAC_KEY,
3125                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3126                    ])
3127
3128   vm_files = set()
3129   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3130   for hv_name in enabled_hypervisors:
3131     hv_class = hypervisor.GetHypervisor(hv_name)
3132     vm_files.update(hv_class.GetAncillaryFiles())
3133
3134   # 3. Perform the files upload
3135   for fname in dist_files:
3136     _UploadHelper(lu, dist_nodes, fname)
3137   for fname in vm_files:
3138     _UploadHelper(lu, vm_nodes, fname)
3139
3140
3141 class LUClusterRedistConf(NoHooksLU):
3142   """Force the redistribution of cluster configuration.
3143
3144   This is a very simple LU.
3145
3146   """
3147   REQ_BGL = False
3148
3149   def ExpandNames(self):
3150     self.needed_locks = {
3151       locking.LEVEL_NODE: locking.ALL_SET,
3152     }
3153     self.share_locks[locking.LEVEL_NODE] = 1
3154
3155   def Exec(self, feedback_fn):
3156     """Redistribute the configuration.
3157
3158     """
3159     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3160     _RedistributeAncillaryFiles(self)
3161
3162
3163 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3164   """Sleep and poll for an instance's disk to sync.
3165
3166   """
3167   if not instance.disks or disks is not None and not disks:
3168     return True
3169
3170   disks = _ExpandCheckDisks(instance, disks)
3171
3172   if not oneshot:
3173     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3174
3175   node = instance.primary_node
3176
3177   for dev in disks:
3178     lu.cfg.SetDiskID(dev, node)
3179
3180   # TODO: Convert to utils.Retry
3181
3182   retries = 0
3183   degr_retries = 10 # in seconds, as we sleep 1 second each time
3184   while True:
3185     max_time = 0
3186     done = True
3187     cumul_degraded = False
3188     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3189     msg = rstats.fail_msg
3190     if msg:
3191       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3192       retries += 1
3193       if retries >= 10:
3194         raise errors.RemoteError("Can't contact node %s for mirror data,"
3195                                  " aborting." % node)
3196       time.sleep(6)
3197       continue
3198     rstats = rstats.payload
3199     retries = 0
3200     for i, mstat in enumerate(rstats):
3201       if mstat is None:
3202         lu.LogWarning("Can't compute data for node %s/%s",
3203                            node, disks[i].iv_name)
3204         continue
3205
3206       cumul_degraded = (cumul_degraded or
3207                         (mstat.is_degraded and mstat.sync_percent is None))
3208       if mstat.sync_percent is not None:
3209         done = False
3210         if mstat.estimated_time is not None:
3211           rem_time = ("%s remaining (estimated)" %
3212                       utils.FormatSeconds(mstat.estimated_time))
3213           max_time = mstat.estimated_time
3214         else:
3215           rem_time = "no time estimate"
3216         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3217                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3218
3219     # if we're done but degraded, let's do a few small retries, to
3220     # make sure we see a stable and not transient situation; therefore
3221     # we force restart of the loop
3222     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3223       logging.info("Degraded disks found, %d retries left", degr_retries)
3224       degr_retries -= 1
3225       time.sleep(1)
3226       continue
3227
3228     if done or oneshot:
3229       break
3230
3231     time.sleep(min(60, max_time))
3232
3233   if done:
3234     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3235   return not cumul_degraded
3236
3237
3238 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3239   """Check that mirrors are not degraded.
3240
3241   The ldisk parameter, if True, will change the test from the
3242   is_degraded attribute (which represents overall non-ok status for
3243   the device(s)) to the ldisk (representing the local storage status).
3244
3245   """
3246   lu.cfg.SetDiskID(dev, node)
3247
3248   result = True
3249
3250   if on_primary or dev.AssembleOnSecondary():
3251     rstats = lu.rpc.call_blockdev_find(node, dev)
3252     msg = rstats.fail_msg
3253     if msg:
3254       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3255       result = False
3256     elif not rstats.payload:
3257       lu.LogWarning("Can't find disk on node %s", node)
3258       result = False
3259     else:
3260       if ldisk:
3261         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3262       else:
3263         result = result and not rstats.payload.is_degraded
3264
3265   if dev.children:
3266     for child in dev.children:
3267       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3268
3269   return result
3270
3271
3272 class LUOobCommand(NoHooksLU):
3273   """Logical unit for OOB handling.
3274
3275   """
3276   REG_BGL = False
3277   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3278
3279   def CheckPrereq(self):
3280     """Check prerequisites.
3281
3282     This checks:
3283      - the node exists in the configuration
3284      - OOB is supported
3285
3286     Any errors are signaled by raising errors.OpPrereqError.
3287
3288     """
3289     self.nodes = []
3290     self.master_node = self.cfg.GetMasterNode()
3291
3292     assert self.op.power_delay >= 0.0
3293
3294     if self.op.node_names:
3295       if self.op.command in self._SKIP_MASTER:
3296         if self.master_node in self.op.node_names:
3297           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3298           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3299
3300           if master_oob_handler:
3301             additional_text = ("Run '%s %s %s' if you want to operate on the"
3302                                " master regardless") % (master_oob_handler,
3303                                                         self.op.command,
3304                                                         self.master_node)
3305           else:
3306             additional_text = "The master node does not support out-of-band"
3307
3308           raise errors.OpPrereqError(("Operating on the master node %s is not"
3309                                       " allowed for %s\n%s") %
3310                                      (self.master_node, self.op.command,
3311                                       additional_text), errors.ECODE_INVAL)
3312     else:
3313       self.op.node_names = self.cfg.GetNodeList()
3314       if self.op.command in self._SKIP_MASTER:
3315         self.op.node_names.remove(self.master_node)
3316
3317     if self.op.command in self._SKIP_MASTER:
3318       assert self.master_node not in self.op.node_names
3319
3320     for node_name in self.op.node_names:
3321       node = self.cfg.GetNodeInfo(node_name)
3322
3323       if node is None:
3324         raise errors.OpPrereqError("Node %s not found" % node_name,
3325                                    errors.ECODE_NOENT)
3326       else:
3327         self.nodes.append(node)
3328
3329       if (not self.op.ignore_status and
3330           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3331         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3332                                     " not marked offline") % node_name,
3333                                    errors.ECODE_STATE)
3334
3335   def ExpandNames(self):
3336     """Gather locks we need.
3337
3338     """
3339     if self.op.node_names:
3340       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3341                             for name in self.op.node_names]
3342       lock_names = self.op.node_names
3343     else:
3344       lock_names = locking.ALL_SET
3345
3346     self.needed_locks = {
3347       locking.LEVEL_NODE: lock_names,
3348       }
3349
3350   def Exec(self, feedback_fn):
3351     """Execute OOB and return result if we expect any.
3352
3353     """
3354     master_node = self.master_node
3355     ret = []
3356
3357     for idx, node in enumerate(self.nodes):
3358       node_entry = [(constants.RS_NORMAL, node.name)]
3359       ret.append(node_entry)
3360
3361       oob_program = _SupportsOob(self.cfg, node)
3362
3363       if not oob_program:
3364         node_entry.append((constants.RS_UNAVAIL, None))
3365         continue
3366
3367       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3368                    self.op.command, oob_program, node.name)
3369       result = self.rpc.call_run_oob(master_node, oob_program,
3370                                      self.op.command, node.name,
3371                                      self.op.timeout)
3372
3373       if result.fail_msg:
3374         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3375                         node.name, result.fail_msg)
3376         node_entry.append((constants.RS_NODATA, None))
3377       else:
3378         try:
3379           self._CheckPayload(result)
3380         except errors.OpExecError, err:
3381           self.LogWarning("The payload returned by '%s' is not valid: %s",
3382                           node.name, err)
3383           node_entry.append((constants.RS_NODATA, None))
3384         else:
3385           if self.op.command == constants.OOB_HEALTH:
3386             # For health we should log important events
3387             for item, status in result.payload:
3388               if status in [constants.OOB_STATUS_WARNING,
3389                             constants.OOB_STATUS_CRITICAL]:
3390                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3391                                 node.name, item, status)
3392
3393           if self.op.command == constants.OOB_POWER_ON:
3394             node.powered = True
3395           elif self.op.command == constants.OOB_POWER_OFF:
3396             node.powered = False
3397           elif self.op.command == constants.OOB_POWER_STATUS:
3398             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3399             if powered != node.powered:
3400               logging.warning(("Recorded power state (%s) of node '%s' does not"
3401                                " match actual power state (%s)"), node.powered,
3402                               node.name, powered)
3403
3404           # For configuration changing commands we should update the node
3405           if self.op.command in (constants.OOB_POWER_ON,
3406                                  constants.OOB_POWER_OFF):
3407             self.cfg.Update(node, feedback_fn)
3408
3409           node_entry.append((constants.RS_NORMAL, result.payload))
3410
3411           if (self.op.command == constants.OOB_POWER_ON and
3412               idx < len(self.nodes) - 1):
3413             time.sleep(self.op.power_delay)
3414
3415     return ret
3416
3417   def _CheckPayload(self, result):
3418     """Checks if the payload is valid.
3419
3420     @param result: RPC result
3421     @raises errors.OpExecError: If payload is not valid
3422
3423     """
3424     errs = []
3425     if self.op.command == constants.OOB_HEALTH:
3426       if not isinstance(result.payload, list):
3427         errs.append("command 'health' is expected to return a list but got %s" %
3428                     type(result.payload))
3429       else:
3430         for item, status in result.payload:
3431           if status not in constants.OOB_STATUSES:
3432             errs.append("health item '%s' has invalid status '%s'" %
3433                         (item, status))
3434
3435     if self.op.command == constants.OOB_POWER_STATUS:
3436       if not isinstance(result.payload, dict):
3437         errs.append("power-status is expected to return a dict but got %s" %
3438                     type(result.payload))
3439
3440     if self.op.command in [
3441         constants.OOB_POWER_ON,
3442         constants.OOB_POWER_OFF,
3443         constants.OOB_POWER_CYCLE,
3444         ]:
3445       if result.payload is not None:
3446         errs.append("%s is expected to not return payload but got '%s'" %
3447                     (self.op.command, result.payload))
3448
3449     if errs:
3450       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3451                                utils.CommaJoin(errs))
3452
3453 class _OsQuery(_QueryBase):
3454   FIELDS = query.OS_FIELDS
3455
3456   def ExpandNames(self, lu):
3457     # Lock all nodes in shared mode
3458     # Temporary removal of locks, should be reverted later
3459     # TODO: reintroduce locks when they are lighter-weight
3460     lu.needed_locks = {}
3461     #self.share_locks[locking.LEVEL_NODE] = 1
3462     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3463
3464     # The following variables interact with _QueryBase._GetNames
3465     if self.names:
3466       self.wanted = self.names
3467     else:
3468       self.wanted = locking.ALL_SET
3469
3470     self.do_locking = self.use_locking
3471
3472   def DeclareLocks(self, lu, level):
3473     pass
3474
3475   @staticmethod
3476   def _DiagnoseByOS(rlist):
3477     """Remaps a per-node return list into an a per-os per-node dictionary
3478
3479     @param rlist: a map with node names as keys and OS objects as values
3480
3481     @rtype: dict
3482     @return: a dictionary with osnames as keys and as value another
3483         map, with nodes as keys and tuples of (path, status, diagnose,
3484         variants, parameters, api_versions) as values, eg::
3485
3486           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3487                                      (/srv/..., False, "invalid api")],
3488                            "node2": [(/srv/..., True, "", [], [])]}
3489           }
3490
3491     """
3492     all_os = {}
3493     # we build here the list of nodes that didn't fail the RPC (at RPC
3494     # level), so that nodes with a non-responding node daemon don't
3495     # make all OSes invalid
3496     good_nodes = [node_name for node_name in rlist
3497                   if not rlist[node_name].fail_msg]
3498     for node_name, nr in rlist.items():
3499       if nr.fail_msg or not nr.payload:
3500         continue
3501       for (name, path, status, diagnose, variants,
3502            params, api_versions) in nr.payload:
3503         if name not in all_os:
3504           # build a list of nodes for this os containing empty lists
3505           # for each node in node_list
3506           all_os[name] = {}
3507           for nname in good_nodes:
3508             all_os[name][nname] = []
3509         # convert params from [name, help] to (name, help)
3510         params = [tuple(v) for v in params]
3511         all_os[name][node_name].append((path, status, diagnose,
3512                                         variants, params, api_versions))
3513     return all_os
3514
3515   def _GetQueryData(self, lu):
3516     """Computes the list of nodes and their attributes.
3517
3518     """
3519     # Locking is not used
3520     assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3521
3522     valid_nodes = [node.name
3523                    for node in lu.cfg.GetAllNodesInfo().values()
3524                    if not node.offline and node.vm_capable]
3525     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3526     cluster = lu.cfg.GetClusterInfo()
3527
3528     data = {}
3529
3530     for (os_name, os_data) in pol.items():
3531       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3532                           hidden=(os_name in cluster.hidden_os),
3533                           blacklisted=(os_name in cluster.blacklisted_os))
3534
3535       variants = set()
3536       parameters = set()
3537       api_versions = set()
3538
3539       for idx, osl in enumerate(os_data.values()):
3540         info.valid = bool(info.valid and osl and osl[0][1])
3541         if not info.valid:
3542           break
3543
3544         (node_variants, node_params, node_api) = osl[0][3:6]
3545         if idx == 0:
3546           # First entry
3547           variants.update(node_variants)
3548           parameters.update(node_params)
3549           api_versions.update(node_api)
3550         else:
3551           # Filter out inconsistent values
3552           variants.intersection_update(node_variants)
3553           parameters.intersection_update(node_params)
3554           api_versions.intersection_update(node_api)
3555
3556       info.variants = list(variants)
3557       info.parameters = list(parameters)
3558       info.api_versions = list(api_versions)
3559
3560       data[os_name] = info
3561
3562     # Prepare data in requested order
3563     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3564             if name in data]
3565
3566
3567 class LUOsDiagnose(NoHooksLU):
3568   """Logical unit for OS diagnose/query.
3569
3570   """
3571   REQ_BGL = False
3572
3573   @staticmethod
3574   def _BuildFilter(fields, names):
3575     """Builds a filter for querying OSes.
3576
3577     """
3578     name_filter = qlang.MakeSimpleFilter("name", names)
3579
3580     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3581     # respective field is not requested
3582     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3583                      for fname in ["hidden", "blacklisted"]
3584                      if fname not in fields]
3585     if "valid" not in fields:
3586       status_filter.append([qlang.OP_TRUE, "valid"])
3587
3588     if status_filter:
3589       status_filter.insert(0, qlang.OP_AND)
3590     else:
3591       status_filter = None
3592
3593     if name_filter and status_filter:
3594       return [qlang.OP_AND, name_filter, status_filter]
3595     elif name_filter:
3596       return name_filter
3597     else:
3598       return status_filter
3599
3600   def CheckArguments(self):
3601     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3602                        self.op.output_fields, False)
3603
3604   def ExpandNames(self):
3605     self.oq.ExpandNames(self)
3606
3607   def Exec(self, feedback_fn):
3608     return self.oq.OldStyleQuery(self)
3609
3610
3611 class LUNodeRemove(LogicalUnit):
3612   """Logical unit for removing a node.
3613
3614   """
3615   HPATH = "node-remove"
3616   HTYPE = constants.HTYPE_NODE
3617
3618   def BuildHooksEnv(self):
3619     """Build hooks env.
3620
3621     This doesn't run on the target node in the pre phase as a failed
3622     node would then be impossible to remove.
3623
3624     """
3625     return {
3626       "OP_TARGET": self.op.node_name,
3627       "NODE_NAME": self.op.node_name,
3628       }
3629
3630   def BuildHooksNodes(self):
3631     """Build hooks nodes.
3632
3633     """
3634     all_nodes = self.cfg.GetNodeList()
3635     try:
3636       all_nodes.remove(self.op.node_name)
3637     except ValueError:
3638       logging.warning("Node '%s', which is about to be removed, was not found"
3639                       " in the list of all nodes", self.op.node_name)
3640     return (all_nodes, all_nodes)
3641
3642   def CheckPrereq(self):
3643     """Check prerequisites.
3644
3645     This checks:
3646      - the node exists in the configuration
3647      - it does not have primary or secondary instances
3648      - it's not the master
3649
3650     Any errors are signaled by raising errors.OpPrereqError.
3651
3652     """
3653     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3654     node = self.cfg.GetNodeInfo(self.op.node_name)
3655     assert node is not None
3656
3657     instance_list = self.cfg.GetInstanceList()
3658
3659     masternode = self.cfg.GetMasterNode()
3660     if node.name == masternode:
3661       raise errors.OpPrereqError("Node is the master node,"
3662                                  " you need to failover first.",
3663                                  errors.ECODE_INVAL)
3664
3665     for instance_name in instance_list:
3666       instance = self.cfg.GetInstanceInfo(instance_name)
3667       if node.name in instance.all_nodes:
3668         raise errors.OpPrereqError("Instance %s is still running on the node,"
3669                                    " please remove first." % instance_name,
3670                                    errors.ECODE_INVAL)
3671     self.op.node_name = node.name
3672     self.node = node
3673
3674   def Exec(self, feedback_fn):
3675     """Removes the node from the cluster.
3676
3677     """
3678     node = self.node
3679     logging.info("Stopping the node daemon and removing configs from node %s",
3680                  node.name)
3681
3682     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3683
3684     # Promote nodes to master candidate as needed
3685     _AdjustCandidatePool(self, exceptions=[node.name])
3686     self.context.RemoveNode(node.name)
3687
3688     # Run post hooks on the node before it's removed
3689     _RunPostHook(self, node.name)
3690
3691     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3692     msg = result.fail_msg
3693     if msg:
3694       self.LogWarning("Errors encountered on the remote node while leaving"
3695                       " the cluster: %s", msg)
3696
3697     # Remove node from our /etc/hosts
3698     if self.cfg.GetClusterInfo().modify_etc_hosts:
3699       master_node = self.cfg.GetMasterNode()
3700       result = self.rpc.call_etc_hosts_modify(master_node,
3701                                               constants.ETC_HOSTS_REMOVE,
3702                                               node.name, None)
3703       result.Raise("Can't update hosts file with new host data")
3704       _RedistributeAncillaryFiles(self)
3705
3706
3707 class _NodeQuery(_QueryBase):
3708   FIELDS = query.NODE_FIELDS
3709
3710   def ExpandNames(self, lu):
3711     lu.needed_locks = {}
3712     lu.share_locks[locking.LEVEL_NODE] = 1
3713
3714     if self.names:
3715       self.wanted = _GetWantedNodes(lu, self.names)
3716     else:
3717       self.wanted = locking.ALL_SET
3718
3719     self.do_locking = (self.use_locking and
3720                        query.NQ_LIVE in self.requested_data)
3721
3722     if self.do_locking:
3723       # if we don't request only static fields, we need to lock the nodes
3724       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3725
3726   def DeclareLocks(self, lu, level):
3727     pass
3728
3729   def _GetQueryData(self, lu):
3730     """Computes the list of nodes and their attributes.
3731
3732     """
3733     all_info = lu.cfg.GetAllNodesInfo()
3734
3735     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3736
3737     # Gather data as requested
3738     if query.NQ_LIVE in self.requested_data:
3739       # filter out non-vm_capable nodes
3740       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3741
3742       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3743                                         lu.cfg.GetHypervisorType())
3744       live_data = dict((name, nresult.payload)
3745                        for (name, nresult) in node_data.items()
3746                        if not nresult.fail_msg and nresult.payload)
3747     else:
3748       live_data = None
3749
3750     if query.NQ_INST in self.requested_data:
3751       node_to_primary = dict([(name, set()) for name in nodenames])
3752       node_to_secondary = dict([(name, set()) for name in nodenames])
3753
3754       inst_data = lu.cfg.GetAllInstancesInfo()
3755
3756       for inst in inst_data.values():
3757         if inst.primary_node in node_to_primary:
3758           node_to_primary[inst.primary_node].add(inst.name)
3759         for secnode in inst.secondary_nodes:
3760           if secnode in node_to_secondary:
3761             node_to_secondary[secnode].add(inst.name)
3762     else:
3763       node_to_primary = None
3764       node_to_secondary = None
3765
3766     if query.NQ_OOB in self.requested_data:
3767       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3768                          for name, node in all_info.iteritems())
3769     else:
3770       oob_support = None
3771
3772     if query.NQ_GROUP in self.requested_data:
3773       groups = lu.cfg.GetAllNodeGroupsInfo()
3774     else:
3775       groups = {}
3776
3777     return query.NodeQueryData([all_info[name] for name in nodenames],
3778                                live_data, lu.cfg.GetMasterNode(),
3779                                node_to_primary, node_to_secondary, groups,
3780                                oob_support, lu.cfg.GetClusterInfo())
3781
3782
3783 class LUNodeQuery(NoHooksLU):
3784   """Logical unit for querying nodes.
3785
3786   """
3787   # pylint: disable-msg=W0142
3788   REQ_BGL = False
3789
3790   def CheckArguments(self):
3791     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3792                          self.op.output_fields, self.op.use_locking)
3793
3794   def ExpandNames(self):
3795     self.nq.ExpandNames(self)
3796
3797   def Exec(self, feedback_fn):
3798     return self.nq.OldStyleQuery(self)
3799
3800
3801 class LUNodeQueryvols(NoHooksLU):
3802   """Logical unit for getting volumes on node(s).
3803
3804   """
3805   REQ_BGL = False
3806   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3807   _FIELDS_STATIC = utils.FieldSet("node")
3808
3809   def CheckArguments(self):
3810     _CheckOutputFields(static=self._FIELDS_STATIC,
3811                        dynamic=self._FIELDS_DYNAMIC,
3812                        selected=self.op.output_fields)
3813
3814   def ExpandNames(self):
3815     self.needed_locks = {}
3816     self.share_locks[locking.LEVEL_NODE] = 1
3817     if not self.op.nodes:
3818       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3819     else:
3820       self.needed_locks[locking.LEVEL_NODE] = \
3821         _GetWantedNodes(self, self.op.nodes)
3822
3823   def Exec(self, feedback_fn):
3824     """Computes the list of nodes and their attributes.
3825
3826     """
3827     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3828     volumes = self.rpc.call_node_volumes(nodenames)
3829
3830     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3831              in self.cfg.GetInstanceList()]
3832
3833     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3834
3835     output = []
3836     for node in nodenames:
3837       nresult = volumes[node]
3838       if nresult.offline:
3839         continue
3840       msg = nresult.fail_msg
3841       if msg:
3842         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3843         continue
3844
3845       node_vols = nresult.payload[:]
3846       node_vols.sort(key=lambda vol: vol['dev'])
3847
3848       for vol in node_vols:
3849         node_output = []
3850         for field in self.op.output_fields:
3851           if field == "node":
3852             val = node
3853           elif field == "phys":
3854             val = vol['dev']
3855           elif field == "vg":
3856             val = vol['vg']
3857           elif field == "name":
3858             val = vol['name']
3859           elif field == "size":
3860             val = int(float(vol['size']))
3861           elif field == "instance":
3862             for inst in ilist:
3863               if node not in lv_by_node[inst]:
3864                 continue
3865               if vol['name'] in lv_by_node[inst][node]:
3866                 val = inst.name
3867                 break
3868             else:
3869               val = '-'
3870           else:
3871             raise errors.ParameterError(field)
3872           node_output.append(str(val))
3873
3874         output.append(node_output)
3875
3876     return output
3877
3878
3879 class LUNodeQueryStorage(NoHooksLU):
3880   """Logical unit for getting information on storage units on node(s).
3881
3882   """
3883   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3884   REQ_BGL = False
3885
3886   def CheckArguments(self):
3887     _CheckOutputFields(static=self._FIELDS_STATIC,
3888                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3889                        selected=self.op.output_fields)
3890
3891   def ExpandNames(self):
3892     self.needed_locks = {}
3893     self.share_locks[locking.LEVEL_NODE] = 1
3894
3895     if self.op.nodes:
3896       self.needed_locks[locking.LEVEL_NODE] = \
3897         _GetWantedNodes(self, self.op.nodes)
3898     else:
3899       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3900
3901   def Exec(self, feedback_fn):
3902     """Computes the list of nodes and their attributes.
3903
3904     """
3905     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3906
3907     # Always get name to sort by
3908     if constants.SF_NAME in self.op.output_fields:
3909       fields = self.op.output_fields[:]
3910     else:
3911       fields = [constants.SF_NAME] + self.op.output_fields
3912
3913     # Never ask for node or type as it's only known to the LU
3914     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3915       while extra in fields:
3916         fields.remove(extra)
3917
3918     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3919     name_idx = field_idx[constants.SF_NAME]
3920
3921     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3922     data = self.rpc.call_storage_list(self.nodes,
3923                                       self.op.storage_type, st_args,
3924                                       self.op.name, fields)
3925
3926     result = []
3927
3928     for node in utils.NiceSort(self.nodes):
3929       nresult = data[node]
3930       if nresult.offline:
3931         continue
3932
3933       msg = nresult.fail_msg
3934       if msg:
3935         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3936         continue
3937
3938       rows = dict([(row[name_idx], row) for row in nresult.payload])
3939
3940       for name in utils.NiceSort(rows.keys()):
3941         row = rows[name]
3942
3943         out = []
3944
3945         for field in self.op.output_fields:
3946           if field == constants.SF_NODE:
3947             val = node
3948           elif field == constants.SF_TYPE:
3949             val = self.op.storage_type
3950           elif field in field_idx:
3951             val = row[field_idx[field]]
3952           else:
3953             raise errors.ParameterError(field)
3954
3955           out.append(val)
3956
3957         result.append(out)
3958
3959     return result
3960
3961
3962 class _InstanceQuery(_QueryBase):
3963   FIELDS = query.INSTANCE_FIELDS
3964
3965   def ExpandNames(self, lu):
3966     lu.needed_locks = {}
3967     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3968     lu.share_locks[locking.LEVEL_NODE] = 1
3969
3970     if self.names:
3971       self.wanted = _GetWantedInstances(lu, self.names)
3972     else:
3973       self.wanted = locking.ALL_SET
3974
3975     self.do_locking = (self.use_locking and
3976                        query.IQ_LIVE in self.requested_data)
3977     if self.do_locking:
3978       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3979       lu.needed_locks[locking.LEVEL_NODE] = []
3980       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3981
3982   def DeclareLocks(self, lu, level):
3983     if level == locking.LEVEL_NODE and self.do_locking:
3984       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3985
3986   def _GetQueryData(self, lu):
3987     """Computes the list of instances and their attributes.
3988
3989     """
3990     cluster = lu.cfg.GetClusterInfo()
3991     all_info = lu.cfg.GetAllInstancesInfo()
3992
3993     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3994
3995     instance_list = [all_info[name] for name in instance_names]
3996     nodes = frozenset(itertools.chain(*(inst.all_nodes
3997                                         for inst in instance_list)))
3998     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3999     bad_nodes = []
4000     offline_nodes = []
4001     wrongnode_inst = set()
4002
4003     # Gather data as requested
4004     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4005       live_data = {}
4006       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4007       for name in nodes:
4008         result = node_data[name]
4009         if result.offline:
4010           # offline nodes will be in both lists
4011           assert result.fail_msg
4012           offline_nodes.append(name)
4013         if result.fail_msg:
4014           bad_nodes.append(name)
4015         elif result.payload:
4016           for inst in result.payload:
4017             if all_info[inst].primary_node == name:
4018               live_data.update(result.payload)
4019             else:
4020               wrongnode_inst.add(inst)
4021         # else no instance is alive
4022     else:
4023       live_data = {}
4024
4025     if query.IQ_DISKUSAGE in self.requested_data:
4026       disk_usage = dict((inst.name,
4027                          _ComputeDiskSize(inst.disk_template,
4028                                           [{"size": disk.size}
4029                                            for disk in inst.disks]))
4030                         for inst in instance_list)
4031     else:
4032       disk_usage = None
4033
4034     if query.IQ_CONSOLE in self.requested_data:
4035       consinfo = {}
4036       for inst in instance_list:
4037         if inst.name in live_data:
4038           # Instance is running
4039           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4040         else:
4041           consinfo[inst.name] = None
4042       assert set(consinfo.keys()) == set(instance_names)
4043     else:
4044       consinfo = None
4045
4046     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4047                                    disk_usage, offline_nodes, bad_nodes,
4048                                    live_data, wrongnode_inst, consinfo)
4049
4050
4051 class LUQuery(NoHooksLU):
4052   """Query for resources/items of a certain kind.
4053
4054   """
4055   # pylint: disable-msg=W0142
4056   REQ_BGL = False
4057
4058   def CheckArguments(self):
4059     qcls = _GetQueryImplementation(self.op.what)
4060
4061     self.impl = qcls(self.op.filter, self.op.fields, False)
4062
4063   def ExpandNames(self):
4064     self.impl.ExpandNames(self)
4065
4066   def DeclareLocks(self, level):
4067     self.impl.DeclareLocks(self, level)
4068
4069   def Exec(self, feedback_fn):
4070     return self.impl.NewStyleQuery(self)
4071
4072
4073 class LUQueryFields(NoHooksLU):
4074   """Query for resources/items of a certain kind.
4075
4076   """
4077   # pylint: disable-msg=W0142
4078   REQ_BGL = False
4079
4080   def CheckArguments(self):
4081     self.qcls = _GetQueryImplementation(self.op.what)
4082
4083   def ExpandNames(self):
4084     self.needed_locks = {}
4085
4086   def Exec(self, feedback_fn):
4087     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4088
4089
4090 class LUNodeModifyStorage(NoHooksLU):
4091   """Logical unit for modifying a storage volume on a node.
4092
4093   """
4094   REQ_BGL = False
4095
4096   def CheckArguments(self):
4097     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4098
4099     storage_type = self.op.storage_type
4100
4101     try:
4102       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4103     except KeyError:
4104       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4105                                  " modified" % storage_type,
4106                                  errors.ECODE_INVAL)
4107
4108     diff = set(self.op.changes.keys()) - modifiable
4109     if diff:
4110       raise errors.OpPrereqError("The following fields can not be modified for"
4111                                  " storage units of type '%s': %r" %
4112                                  (storage_type, list(diff)),
4113                                  errors.ECODE_INVAL)
4114
4115   def ExpandNames(self):
4116     self.needed_locks = {
4117       locking.LEVEL_NODE: self.op.node_name,
4118       }
4119
4120   def Exec(self, feedback_fn):
4121     """Computes the list of nodes and their attributes.
4122
4123     """
4124     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4125     result = self.rpc.call_storage_modify(self.op.node_name,
4126                                           self.op.storage_type, st_args,
4127                                           self.op.name, self.op.changes)
4128     result.Raise("Failed to modify storage unit '%s' on %s" %
4129                  (self.op.name, self.op.node_name))
4130
4131
4132 class LUNodeAdd(LogicalUnit):
4133   """Logical unit for adding node to the cluster.
4134
4135   """
4136   HPATH = "node-add"
4137   HTYPE = constants.HTYPE_NODE
4138   _NFLAGS = ["master_capable", "vm_capable"]
4139
4140   def CheckArguments(self):
4141     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4142     # validate/normalize the node name
4143     self.hostname = netutils.GetHostname(name=self.op.node_name,
4144                                          family=self.primary_ip_family)
4145     self.op.node_name = self.hostname.name
4146     if self.op.readd and self.op.group:
4147       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4148                                  " being readded", errors.ECODE_INVAL)
4149
4150   def BuildHooksEnv(self):
4151     """Build hooks env.
4152
4153     This will run on all nodes before, and on all nodes + the new node after.
4154
4155     """
4156     return {
4157       "OP_TARGET": self.op.node_name,
4158       "NODE_NAME": self.op.node_name,
4159       "NODE_PIP": self.op.primary_ip,
4160       "NODE_SIP": self.op.secondary_ip,
4161       "MASTER_CAPABLE": str(self.op.master_capable),
4162       "VM_CAPABLE": str(self.op.vm_capable),
4163       }
4164
4165   def BuildHooksNodes(self):
4166     """Build hooks nodes.
4167
4168     """
4169     # Exclude added node
4170     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4171     post_nodes = pre_nodes + [self.op.node_name, ]
4172
4173     return (pre_nodes, post_nodes)
4174
4175   def CheckPrereq(self):
4176     """Check prerequisites.
4177
4178     This checks:
4179      - the new node is not already in the config
4180      - it is resolvable
4181      - its parameters (single/dual homed) matches the cluster
4182
4183     Any errors are signaled by raising errors.OpPrereqError.
4184
4185     """
4186     cfg = self.cfg
4187     hostname = self.hostname
4188     node = hostname.name
4189     primary_ip = self.op.primary_ip = hostname.ip
4190     if self.op.secondary_ip is None:
4191       if self.primary_ip_family == netutils.IP6Address.family:
4192         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4193                                    " IPv4 address must be given as secondary",
4194                                    errors.ECODE_INVAL)
4195       self.op.secondary_ip = primary_ip
4196
4197     secondary_ip = self.op.secondary_ip
4198     if not netutils.IP4Address.IsValid(secondary_ip):
4199       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4200                                  " address" % secondary_ip, errors.ECODE_INVAL)
4201
4202     node_list = cfg.GetNodeList()
4203     if not self.op.readd and node in node_list:
4204       raise errors.OpPrereqError("Node %s is already in the configuration" %
4205                                  node, errors.ECODE_EXISTS)
4206     elif self.op.readd and node not in node_list:
4207       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4208                                  errors.ECODE_NOENT)
4209
4210     self.changed_primary_ip = False
4211
4212     for existing_node_name in node_list:
4213       existing_node = cfg.GetNodeInfo(existing_node_name)
4214
4215       if self.op.readd and node == existing_node_name:
4216         if existing_node.secondary_ip != secondary_ip:
4217           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4218                                      " address configuration as before",
4219                                      errors.ECODE_INVAL)
4220         if existing_node.primary_ip != primary_ip:
4221           self.changed_primary_ip = True
4222
4223         continue
4224
4225       if (existing_node.primary_ip == primary_ip or
4226           existing_node.secondary_ip == primary_ip or
4227           existing_node.primary_ip == secondary_ip or
4228           existing_node.secondary_ip == secondary_ip):
4229         raise errors.OpPrereqError("New node ip address(es) conflict with"
4230                                    " existing node %s" % existing_node.name,
4231                                    errors.ECODE_NOTUNIQUE)
4232
4233     # After this 'if' block, None is no longer a valid value for the
4234     # _capable op attributes
4235     if self.op.readd:
4236       old_node = self.cfg.GetNodeInfo(node)
4237       assert old_node is not None, "Can't retrieve locked node %s" % node
4238       for attr in self._NFLAGS:
4239         if getattr(self.op, attr) is None:
4240           setattr(self.op, attr, getattr(old_node, attr))
4241     else:
4242       for attr in self._NFLAGS:
4243         if getattr(self.op, attr) is None:
4244           setattr(self.op, attr, True)
4245
4246     if self.op.readd and not self.op.vm_capable:
4247       pri, sec = cfg.GetNodeInstances(node)
4248       if pri or sec:
4249         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4250                                    " flag set to false, but it already holds"
4251                                    " instances" % node,
4252                                    errors.ECODE_STATE)
4253
4254     # check that the type of the node (single versus dual homed) is the
4255     # same as for the master
4256     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4257     master_singlehomed = myself.secondary_ip == myself.primary_ip
4258     newbie_singlehomed = secondary_ip == primary_ip
4259     if master_singlehomed != newbie_singlehomed:
4260       if master_singlehomed:
4261         raise errors.OpPrereqError("The master has no secondary ip but the"
4262                                    " new node has one",
4263                                    errors.ECODE_INVAL)
4264       else:
4265         raise errors.OpPrereqError("The master has a secondary ip but the"
4266                                    " new node doesn't have one",
4267                                    errors.ECODE_INVAL)
4268
4269     # checks reachability
4270     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4271       raise errors.OpPrereqError("Node not reachable by ping",
4272                                  errors.ECODE_ENVIRON)
4273
4274     if not newbie_singlehomed:
4275       # check reachability from my secondary ip to newbie's secondary ip
4276       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4277                            source=myself.secondary_ip):
4278         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4279                                    " based ping to node daemon port",
4280                                    errors.ECODE_ENVIRON)
4281
4282     if self.op.readd:
4283       exceptions = [node]
4284     else:
4285       exceptions = []
4286
4287     if self.op.master_capable:
4288       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4289     else:
4290       self.master_candidate = False
4291
4292     if self.op.readd:
4293       self.new_node = old_node
4294     else:
4295       node_group = cfg.LookupNodeGroup(self.op.group)
4296       self.new_node = objects.Node(name=node,
4297                                    primary_ip=primary_ip,
4298                                    secondary_ip=secondary_ip,
4299                                    master_candidate=self.master_candidate,
4300                                    offline=False, drained=False,
4301                                    group=node_group)
4302
4303     if self.op.ndparams:
4304       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4305
4306   def Exec(self, feedback_fn):
4307     """Adds the new node to the cluster.
4308
4309     """
4310     new_node = self.new_node
4311     node = new_node.name
4312
4313     # We adding a new node so we assume it's powered
4314     new_node.powered = True
4315
4316     # for re-adds, reset the offline/drained/master-candidate flags;
4317     # we need to reset here, otherwise offline would prevent RPC calls
4318     # later in the procedure; this also means that if the re-add
4319     # fails, we are left with a non-offlined, broken node
4320     if self.op.readd:
4321       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4322       self.LogInfo("Readding a node, the offline/drained flags were reset")
4323       # if we demote the node, we do cleanup later in the procedure
4324       new_node.master_candidate = self.master_candidate
4325       if self.changed_primary_ip:
4326         new_node.primary_ip = self.op.primary_ip
4327
4328     # copy the master/vm_capable flags
4329     for attr in self._NFLAGS:
4330       setattr(new_node, attr, getattr(self.op, attr))
4331
4332     # notify the user about any possible mc promotion
4333     if new_node.master_candidate:
4334       self.LogInfo("Node will be a master candidate")
4335
4336     if self.op.ndparams:
4337       new_node.ndparams = self.op.ndparams
4338     else:
4339       new_node.ndparams = {}
4340
4341     # check connectivity
4342     result = self.rpc.call_version([node])[node]
4343     result.Raise("Can't get version information from node %s" % node)
4344     if constants.PROTOCOL_VERSION == result.payload:
4345       logging.info("Communication to node %s fine, sw version %s match",
4346                    node, result.payload)
4347     else:
4348       raise errors.OpExecError("Version mismatch master version %s,"
4349                                " node version %s" %
4350                                (constants.PROTOCOL_VERSION, result.payload))
4351
4352     # Add node to our /etc/hosts, and add key to known_hosts
4353     if self.cfg.GetClusterInfo().modify_etc_hosts:
4354       master_node = self.cfg.GetMasterNode()
4355       result = self.rpc.call_etc_hosts_modify(master_node,
4356                                               constants.ETC_HOSTS_ADD,
4357                                               self.hostname.name,
4358                                               self.hostname.ip)
4359       result.Raise("Can't update hosts file with new host data")
4360
4361     if new_node.secondary_ip != new_node.primary_ip:
4362       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4363                                False)
4364
4365     node_verify_list = [self.cfg.GetMasterNode()]
4366     node_verify_param = {
4367       constants.NV_NODELIST: [node],
4368       # TODO: do a node-net-test as well?
4369     }
4370
4371     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4372                                        self.cfg.GetClusterName())
4373     for verifier in node_verify_list:
4374       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4375       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4376       if nl_payload:
4377         for failed in nl_payload:
4378           feedback_fn("ssh/hostname verification failed"
4379                       " (checking from %s): %s" %
4380                       (verifier, nl_payload[failed]))
4381         raise errors.OpExecError("ssh/hostname verification failed.")
4382
4383     if self.op.readd:
4384       _RedistributeAncillaryFiles(self)
4385       self.context.ReaddNode(new_node)
4386       # make sure we redistribute the config
4387       self.cfg.Update(new_node, feedback_fn)
4388       # and make sure the new node will not have old files around
4389       if not new_node.master_candidate:
4390         result = self.rpc.call_node_demote_from_mc(new_node.name)
4391         msg = result.fail_msg
4392         if msg:
4393           self.LogWarning("Node failed to demote itself from master"
4394                           " candidate status: %s" % msg)
4395     else:
4396       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4397                                   additional_vm=self.op.vm_capable)
4398       self.context.AddNode(new_node, self.proc.GetECId())
4399
4400
4401 class LUNodeSetParams(LogicalUnit):
4402   """Modifies the parameters of a node.
4403
4404   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4405       to the node role (as _ROLE_*)
4406   @cvar _R2F: a dictionary from node role to tuples of flags
4407   @cvar _FLAGS: a list of attribute names corresponding to the flags
4408
4409   """
4410   HPATH = "node-modify"
4411   HTYPE = constants.HTYPE_NODE
4412   REQ_BGL = False
4413   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4414   _F2R = {
4415     (True, False, False): _ROLE_CANDIDATE,
4416     (False, True, False): _ROLE_DRAINED,
4417     (False, False, True): _ROLE_OFFLINE,
4418     (False, False, False): _ROLE_REGULAR,
4419     }
4420   _R2F = dict((v, k) for k, v in _F2R.items())
4421   _FLAGS = ["master_candidate", "drained", "offline"]
4422
4423   def CheckArguments(self):
4424     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4425     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4426                 self.op.master_capable, self.op.vm_capable,
4427                 self.op.secondary_ip, self.op.ndparams]
4428     if all_mods.count(None) == len(all_mods):
4429       raise errors.OpPrereqError("Please pass at least one modification",
4430                                  errors.ECODE_INVAL)
4431     if all_mods.count(True) > 1:
4432       raise errors.OpPrereqError("Can't set the node into more than one"
4433                                  " state at the same time",
4434                                  errors.ECODE_INVAL)
4435
4436     # Boolean value that tells us whether we might be demoting from MC
4437     self.might_demote = (self.op.master_candidate == False or
4438                          self.op.offline == True or
4439                          self.op.drained == True or
4440                          self.op.master_capable == False)
4441
4442     if self.op.secondary_ip:
4443       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4444         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4445                                    " address" % self.op.secondary_ip,
4446                                    errors.ECODE_INVAL)
4447
4448     self.lock_all = self.op.auto_promote and self.might_demote
4449     self.lock_instances = self.op.secondary_ip is not None
4450
4451   def ExpandNames(self):
4452     if self.lock_all:
4453       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4454     else:
4455       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4456
4457     if self.lock_instances:
4458       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4459
4460   def DeclareLocks(self, level):
4461     # If we have locked all instances, before waiting to lock nodes, release
4462     # all the ones living on nodes unrelated to the current operation.
4463     if level == locking.LEVEL_NODE and self.lock_instances:
4464       instances_release = []
4465       instances_keep = []
4466       self.affected_instances = []
4467       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4468         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4469           instance = self.context.cfg.GetInstanceInfo(instance_name)
4470           i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4471           if i_mirrored and self.op.node_name in instance.all_nodes:
4472             instances_keep.append(instance_name)
4473             self.affected_instances.append(instance)
4474           else:
4475             instances_release.append(instance_name)
4476         if instances_release:
4477           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4478           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4479
4480   def BuildHooksEnv(self):
4481     """Build hooks env.
4482
4483     This runs on the master node.
4484
4485     """
4486     return {
4487       "OP_TARGET": self.op.node_name,
4488       "MASTER_CANDIDATE": str(self.op.master_candidate),
4489       "OFFLINE": str(self.op.offline),
4490       "DRAINED": str(self.op.drained),
4491       "MASTER_CAPABLE": str(self.op.master_capable),
4492       "VM_CAPABLE": str(self.op.vm_capable),
4493       }
4494
4495   def BuildHooksNodes(self):
4496     """Build hooks nodes.
4497
4498     """
4499     nl = [self.cfg.GetMasterNode(), self.op.node_name]
4500     return (nl, nl)
4501
4502   def CheckPrereq(self):
4503     """Check prerequisites.
4504
4505     This only checks the instance list against the existing names.
4506
4507     """
4508     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4509
4510     if (self.op.master_candidate is not None or
4511         self.op.drained is not None or
4512         self.op.offline is not None):
4513       # we can't change the master's node flags
4514       if self.op.node_name == self.cfg.GetMasterNode():
4515         raise errors.OpPrereqError("The master role can be changed"
4516                                    " only via master-failover",
4517                                    errors.ECODE_INVAL)
4518
4519     if self.op.master_candidate and not node.master_capable:
4520       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4521                                  " it a master candidate" % node.name,
4522                                  errors.ECODE_STATE)
4523
4524     if self.op.vm_capable == False:
4525       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4526       if ipri or isec:
4527         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4528                                    " the vm_capable flag" % node.name,
4529                                    errors.ECODE_STATE)
4530
4531     if node.master_candidate and self.might_demote and not self.lock_all:
4532       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4533       # check if after removing the current node, we're missing master
4534       # candidates
4535       (mc_remaining, mc_should, _) = \
4536           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4537       if mc_remaining < mc_should:
4538         raise errors.OpPrereqError("Not enough master candidates, please"
4539                                    " pass auto promote option to allow"
4540                                    " promotion", errors.ECODE_STATE)
4541
4542     self.old_flags = old_flags = (node.master_candidate,
4543                                   node.drained, node.offline)
4544     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4545     self.old_role = old_role = self._F2R[old_flags]
4546
4547     # Check for ineffective changes
4548     for attr in self._FLAGS:
4549       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4550         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4551         setattr(self.op, attr, None)
4552
4553     # Past this point, any flag change to False means a transition
4554     # away from the respective state, as only real changes are kept
4555
4556     # TODO: We might query the real power state if it supports OOB
4557     if _SupportsOob(self.cfg, node):
4558       if self.op.offline is False and not (node.powered or
4559                                            self.op.powered == True):
4560         raise errors.OpPrereqError(("Please power on node %s first before you"
4561                                     " can reset offline state") %
4562                                    self.op.node_name)
4563     elif self.op.powered is not None:
4564       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4565                                   " which does not support out-of-band"
4566                                   " handling") % self.op.node_name)
4567
4568     # If we're being deofflined/drained, we'll MC ourself if needed
4569     if (self.op.drained == False or self.op.offline == False or
4570         (self.op.master_capable and not node.master_capable)):
4571       if _DecideSelfPromotion(self):
4572         self.op.master_candidate = True
4573         self.LogInfo("Auto-promoting node to master candidate")
4574
4575     # If we're no longer master capable, we'll demote ourselves from MC
4576     if self.op.master_capable == False and node.master_candidate:
4577       self.LogInfo("Demoting from master candidate")
4578       self.op.master_candidate = False
4579
4580     # Compute new role
4581     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4582     if self.op.master_candidate:
4583       new_role = self._ROLE_CANDIDATE
4584     elif self.op.drained:
4585       new_role = self._ROLE_DRAINED
4586     elif self.op.offline:
4587       new_role = self._ROLE_OFFLINE
4588     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4589       # False is still in new flags, which means we're un-setting (the
4590       # only) True flag
4591       new_role = self._ROLE_REGULAR
4592     else: # no new flags, nothing, keep old role
4593       new_role = old_role
4594
4595     self.new_role = new_role
4596
4597     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4598       # Trying to transition out of offline status
4599       result = self.rpc.call_version([node.name])[node.name]
4600       if result.fail_msg:
4601         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4602                                    " to report its version: %s" %
4603                                    (node.name, result.fail_msg),
4604                                    errors.ECODE_STATE)
4605       else:
4606         self.LogWarning("Transitioning node from offline to online state"
4607                         " without using re-add. Please make sure the node"
4608                         " is healthy!")
4609
4610     if self.op.secondary_ip:
4611       # Ok even without locking, because this can't be changed by any LU
4612       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4613       master_singlehomed = master.secondary_ip == master.primary_ip
4614       if master_singlehomed and self.op.secondary_ip:
4615         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4616                                    " homed cluster", errors.ECODE_INVAL)
4617
4618       if node.offline:
4619         if self.affected_instances:
4620           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4621                                      " node has instances (%s) configured"
4622                                      " to use it" % self.affected_instances)
4623       else:
4624         # On online nodes, check that no instances are running, and that
4625         # the node has the new ip and we can reach it.
4626         for instance in self.affected_instances:
4627           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4628
4629         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4630         if master.name != node.name:
4631           # check reachability from master secondary ip to new secondary ip
4632           if not netutils.TcpPing(self.op.secondary_ip,
4633                                   constants.DEFAULT_NODED_PORT,
4634                                   source=master.secondary_ip):
4635             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4636                                        " based ping to node daemon port",
4637                                        errors.ECODE_ENVIRON)
4638
4639     if self.op.ndparams:
4640       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4641       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4642       self.new_ndparams = new_ndparams
4643
4644   def Exec(self, feedback_fn):
4645     """Modifies a node.
4646
4647     """
4648     node = self.node
4649     old_role = self.old_role
4650     new_role = self.new_role
4651
4652     result = []
4653
4654     if self.op.ndparams:
4655       node.ndparams = self.new_ndparams
4656
4657     if self.op.powered is not None:
4658       node.powered = self.op.powered
4659
4660     for attr in ["master_capable", "vm_capable"]:
4661       val = getattr(self.op, attr)
4662       if val is not None:
4663         setattr(node, attr, val)
4664         result.append((attr, str(val)))
4665
4666     if new_role != old_role:
4667       # Tell the node to demote itself, if no longer MC and not offline
4668       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4669         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4670         if msg:
4671           self.LogWarning("Node failed to demote itself: %s", msg)
4672
4673       new_flags = self._R2F[new_role]
4674       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4675         if of != nf:
4676           result.append((desc, str(nf)))
4677       (node.master_candidate, node.drained, node.offline) = new_flags
4678
4679       # we locked all nodes, we adjust the CP before updating this node
4680       if self.lock_all:
4681         _AdjustCandidatePool(self, [node.name])
4682
4683     if self.op.secondary_ip:
4684       node.secondary_ip = self.op.secondary_ip
4685       result.append(("secondary_ip", self.op.secondary_ip))
4686
4687     # this will trigger configuration file update, if needed
4688     self.cfg.Update(node, feedback_fn)
4689
4690     # this will trigger job queue propagation or cleanup if the mc
4691     # flag changed
4692     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4693       self.context.ReaddNode(node)
4694
4695     return result
4696
4697
4698 class LUNodePowercycle(NoHooksLU):
4699   """Powercycles a node.
4700
4701   """
4702   REQ_BGL = False
4703
4704   def CheckArguments(self):
4705     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4706     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4707       raise errors.OpPrereqError("The node is the master and the force"
4708                                  " parameter was not set",
4709                                  errors.ECODE_INVAL)
4710
4711   def ExpandNames(self):
4712     """Locking for PowercycleNode.
4713
4714     This is a last-resort option and shouldn't block on other
4715     jobs. Therefore, we grab no locks.
4716
4717     """
4718     self.needed_locks = {}
4719
4720   def Exec(self, feedback_fn):
4721     """Reboots a node.
4722
4723     """
4724     result = self.rpc.call_node_powercycle(self.op.node_name,
4725                                            self.cfg.GetHypervisorType())
4726     result.Raise("Failed to schedule the reboot")
4727     return result.payload
4728
4729
4730 class LUClusterQuery(NoHooksLU):
4731   """Query cluster configuration.
4732
4733   """
4734   REQ_BGL = False
4735
4736   def ExpandNames(self):
4737     self.needed_locks = {}
4738
4739   def Exec(self, feedback_fn):
4740     """Return cluster config.
4741
4742     """
4743     cluster = self.cfg.GetClusterInfo()
4744     os_hvp = {}
4745
4746     # Filter just for enabled hypervisors
4747     for os_name, hv_dict in cluster.os_hvp.items():
4748       os_hvp[os_name] = {}
4749       for hv_name, hv_params in hv_dict.items():
4750         if hv_name in cluster.enabled_hypervisors:
4751           os_hvp[os_name][hv_name] = hv_params
4752
4753     # Convert ip_family to ip_version
4754     primary_ip_version = constants.IP4_VERSION
4755     if cluster.primary_ip_family == netutils.IP6Address.family:
4756       primary_ip_version = constants.IP6_VERSION
4757
4758     result = {
4759       "software_version": constants.RELEASE_VERSION,
4760       "protocol_version": constants.PROTOCOL_VERSION,
4761       "config_version": constants.CONFIG_VERSION,
4762       "os_api_version": max(constants.OS_API_VERSIONS),
4763       "export_version": constants.EXPORT_VERSION,
4764       "architecture": (platform.architecture()[0], platform.machine()),
4765       "name": cluster.cluster_name,
4766       "master": cluster.master_node,
4767       "default_hypervisor": cluster.enabled_hypervisors[0],
4768       "enabled_hypervisors": cluster.enabled_hypervisors,
4769       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4770                         for hypervisor_name in cluster.enabled_hypervisors]),
4771       "os_hvp": os_hvp,
4772       "beparams": cluster.beparams,
4773       "osparams": cluster.osparams,
4774       "nicparams": cluster.nicparams,
4775       "ndparams": cluster.ndparams,
4776       "candidate_pool_size": cluster.candidate_pool_size,
4777       "master_netdev": cluster.master_netdev,
4778       "volume_group_name": cluster.volume_group_name,
4779       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4780       "file_storage_dir": cluster.file_storage_dir,
4781       "shared_file_storage_dir": cluster.shared_file_storage_dir,
4782       "maintain_node_health": cluster.maintain_node_health,
4783       "ctime": cluster.ctime,
4784       "mtime": cluster.mtime,
4785       "uuid": cluster.uuid,
4786       "tags": list(cluster.GetTags()),
4787       "uid_pool": cluster.uid_pool,
4788       "default_iallocator": cluster.default_iallocator,
4789       "reserved_lvs": cluster.reserved_lvs,
4790       "primary_ip_version": primary_ip_version,
4791       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4792       "hidden_os": cluster.hidden_os,
4793       "blacklisted_os": cluster.blacklisted_os,
4794       }
4795
4796     return result
4797
4798
4799 class LUClusterConfigQuery(NoHooksLU):
4800   """Return configuration values.
4801
4802   """
4803   REQ_BGL = False
4804   _FIELDS_DYNAMIC = utils.FieldSet()
4805   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4806                                   "watcher_pause", "volume_group_name")
4807
4808   def CheckArguments(self):
4809     _CheckOutputFields(static=self._FIELDS_STATIC,
4810                        dynamic=self._FIELDS_DYNAMIC,
4811                        selected=self.op.output_fields)
4812
4813   def ExpandNames(self):
4814     self.needed_locks = {}
4815
4816   def Exec(self, feedback_fn):
4817     """Dump a representation of the cluster config to the standard output.
4818
4819     """
4820     values = []
4821     for field in self.op.output_fields:
4822       if field == "cluster_name":
4823         entry = self.cfg.GetClusterName()
4824       elif field == "master_node":
4825         entry = self.cfg.GetMasterNode()
4826       elif field == "drain_flag":
4827         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4828       elif field == "watcher_pause":
4829         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4830       elif field == "volume_group_name":
4831         entry = self.cfg.GetVGName()
4832       else:
4833         raise errors.ParameterError(field)
4834       values.append(entry)
4835     return values
4836
4837
4838 class LUInstanceActivateDisks(NoHooksLU):
4839   """Bring up an instance's disks.
4840
4841   """
4842   REQ_BGL = False
4843
4844   def ExpandNames(self):
4845     self._ExpandAndLockInstance()
4846     self.needed_locks[locking.LEVEL_NODE] = []
4847     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4848
4849   def DeclareLocks(self, level):
4850     if level == locking.LEVEL_NODE:
4851       self._LockInstancesNodes()
4852
4853   def CheckPrereq(self):
4854     """Check prerequisites.
4855
4856     This checks that the instance is in the cluster.
4857
4858     """
4859     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4860     assert self.instance is not None, \
4861       "Cannot retrieve locked instance %s" % self.op.instance_name
4862     _CheckNodeOnline(self, self.instance.primary_node)
4863
4864   def Exec(self, feedback_fn):
4865     """Activate the disks.
4866
4867     """
4868     disks_ok, disks_info = \
4869               _AssembleInstanceDisks(self, self.instance,
4870                                      ignore_size=self.op.ignore_size)
4871     if not disks_ok:
4872       raise errors.OpExecError("Cannot activate block devices")
4873
4874     return disks_info
4875
4876
4877 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4878                            ignore_size=False):
4879   """Prepare the block devices for an instance.
4880
4881   This sets up the block devices on all nodes.
4882
4883   @type lu: L{LogicalUnit}
4884   @param lu: the logical unit on whose behalf we execute
4885   @type instance: L{objects.Instance}
4886   @param instance: the instance for whose disks we assemble
4887   @type disks: list of L{objects.Disk} or None
4888   @param disks: which disks to assemble (or all, if None)
4889   @type ignore_secondaries: boolean
4890   @param ignore_secondaries: if true, errors on secondary nodes
4891       won't result in an error return from the function
4892   @type ignore_size: boolean
4893   @param ignore_size: if true, the current known size of the disk
4894       will not be used during the disk activation, useful for cases
4895       when the size is wrong
4896   @return: False if the operation failed, otherwise a list of
4897       (host, instance_visible_name, node_visible_name)
4898       with the mapping from node devices to instance devices
4899
4900   """
4901   device_info = []
4902   disks_ok = True
4903   iname = instance.name
4904   disks = _ExpandCheckDisks(instance, disks)
4905
4906   # With the two passes mechanism we try to reduce the window of
4907   # opportunity for the race condition of switching DRBD to primary
4908   # before handshaking occured, but we do not eliminate it
4909
4910   # The proper fix would be to wait (with some limits) until the
4911   # connection has been made and drbd transitions from WFConnection
4912   # into any other network-connected state (Connected, SyncTarget,
4913   # SyncSource, etc.)
4914
4915   # 1st pass, assemble on all nodes in secondary mode
4916   for idx, inst_disk in enumerate(disks):
4917     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4918       if ignore_size:
4919         node_disk = node_disk.Copy()
4920         node_disk.UnsetSize()
4921       lu.cfg.SetDiskID(node_disk, node)
4922       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4923       msg = result.fail_msg
4924       if msg:
4925         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4926                            " (is_primary=False, pass=1): %s",
4927                            inst_disk.iv_name, node, msg)
4928         if not ignore_secondaries:
4929           disks_ok = False
4930
4931   # FIXME: race condition on drbd migration to primary
4932
4933   # 2nd pass, do only the primary node
4934   for idx, inst_disk in enumerate(disks):
4935     dev_path = None
4936
4937     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4938       if node != instance.primary_node:
4939         continue
4940       if ignore_size:
4941         node_disk = node_disk.Copy()
4942         node_disk.UnsetSize()
4943       lu.cfg.SetDiskID(node_disk, node)
4944       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4945       msg = result.fail_msg
4946       if msg:
4947         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4948                            " (is_primary=True, pass=2): %s",
4949                            inst_disk.iv_name, node, msg)
4950         disks_ok = False
4951       else:
4952         dev_path = result.payload
4953
4954     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4955
4956   # leave the disks configured for the primary node
4957   # this is a workaround that would be fixed better by
4958   # improving the logical/physical id handling
4959   for disk in disks:
4960     lu.cfg.SetDiskID(disk, instance.primary_node)
4961
4962   return disks_ok, device_info
4963
4964
4965 def _StartInstanceDisks(lu, instance, force):
4966   """Start the disks of an instance.
4967
4968   """
4969   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4970                                            ignore_secondaries=force)
4971   if not disks_ok:
4972     _ShutdownInstanceDisks(lu, instance)
4973     if force is not None and not force:
4974       lu.proc.LogWarning("", hint="If the message above refers to a"
4975                          " secondary node,"
4976                          " you can retry the operation using '--force'.")
4977     raise errors.OpExecError("Disk consistency error")
4978
4979
4980 class LUInstanceDeactivateDisks(NoHooksLU):
4981   """Shutdown an instance's disks.
4982
4983   """
4984   REQ_BGL = False
4985
4986   def ExpandNames(self):
4987     self._ExpandAndLockInstance()
4988     self.needed_locks[locking.LEVEL_NODE] = []
4989     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4990
4991   def DeclareLocks(self, level):
4992     if level == locking.LEVEL_NODE:
4993       self._LockInstancesNodes()
4994
4995   def CheckPrereq(self):
4996     """Check prerequisites.
4997
4998     This checks that the instance is in the cluster.
4999
5000     """
5001     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5002     assert self.instance is not None, \
5003       "Cannot retrieve locked instance %s" % self.op.instance_name
5004
5005   def Exec(self, feedback_fn):
5006     """Deactivate the disks
5007
5008     """
5009     instance = self.instance
5010     if self.op.force:
5011       _ShutdownInstanceDisks(self, instance)
5012     else:
5013       _SafeShutdownInstanceDisks(self, instance)
5014
5015
5016 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5017   """Shutdown block devices of an instance.
5018
5019   This function checks if an instance is running, before calling
5020   _ShutdownInstanceDisks.
5021
5022   """
5023   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5024   _ShutdownInstanceDisks(lu, instance, disks=disks)
5025
5026
5027 def _ExpandCheckDisks(instance, disks):
5028   """Return the instance disks selected by the disks list
5029
5030   @type disks: list of L{objects.Disk} or None
5031   @param disks: selected disks
5032   @rtype: list of L{objects.Disk}
5033   @return: selected instance disks to act on
5034
5035   """
5036   if disks is None:
5037     return instance.disks
5038   else:
5039     if not set(disks).issubset(instance.disks):
5040       raise errors.ProgrammerError("Can only act on disks belonging to the"
5041                                    " target instance")
5042     return disks
5043
5044
5045 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5046   """Shutdown block devices of an instance.
5047
5048   This does the shutdown on all nodes of the instance.
5049
5050   If the ignore_primary is false, errors on the primary node are
5051   ignored.
5052
5053   """
5054   all_result = True
5055   disks = _ExpandCheckDisks(instance, disks)
5056
5057   for disk in disks:
5058     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5059       lu.cfg.SetDiskID(top_disk, node)
5060       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5061       msg = result.fail_msg
5062       if msg:
5063         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5064                       disk.iv_name, node, msg)
5065         if ((node == instance.primary_node and not ignore_primary) or
5066             (node != instance.primary_node and not result.offline)):
5067           all_result = False
5068   return all_result
5069
5070
5071 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5072   """Checks if a node has enough free memory.
5073
5074   This function check if a given node has the needed amount of free
5075   memory. In case the node has less memory or we cannot get the
5076   information from the node, this function raise an OpPrereqError
5077   exception.
5078
5079   @type lu: C{LogicalUnit}
5080   @param lu: a logical unit from which we get configuration data
5081   @type node: C{str}
5082   @param node: the node to check
5083   @type reason: C{str}
5084   @param reason: string to use in the error message
5085   @type requested: C{int}
5086   @param requested: the amount of memory in MiB to check for
5087   @type hypervisor_name: C{str}
5088   @param hypervisor_name: the hypervisor to ask for memory stats
5089   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5090       we cannot check the node
5091
5092   """
5093   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5094   nodeinfo[node].Raise("Can't get data from node %s" % node,
5095                        prereq=True, ecode=errors.ECODE_ENVIRON)
5096   free_mem = nodeinfo[node].payload.get('memory_free', None)
5097   if not isinstance(free_mem, int):
5098     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5099                                " was '%s'" % (node, free_mem),
5100                                errors.ECODE_ENVIRON)
5101   if requested > free_mem:
5102     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5103                                " needed %s MiB, available %s MiB" %
5104                                (node, reason, requested, free_mem),
5105                                errors.ECODE_NORES)
5106
5107
5108 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5109   """Checks if nodes have enough free disk space in the all VGs.
5110
5111   This function check if all given nodes have the needed amount of
5112   free disk. In case any node has less disk or we cannot get the
5113   information from the node, this function raise an OpPrereqError
5114   exception.
5115
5116   @type lu: C{LogicalUnit}
5117   @param lu: a logical unit from which we get configuration data
5118   @type nodenames: C{list}
5119   @param nodenames: the list of node names to check
5120   @type req_sizes: C{dict}
5121   @param req_sizes: the hash of vg and corresponding amount of disk in
5122       MiB to check for
5123   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5124       or we cannot check the node
5125
5126   """
5127   for vg, req_size in req_sizes.items():
5128     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5129
5130
5131 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5132   """Checks if nodes have enough free disk space in the specified VG.
5133
5134   This function check if all given nodes have the needed amount of
5135   free disk. In case any node has less disk or we cannot get the
5136   information from the node, this function raise an OpPrereqError
5137   exception.
5138
5139   @type lu: C{LogicalUnit}
5140   @param lu: a logical unit from which we get configuration data
5141   @type nodenames: C{list}
5142   @param nodenames: the list of node names to check
5143   @type vg: C{str}
5144   @param vg: the volume group to check
5145   @type requested: C{int}
5146   @param requested: the amount of disk in MiB to check for
5147   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5148       or we cannot check the node
5149
5150   """
5151   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5152   for node in nodenames:
5153     info = nodeinfo[node]
5154     info.Raise("Cannot get current information from node %s" % node,
5155                prereq=True, ecode=errors.ECODE_ENVIRON)
5156     vg_free = info.payload.get("vg_free", None)
5157     if not isinstance(vg_free, int):
5158       raise errors.OpPrereqError("Can't compute free disk space on node"
5159                                  " %s for vg %s, result was '%s'" %
5160                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5161     if requested > vg_free:
5162       raise errors.OpPrereqError("Not enough disk space on target node %s"
5163                                  " vg %s: required %d MiB, available %d MiB" %
5164                                  (node, vg, requested, vg_free),
5165                                  errors.ECODE_NORES)
5166
5167
5168 class LUInstanceStartup(LogicalUnit):
5169   """Starts an instance.
5170
5171   """
5172   HPATH = "instance-start"
5173   HTYPE = constants.HTYPE_INSTANCE
5174   REQ_BGL = False
5175
5176   def CheckArguments(self):
5177     # extra beparams
5178     if self.op.beparams:
5179       # fill the beparams dict
5180       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5181
5182   def ExpandNames(self):
5183     self._ExpandAndLockInstance()
5184
5185   def BuildHooksEnv(self):
5186     """Build hooks env.
5187
5188     This runs on master, primary and secondary nodes of the instance.
5189
5190     """
5191     env = {
5192       "FORCE": self.op.force,
5193       }
5194
5195     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5196
5197     return env
5198
5199   def BuildHooksNodes(self):
5200     """Build hooks nodes.
5201
5202     """
5203     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5204     return (nl, nl)
5205
5206   def CheckPrereq(self):
5207     """Check prerequisites.
5208
5209     This checks that the instance is in the cluster.
5210
5211     """
5212     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5213     assert self.instance is not None, \
5214       "Cannot retrieve locked instance %s" % self.op.instance_name
5215
5216     # extra hvparams
5217     if self.op.hvparams:
5218       # check hypervisor parameter syntax (locally)
5219       cluster = self.cfg.GetClusterInfo()
5220       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5221       filled_hvp = cluster.FillHV(instance)
5222       filled_hvp.update(self.op.hvparams)
5223       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5224       hv_type.CheckParameterSyntax(filled_hvp)
5225       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5226
5227     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5228
5229     if self.primary_offline and self.op.ignore_offline_nodes:
5230       self.proc.LogWarning("Ignoring offline primary node")
5231
5232       if self.op.hvparams or self.op.beparams:
5233         self.proc.LogWarning("Overridden parameters are ignored")
5234     else:
5235       _CheckNodeOnline(self, instance.primary_node)
5236
5237       bep = self.cfg.GetClusterInfo().FillBE(instance)
5238
5239       # check bridges existence
5240       _CheckInstanceBridgesExist(self, instance)
5241
5242       remote_info = self.rpc.call_instance_info(instance.primary_node,
5243                                                 instance.name,
5244                                                 instance.hypervisor)
5245       remote_info.Raise("Error checking node %s" % instance.primary_node,
5246                         prereq=True, ecode=errors.ECODE_ENVIRON)
5247       if not remote_info.payload: # not running already
5248         _CheckNodeFreeMemory(self, instance.primary_node,
5249                              "starting instance %s" % instance.name,
5250                              bep[constants.BE_MEMORY], instance.hypervisor)
5251
5252   def Exec(self, feedback_fn):
5253     """Start the instance.
5254
5255     """
5256     instance = self.instance
5257     force = self.op.force
5258
5259     self.cfg.MarkInstanceUp(instance.name)
5260
5261     if self.primary_offline:
5262       assert self.op.ignore_offline_nodes
5263       self.proc.LogInfo("Primary node offline, marked instance as started")
5264     else:
5265       node_current = instance.primary_node
5266
5267       _StartInstanceDisks(self, instance, force)
5268
5269       result = self.rpc.call_instance_start(node_current, instance,
5270                                             self.op.hvparams, self.op.beparams)
5271       msg = result.fail_msg
5272       if msg:
5273         _ShutdownInstanceDisks(self, instance)
5274         raise errors.OpExecError("Could not start instance: %s" % msg)
5275
5276
5277 class LUInstanceReboot(LogicalUnit):
5278   """Reboot an instance.
5279
5280   """
5281   HPATH = "instance-reboot"
5282   HTYPE = constants.HTYPE_INSTANCE
5283   REQ_BGL = False
5284
5285   def ExpandNames(self):
5286     self._ExpandAndLockInstance()
5287
5288   def BuildHooksEnv(self):
5289     """Build hooks env.
5290
5291     This runs on master, primary and secondary nodes of the instance.
5292
5293     """
5294     env = {
5295       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5296       "REBOOT_TYPE": self.op.reboot_type,
5297       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5298       }
5299
5300     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5301
5302     return env
5303
5304   def BuildHooksNodes(self):
5305     """Build hooks nodes.
5306
5307     """
5308     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5309     return (nl, nl)
5310
5311   def CheckPrereq(self):
5312     """Check prerequisites.
5313
5314     This checks that the instance is in the cluster.
5315
5316     """
5317     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5318     assert self.instance is not None, \
5319       "Cannot retrieve locked instance %s" % self.op.instance_name
5320
5321     _CheckNodeOnline(self, instance.primary_node)
5322
5323     # check bridges existence
5324     _CheckInstanceBridgesExist(self, instance)
5325
5326   def Exec(self, feedback_fn):
5327     """Reboot the instance.
5328
5329     """
5330     instance = self.instance
5331     ignore_secondaries = self.op.ignore_secondaries
5332     reboot_type = self.op.reboot_type
5333
5334     remote_info = self.rpc.call_instance_info(instance.primary_node,
5335                                               instance.name,
5336                                               instance.hypervisor)
5337     remote_info.Raise("Error checking node %s" % instance.primary_node)
5338     instance_running = bool(remote_info.payload)
5339
5340     node_current = instance.primary_node
5341
5342     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5343                                             constants.INSTANCE_REBOOT_HARD]:
5344       for disk in instance.disks:
5345         self.cfg.SetDiskID(disk, node_current)
5346       result = self.rpc.call_instance_reboot(node_current, instance,
5347                                              reboot_type,
5348                                              self.op.shutdown_timeout)
5349       result.Raise("Could not reboot instance")
5350     else:
5351       if instance_running:
5352         result = self.rpc.call_instance_shutdown(node_current, instance,
5353                                                  self.op.shutdown_timeout)
5354         result.Raise("Could not shutdown instance for full reboot")
5355         _ShutdownInstanceDisks(self, instance)
5356       else:
5357         self.LogInfo("Instance %s was already stopped, starting now",
5358                      instance.name)
5359       _StartInstanceDisks(self, instance, ignore_secondaries)
5360       result = self.rpc.call_instance_start(node_current, instance, None, None)
5361       msg = result.fail_msg
5362       if msg:
5363         _ShutdownInstanceDisks(self, instance)
5364         raise errors.OpExecError("Could not start instance for"
5365                                  " full reboot: %s" % msg)
5366
5367     self.cfg.MarkInstanceUp(instance.name)
5368
5369
5370 class LUInstanceShutdown(LogicalUnit):
5371   """Shutdown an instance.
5372
5373   """
5374   HPATH = "instance-stop"
5375   HTYPE = constants.HTYPE_INSTANCE
5376   REQ_BGL = False
5377
5378   def ExpandNames(self):
5379     self._ExpandAndLockInstance()
5380
5381   def BuildHooksEnv(self):
5382     """Build hooks env.
5383
5384     This runs on master, primary and secondary nodes of the instance.
5385
5386     """
5387     env = _BuildInstanceHookEnvByObject(self, self.instance)
5388     env["TIMEOUT"] = self.op.timeout
5389     return env
5390
5391   def BuildHooksNodes(self):
5392     """Build hooks nodes.
5393
5394     """
5395     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5396     return (nl, nl)
5397
5398   def CheckPrereq(self):
5399     """Check prerequisites.
5400
5401     This checks that the instance is in the cluster.
5402
5403     """
5404     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5405     assert self.instance is not None, \
5406       "Cannot retrieve locked instance %s" % self.op.instance_name
5407
5408     self.primary_offline = \
5409       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5410
5411     if self.primary_offline and self.op.ignore_offline_nodes:
5412       self.proc.LogWarning("Ignoring offline primary node")
5413     else:
5414       _CheckNodeOnline(self, self.instance.primary_node)
5415
5416   def Exec(self, feedback_fn):
5417     """Shutdown the instance.
5418
5419     """
5420     instance = self.instance
5421     node_current = instance.primary_node
5422     timeout = self.op.timeout
5423
5424     self.cfg.MarkInstanceDown(instance.name)
5425
5426     if self.primary_offline:
5427       assert self.op.ignore_offline_nodes
5428       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5429     else:
5430       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5431       msg = result.fail_msg
5432       if msg:
5433         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5434
5435       _ShutdownInstanceDisks(self, instance)
5436
5437
5438 class LUInstanceReinstall(LogicalUnit):
5439   """Reinstall an instance.
5440
5441   """
5442   HPATH = "instance-reinstall"
5443   HTYPE = constants.HTYPE_INSTANCE
5444   REQ_BGL = False
5445
5446   def ExpandNames(self):
5447     self._ExpandAndLockInstance()
5448
5449   def BuildHooksEnv(self):
5450     """Build hooks env.
5451
5452     This runs on master, primary and secondary nodes of the instance.
5453
5454     """
5455     return _BuildInstanceHookEnvByObject(self, self.instance)
5456
5457   def BuildHooksNodes(self):
5458     """Build hooks nodes.
5459
5460     """
5461     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5462     return (nl, nl)
5463
5464   def CheckPrereq(self):
5465     """Check prerequisites.
5466
5467     This checks that the instance is in the cluster and is not running.
5468
5469     """
5470     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5471     assert instance is not None, \
5472       "Cannot retrieve locked instance %s" % self.op.instance_name
5473     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5474                      " offline, cannot reinstall")
5475     for node in instance.secondary_nodes:
5476       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5477                        " cannot reinstall")
5478
5479     if instance.disk_template == constants.DT_DISKLESS:
5480       raise errors.OpPrereqError("Instance '%s' has no disks" %
5481                                  self.op.instance_name,
5482                                  errors.ECODE_INVAL)
5483     _CheckInstanceDown(self, instance, "cannot reinstall")
5484
5485     if self.op.os_type is not None:
5486       # OS verification
5487       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5488       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5489       instance_os = self.op.os_type
5490     else:
5491       instance_os = instance.os
5492
5493     nodelist = list(instance.all_nodes)
5494
5495     if self.op.osparams:
5496       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5497       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5498       self.os_inst = i_osdict # the new dict (without defaults)
5499     else:
5500       self.os_inst = None
5501
5502     self.instance = instance
5503
5504   def Exec(self, feedback_fn):
5505     """Reinstall the instance.
5506
5507     """
5508     inst = self.instance
5509
5510     if self.op.os_type is not None:
5511       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5512       inst.os = self.op.os_type
5513       # Write to configuration
5514       self.cfg.Update(inst, feedback_fn)
5515
5516     _StartInstanceDisks(self, inst, None)
5517     try:
5518       feedback_fn("Running the instance OS create scripts...")
5519       # FIXME: pass debug option from opcode to backend
5520       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5521                                              self.op.debug_level,
5522                                              osparams=self.os_inst)
5523       result.Raise("Could not install OS for instance %s on node %s" %
5524                    (inst.name, inst.primary_node))
5525     finally:
5526       _ShutdownInstanceDisks(self, inst)
5527
5528
5529 class LUInstanceRecreateDisks(LogicalUnit):
5530   """Recreate an instance's missing disks.
5531
5532   """
5533   HPATH = "instance-recreate-disks"
5534   HTYPE = constants.HTYPE_INSTANCE
5535   REQ_BGL = False
5536
5537   def ExpandNames(self):
5538     self._ExpandAndLockInstance()
5539
5540   def BuildHooksEnv(self):
5541     """Build hooks env.
5542
5543     This runs on master, primary and secondary nodes of the instance.
5544
5545     """
5546     return _BuildInstanceHookEnvByObject(self, self.instance)
5547
5548   def BuildHooksNodes(self):
5549     """Build hooks nodes.
5550
5551     """
5552     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5553     return (nl, nl)
5554
5555   def CheckPrereq(self):
5556     """Check prerequisites.
5557
5558     This checks that the instance is in the cluster and is not running.
5559
5560     """
5561     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5562     assert instance is not None, \
5563       "Cannot retrieve locked instance %s" % self.op.instance_name
5564     _CheckNodeOnline(self, instance.primary_node)
5565
5566     if instance.disk_template == constants.DT_DISKLESS:
5567       raise errors.OpPrereqError("Instance '%s' has no disks" %
5568                                  self.op.instance_name, errors.ECODE_INVAL)
5569     _CheckInstanceDown(self, instance, "cannot recreate disks")
5570
5571     if not self.op.disks:
5572       self.op.disks = range(len(instance.disks))
5573     else:
5574       for idx in self.op.disks:
5575         if idx >= len(instance.disks):
5576           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5577                                      errors.ECODE_INVAL)
5578
5579     self.instance = instance
5580
5581   def Exec(self, feedback_fn):
5582     """Recreate the disks.
5583
5584     """
5585     to_skip = []
5586     for idx, _ in enumerate(self.instance.disks):
5587       if idx not in self.op.disks: # disk idx has not been passed in
5588         to_skip.append(idx)
5589         continue
5590
5591     _CreateDisks(self, self.instance, to_skip=to_skip)
5592
5593
5594 class LUInstanceRename(LogicalUnit):
5595   """Rename an instance.
5596
5597   """
5598   HPATH = "instance-rename"
5599   HTYPE = constants.HTYPE_INSTANCE
5600
5601   def CheckArguments(self):
5602     """Check arguments.
5603
5604     """
5605     if self.op.ip_check and not self.op.name_check:
5606       # TODO: make the ip check more flexible and not depend on the name check
5607       raise errors.OpPrereqError("Cannot do ip check without a name check",
5608                                  errors.ECODE_INVAL)
5609
5610   def BuildHooksEnv(self):
5611     """Build hooks env.
5612
5613     This runs on master, primary and secondary nodes of the instance.
5614
5615     """
5616     env = _BuildInstanceHookEnvByObject(self, self.instance)
5617     env["INSTANCE_NEW_NAME"] = self.op.new_name
5618     return env
5619
5620   def BuildHooksNodes(self):
5621     """Build hooks nodes.
5622
5623     """
5624     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5625     return (nl, nl)
5626
5627   def CheckPrereq(self):
5628     """Check prerequisites.
5629
5630     This checks that the instance is in the cluster and is not running.
5631
5632     """
5633     self.op.instance_name = _ExpandInstanceName(self.cfg,
5634                                                 self.op.instance_name)
5635     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5636     assert instance is not None
5637     _CheckNodeOnline(self, instance.primary_node)
5638     _CheckInstanceDown(self, instance, "cannot rename")
5639     self.instance = instance
5640
5641     new_name = self.op.new_name
5642     if self.op.name_check:
5643       hostname = netutils.GetHostname(name=new_name)
5644       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5645                    hostname.name)
5646       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5647         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5648                                     " same as given hostname '%s'") %
5649                                     (hostname.name, self.op.new_name),
5650                                     errors.ECODE_INVAL)
5651       new_name = self.op.new_name = hostname.name
5652       if (self.op.ip_check and
5653           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5654         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5655                                    (hostname.ip, new_name),
5656                                    errors.ECODE_NOTUNIQUE)
5657
5658     instance_list = self.cfg.GetInstanceList()
5659     if new_name in instance_list and new_name != instance.name:
5660       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5661                                  new_name, errors.ECODE_EXISTS)
5662
5663   def Exec(self, feedback_fn):
5664     """Rename the instance.
5665
5666     """
5667     inst = self.instance
5668     old_name = inst.name
5669
5670     rename_file_storage = False
5671     if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5672         self.op.new_name != inst.name):
5673       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5674       rename_file_storage = True
5675
5676     self.cfg.RenameInstance(inst.name, self.op.new_name)
5677     # Change the instance lock. This is definitely safe while we hold the BGL
5678     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5679     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5680
5681     # re-read the instance from the configuration after rename
5682     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5683
5684     if rename_file_storage:
5685       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5686       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5687                                                      old_file_storage_dir,
5688                                                      new_file_storage_dir)
5689       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5690                    " (but the instance has been renamed in Ganeti)" %
5691                    (inst.primary_node, old_file_storage_dir,
5692                     new_file_storage_dir))
5693
5694     _StartInstanceDisks(self, inst, None)
5695     try:
5696       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5697                                                  old_name, self.op.debug_level)
5698       msg = result.fail_msg
5699       if msg:
5700         msg = ("Could not run OS rename script for instance %s on node %s"
5701                " (but the instance has been renamed in Ganeti): %s" %
5702                (inst.name, inst.primary_node, msg))
5703         self.proc.LogWarning(msg)
5704     finally:
5705       _ShutdownInstanceDisks(self, inst)
5706
5707     return inst.name
5708
5709
5710 class LUInstanceRemove(LogicalUnit):
5711   """Remove an instance.
5712
5713   """
5714   HPATH = "instance-remove"
5715   HTYPE = constants.HTYPE_INSTANCE
5716   REQ_BGL = False
5717
5718   def ExpandNames(self):
5719     self._ExpandAndLockInstance()
5720     self.needed_locks[locking.LEVEL_NODE] = []
5721     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5722
5723   def DeclareLocks(self, level):
5724     if level == locking.LEVEL_NODE:
5725       self._LockInstancesNodes()
5726
5727   def BuildHooksEnv(self):
5728     """Build hooks env.
5729
5730     This runs on master, primary and secondary nodes of the instance.
5731
5732     """
5733     env = _BuildInstanceHookEnvByObject(self, self.instance)
5734     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5735     return env
5736
5737   def BuildHooksNodes(self):
5738     """Build hooks nodes.
5739
5740     """
5741     nl = [self.cfg.GetMasterNode()]
5742     nl_post = list(self.instance.all_nodes) + nl
5743     return (nl, nl_post)
5744
5745   def CheckPrereq(self):
5746     """Check prerequisites.
5747
5748     This checks that the instance is in the cluster.
5749
5750     """
5751     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5752     assert self.instance is not None, \
5753       "Cannot retrieve locked instance %s" % self.op.instance_name
5754
5755   def Exec(self, feedback_fn):
5756     """Remove the instance.
5757
5758     """
5759     instance = self.instance
5760     logging.info("Shutting down instance %s on node %s",
5761                  instance.name, instance.primary_node)
5762
5763     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5764                                              self.op.shutdown_timeout)
5765     msg = result.fail_msg
5766     if msg:
5767       if self.op.ignore_failures:
5768         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5769       else:
5770         raise errors.OpExecError("Could not shutdown instance %s on"
5771                                  " node %s: %s" %
5772                                  (instance.name, instance.primary_node, msg))
5773
5774     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5775
5776
5777 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5778   """Utility function to remove an instance.
5779
5780   """
5781   logging.info("Removing block devices for instance %s", instance.name)
5782
5783   if not _RemoveDisks(lu, instance):
5784     if not ignore_failures:
5785       raise errors.OpExecError("Can't remove instance's disks")
5786     feedback_fn("Warning: can't remove instance's disks")
5787
5788   logging.info("Removing instance %s out of cluster config", instance.name)
5789
5790   lu.cfg.RemoveInstance(instance.name)
5791
5792   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5793     "Instance lock removal conflict"
5794
5795   # Remove lock for the instance
5796   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5797
5798
5799 class LUInstanceQuery(NoHooksLU):
5800   """Logical unit for querying instances.
5801
5802   """
5803   # pylint: disable-msg=W0142
5804   REQ_BGL = False
5805
5806   def CheckArguments(self):
5807     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5808                              self.op.output_fields, self.op.use_locking)
5809
5810   def ExpandNames(self):
5811     self.iq.ExpandNames(self)
5812
5813   def DeclareLocks(self, level):
5814     self.iq.DeclareLocks(self, level)
5815
5816   def Exec(self, feedback_fn):
5817     return self.iq.OldStyleQuery(self)
5818
5819
5820 class LUInstanceFailover(LogicalUnit):
5821   """Failover an instance.
5822
5823   """
5824   HPATH = "instance-failover"
5825   HTYPE = constants.HTYPE_INSTANCE
5826   REQ_BGL = False
5827
5828   def CheckArguments(self):
5829     """Check the arguments.
5830
5831     """
5832     self.iallocator = getattr(self.op, "iallocator", None)
5833     self.target_node = getattr(self.op, "target_node", None)
5834
5835   def ExpandNames(self):
5836     self._ExpandAndLockInstance()
5837
5838     if self.op.target_node is not None:
5839       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5840
5841     self.needed_locks[locking.LEVEL_NODE] = []
5842     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5843
5844   def DeclareLocks(self, level):
5845     if level == locking.LEVEL_NODE:
5846       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5847       if instance.disk_template in constants.DTS_EXT_MIRROR:
5848         if self.op.target_node is None:
5849           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5850         else:
5851           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5852                                                    self.op.target_node]
5853         del self.recalculate_locks[locking.LEVEL_NODE]
5854       else:
5855         self._LockInstancesNodes()
5856
5857   def BuildHooksEnv(self):
5858     """Build hooks env.
5859
5860     This runs on master, primary and secondary nodes of the instance.
5861
5862     """
5863     instance = self.instance
5864     source_node = instance.primary_node
5865     env = {
5866       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5867       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5868       "OLD_PRIMARY": source_node,
5869       "NEW_PRIMARY": self.op.target_node,
5870       }
5871
5872     if instance.disk_template in constants.DTS_INT_MIRROR:
5873       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5874       env["NEW_SECONDARY"] = source_node
5875     else:
5876       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
5877
5878     env.update(_BuildInstanceHookEnvByObject(self, instance))
5879
5880     return env
5881
5882   def BuildHooksNodes(self):
5883     """Build hooks nodes.
5884
5885     """
5886     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
5887     return (nl, nl + [self.instance.primary_node])
5888
5889   def CheckPrereq(self):
5890     """Check prerequisites.
5891
5892     This checks that the instance is in the cluster.
5893
5894     """
5895     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5896     assert self.instance is not None, \
5897       "Cannot retrieve locked instance %s" % self.op.instance_name
5898
5899     bep = self.cfg.GetClusterInfo().FillBE(instance)
5900     if instance.disk_template not in constants.DTS_MIRRORED:
5901       raise errors.OpPrereqError("Instance's disk layout is not"
5902                                  " mirrored, cannot failover.",
5903                                  errors.ECODE_STATE)
5904
5905     if instance.disk_template in constants.DTS_EXT_MIRROR:
5906       _CheckIAllocatorOrNode(self, "iallocator", "target_node")
5907       if self.op.iallocator:
5908         self._RunAllocator()
5909         # Release all unnecessary node locks
5910         nodes_keep = [instance.primary_node, self.op.target_node]
5911         nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5912                      if node not in nodes_keep]
5913         self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
5914         self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5915
5916       # self.op.target_node is already populated, either directly or by the
5917       # iallocator run
5918       target_node = self.op.target_node
5919
5920     else:
5921       secondary_nodes = instance.secondary_nodes
5922       if not secondary_nodes:
5923         raise errors.ConfigurationError("No secondary node but using"
5924                                         " %s disk template" %
5925                                         instance.disk_template)
5926       target_node = secondary_nodes[0]
5927
5928       if self.op.iallocator or (self.op.target_node and
5929                                 self.op.target_node != target_node):
5930         raise errors.OpPrereqError("Instances with disk template %s cannot"
5931                                    " be failed over to arbitrary nodes"
5932                                    " (neither an iallocator nor a target"
5933                                    " node can be passed)" %
5934                                    instance.disk_template, errors.ECODE_INVAL)
5935     _CheckNodeOnline(self, target_node)
5936     _CheckNodeNotDrained(self, target_node)
5937
5938     # Save target_node so that we can use it in BuildHooksEnv
5939     self.op.target_node = target_node
5940
5941     if instance.admin_up:
5942       # check memory requirements on the secondary node
5943       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5944                            instance.name, bep[constants.BE_MEMORY],
5945                            instance.hypervisor)
5946     else:
5947       self.LogInfo("Not checking memory on the secondary node as"
5948                    " instance will not be started")
5949
5950     # check bridge existance
5951     _CheckInstanceBridgesExist(self, instance, node=target_node)
5952
5953   def Exec(self, feedback_fn):
5954     """Failover an instance.
5955
5956     The failover is done by shutting it down on its present node and
5957     starting it on the secondary.
5958
5959     """
5960     instance = self.instance
5961     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5962
5963     source_node = instance.primary_node
5964     target_node = self.op.target_node
5965
5966     if instance.admin_up:
5967       feedback_fn("* checking disk consistency between source and target")
5968       for dev in instance.disks:
5969         # for drbd, these are drbd over lvm
5970         if not _CheckDiskConsistency(self, dev, target_node, False):
5971           if not self.op.ignore_consistency:
5972             raise errors.OpExecError("Disk %s is degraded on target node,"
5973                                      " aborting failover." % dev.iv_name)
5974     else:
5975       feedback_fn("* not checking disk consistency as instance is not running")
5976
5977     feedback_fn("* shutting down instance on source node")
5978     logging.info("Shutting down instance %s on node %s",
5979                  instance.name, source_node)
5980
5981     result = self.rpc.call_instance_shutdown(source_node, instance,
5982                                              self.op.shutdown_timeout)
5983     msg = result.fail_msg
5984     if msg:
5985       if self.op.ignore_consistency or primary_node.offline:
5986         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5987                              " Proceeding anyway. Please make sure node"
5988                              " %s is down. Error details: %s",
5989                              instance.name, source_node, source_node, msg)
5990       else:
5991         raise errors.OpExecError("Could not shutdown instance %s on"
5992                                  " node %s: %s" %
5993                                  (instance.name, source_node, msg))
5994
5995     feedback_fn("* deactivating the instance's disks on source node")
5996     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5997       raise errors.OpExecError("Can't shut down the instance's disks.")
5998
5999     instance.primary_node = target_node
6000     # distribute new instance config to the other nodes
6001     self.cfg.Update(instance, feedback_fn)
6002
6003     # Only start the instance if it's marked as up
6004     if instance.admin_up:
6005       feedback_fn("* activating the instance's disks on target node")
6006       logging.info("Starting instance %s on node %s",
6007                    instance.name, target_node)
6008
6009       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6010                                            ignore_secondaries=True)
6011       if not disks_ok:
6012         _ShutdownInstanceDisks(self, instance)
6013         raise errors.OpExecError("Can't activate the instance's disks")
6014
6015       feedback_fn("* starting the instance on the target node")
6016       result = self.rpc.call_instance_start(target_node, instance, None, None)
6017       msg = result.fail_msg
6018       if msg:
6019         _ShutdownInstanceDisks(self, instance)
6020         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6021                                  (instance.name, target_node, msg))
6022
6023   def _RunAllocator(self):
6024     """Run the allocator based on input opcode.
6025
6026     """
6027     ial = IAllocator(self.cfg, self.rpc,
6028                      mode=constants.IALLOCATOR_MODE_RELOC,
6029                      name=self.instance.name,
6030                      # TODO See why hail breaks with a single node below
6031                      relocate_from=[self.instance.primary_node,
6032                                     self.instance.primary_node],
6033                      )
6034
6035     ial.Run(self.op.iallocator)
6036
6037     if not ial.success:
6038       raise errors.OpPrereqError("Can't compute nodes using"
6039                                  " iallocator '%s': %s" %
6040                                  (self.op.iallocator, ial.info),
6041                                  errors.ECODE_NORES)
6042     if len(ial.result) != ial.required_nodes:
6043       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6044                                  " of nodes (%s), required %s" %
6045                                  (self.op.iallocator, len(ial.result),
6046                                   ial.required_nodes), errors.ECODE_FAULT)
6047     self.op.target_node = ial.result[0]
6048     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6049                  self.instance.name, self.op.iallocator,
6050                  utils.CommaJoin(ial.result))
6051
6052
6053 class LUInstanceMigrate(LogicalUnit):
6054   """Migrate an instance.
6055
6056   This is migration without shutting down, compared to the failover,
6057   which is done with shutdown.
6058
6059   """
6060   HPATH = "instance-migrate"
6061   HTYPE = constants.HTYPE_INSTANCE
6062   REQ_BGL = False
6063
6064   def ExpandNames(self):
6065     self._ExpandAndLockInstance()
6066
6067     if self.op.target_node is not None:
6068       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6069
6070     self.needed_locks[locking.LEVEL_NODE] = []
6071     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6072
6073     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6074                                        self.op.cleanup, self.op.iallocator,
6075                                        self.op.target_node)
6076     self.tasklets = [self._migrater]
6077
6078   def DeclareLocks(self, level):
6079     if level == locking.LEVEL_NODE:
6080       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6081       if instance.disk_template in constants.DTS_EXT_MIRROR:
6082         if self.op.target_node is None:
6083           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6084         else:
6085           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6086                                                    self.op.target_node]
6087         del self.recalculate_locks[locking.LEVEL_NODE]
6088       else:
6089         self._LockInstancesNodes()
6090
6091   def BuildHooksEnv(self):
6092     """Build hooks env.
6093
6094     This runs on master, primary and secondary nodes of the instance.
6095
6096     """
6097     instance = self._migrater.instance
6098     source_node = instance.primary_node
6099     target_node = self._migrater.target_node
6100     env = _BuildInstanceHookEnvByObject(self, instance)
6101     env.update({
6102       "MIGRATE_LIVE": self._migrater.live,
6103       "MIGRATE_CLEANUP": self.op.cleanup,
6104       "OLD_PRIMARY": source_node,
6105       "NEW_PRIMARY": target_node,
6106       })
6107
6108     if instance.disk_template in constants.DTS_INT_MIRROR:
6109       env["OLD_SECONDARY"] = target_node
6110       env["NEW_SECONDARY"] = source_node
6111     else:
6112       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6113
6114     return env
6115
6116   def BuildHooksNodes(self):
6117     """Build hooks nodes.
6118
6119     """
6120     instance = self._migrater.instance
6121     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6122     return (nl, nl + [instance.primary_node])
6123
6124
6125 class LUInstanceMove(LogicalUnit):
6126   """Move an instance by data-copying.
6127
6128   """
6129   HPATH = "instance-move"
6130   HTYPE = constants.HTYPE_INSTANCE
6131   REQ_BGL = False
6132
6133   def ExpandNames(self):
6134     self._ExpandAndLockInstance()
6135     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6136     self.op.target_node = target_node
6137     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6138     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6139
6140   def DeclareLocks(self, level):
6141     if level == locking.LEVEL_NODE:
6142       self._LockInstancesNodes(primary_only=True)
6143
6144   def BuildHooksEnv(self):
6145     """Build hooks env.
6146
6147     This runs on master, primary and secondary nodes of the instance.
6148
6149     """
6150     env = {
6151       "TARGET_NODE": self.op.target_node,
6152       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6153       }
6154     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6155     return env
6156
6157   def BuildHooksNodes(self):
6158     """Build hooks nodes.
6159
6160     """
6161     nl = [
6162       self.cfg.GetMasterNode(),
6163       self.instance.primary_node,
6164       self.op.target_node,
6165       ]
6166     return (nl, nl)
6167
6168   def CheckPrereq(self):
6169     """Check prerequisites.
6170
6171     This checks that the instance is in the cluster.
6172
6173     """
6174     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6175     assert self.instance is not None, \
6176       "Cannot retrieve locked instance %s" % self.op.instance_name
6177
6178     node = self.cfg.GetNodeInfo(self.op.target_node)
6179     assert node is not None, \
6180       "Cannot retrieve locked node %s" % self.op.target_node
6181
6182     self.target_node = target_node = node.name
6183
6184     if target_node == instance.primary_node:
6185       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6186                                  (instance.name, target_node),
6187                                  errors.ECODE_STATE)
6188
6189     bep = self.cfg.GetClusterInfo().FillBE(instance)
6190
6191     for idx, dsk in enumerate(instance.disks):
6192       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6193         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6194                                    " cannot copy" % idx, errors.ECODE_STATE)
6195
6196     _CheckNodeOnline(self, target_node)
6197     _CheckNodeNotDrained(self, target_node)
6198     _CheckNodeVmCapable(self, target_node)
6199
6200     if instance.admin_up:
6201       # check memory requirements on the secondary node
6202       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6203                            instance.name, bep[constants.BE_MEMORY],
6204                            instance.hypervisor)
6205     else:
6206       self.LogInfo("Not checking memory on the secondary node as"
6207                    " instance will not be started")
6208
6209     # check bridge existance
6210     _CheckInstanceBridgesExist(self, instance, node=target_node)
6211
6212   def Exec(self, feedback_fn):
6213     """Move an instance.
6214
6215     The move is done by shutting it down on its present node, copying
6216     the data over (slow) and starting it on the new node.
6217
6218     """
6219     instance = self.instance
6220
6221     source_node = instance.primary_node
6222     target_node = self.target_node
6223
6224     self.LogInfo("Shutting down instance %s on source node %s",
6225                  instance.name, source_node)
6226
6227     result = self.rpc.call_instance_shutdown(source_node, instance,
6228                                              self.op.shutdown_timeout)
6229     msg = result.fail_msg
6230     if msg:
6231       if self.op.ignore_consistency:
6232         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6233                              " Proceeding anyway. Please make sure node"
6234                              " %s is down. Error details: %s",
6235                              instance.name, source_node, source_node, msg)
6236       else:
6237         raise errors.OpExecError("Could not shutdown instance %s on"
6238                                  " node %s: %s" %
6239                                  (instance.name, source_node, msg))
6240
6241     # create the target disks
6242     try:
6243       _CreateDisks(self, instance, target_node=target_node)
6244     except errors.OpExecError:
6245       self.LogWarning("Device creation failed, reverting...")
6246       try:
6247         _RemoveDisks(self, instance, target_node=target_node)
6248       finally:
6249         self.cfg.ReleaseDRBDMinors(instance.name)
6250         raise
6251
6252     cluster_name = self.cfg.GetClusterInfo().cluster_name
6253
6254     errs = []
6255     # activate, get path, copy the data over
6256     for idx, disk in enumerate(instance.disks):
6257       self.LogInfo("Copying data for disk %d", idx)
6258       result = self.rpc.call_blockdev_assemble(target_node, disk,
6259                                                instance.name, True, idx)
6260       if result.fail_msg:
6261         self.LogWarning("Can't assemble newly created disk %d: %s",
6262                         idx, result.fail_msg)
6263         errs.append(result.fail_msg)
6264         break
6265       dev_path = result.payload
6266       result = self.rpc.call_blockdev_export(source_node, disk,
6267                                              target_node, dev_path,
6268                                              cluster_name)
6269       if result.fail_msg:
6270         self.LogWarning("Can't copy data over for disk %d: %s",
6271                         idx, result.fail_msg)
6272         errs.append(result.fail_msg)
6273         break
6274
6275     if errs:
6276       self.LogWarning("Some disks failed to copy, aborting")
6277       try:
6278         _RemoveDisks(self, instance, target_node=target_node)
6279       finally:
6280         self.cfg.ReleaseDRBDMinors(instance.name)
6281         raise errors.OpExecError("Errors during disk copy: %s" %
6282                                  (",".join(errs),))
6283
6284     instance.primary_node = target_node
6285     self.cfg.Update(instance, feedback_fn)
6286
6287     self.LogInfo("Removing the disks on the original node")
6288     _RemoveDisks(self, instance, target_node=source_node)
6289
6290     # Only start the instance if it's marked as up
6291     if instance.admin_up:
6292       self.LogInfo("Starting instance %s on node %s",
6293                    instance.name, target_node)
6294
6295       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6296                                            ignore_secondaries=True)
6297       if not disks_ok:
6298         _ShutdownInstanceDisks(self, instance)
6299         raise errors.OpExecError("Can't activate the instance's disks")
6300
6301       result = self.rpc.call_instance_start(target_node, instance, None, None)
6302       msg = result.fail_msg
6303       if msg:
6304         _ShutdownInstanceDisks(self, instance)
6305         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6306                                  (instance.name, target_node, msg))
6307
6308
6309 class LUNodeMigrate(LogicalUnit):
6310   """Migrate all instances from a node.
6311
6312   """
6313   HPATH = "node-migrate"
6314   HTYPE = constants.HTYPE_NODE
6315   REQ_BGL = False
6316
6317   def CheckArguments(self):
6318     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6319
6320   def ExpandNames(self):
6321     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6322
6323     self.needed_locks = {}
6324
6325     # Create tasklets for migrating instances for all instances on this node
6326     names = []
6327     tasklets = []
6328
6329     self.lock_all_nodes = False
6330
6331     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6332       logging.debug("Migrating instance %s", inst.name)
6333       names.append(inst.name)
6334
6335       tasklets.append(TLMigrateInstance(self, inst.name, False,
6336                                         self.op.iallocator, None))
6337
6338       if inst.disk_template in constants.DTS_EXT_MIRROR:
6339         # We need to lock all nodes, as the iallocator will choose the
6340         # destination nodes afterwards
6341         self.lock_all_nodes = True
6342
6343     self.tasklets = tasklets
6344
6345     # Declare node locks
6346     if self.lock_all_nodes:
6347       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6348     else:
6349       self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6350       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6351
6352     # Declare instance locks
6353     self.needed_locks[locking.LEVEL_INSTANCE] = names
6354
6355   def DeclareLocks(self, level):
6356     if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6357       self._LockInstancesNodes()
6358
6359   def BuildHooksEnv(self):
6360     """Build hooks env.
6361
6362     This runs on the master, the primary and all the secondaries.
6363
6364     """
6365     return {
6366       "NODE_NAME": self.op.node_name,
6367       }
6368
6369   def BuildHooksNodes(self):
6370     """Build hooks nodes.
6371
6372     """
6373     nl = [self.cfg.GetMasterNode()]
6374     return (nl, nl)
6375
6376
6377 class TLMigrateInstance(Tasklet):
6378   """Tasklet class for instance migration.
6379
6380   @type live: boolean
6381   @ivar live: whether the migration will be done live or non-live;
6382       this variable is initalized only after CheckPrereq has run
6383
6384   """
6385   def __init__(self, lu, instance_name, cleanup,
6386                iallocator=None, target_node=None):
6387     """Initializes this class.
6388
6389     """
6390     Tasklet.__init__(self, lu)
6391
6392     # Parameters
6393     self.instance_name = instance_name
6394     self.cleanup = cleanup
6395     self.live = False # will be overridden later
6396     self.iallocator = iallocator
6397     self.target_node = target_node
6398
6399   def CheckPrereq(self):
6400     """Check prerequisites.
6401
6402     This checks that the instance is in the cluster.
6403
6404     """
6405     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6406     instance = self.cfg.GetInstanceInfo(instance_name)
6407     assert instance is not None
6408     self.instance = instance
6409
6410     if instance.disk_template not in constants.DTS_MIRRORED:
6411       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6412                                  " migrations" % instance.disk_template,
6413                                  errors.ECODE_STATE)
6414
6415     if instance.disk_template in constants.DTS_EXT_MIRROR:
6416       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6417
6418       if self.iallocator:
6419         self._RunAllocator()
6420
6421       # self.target_node is already populated, either directly or by the
6422       # iallocator run
6423       target_node = self.target_node
6424
6425       if len(self.lu.tasklets) == 1:
6426         # It is safe to remove locks only when we're the only tasklet in the LU
6427         nodes_keep = [instance.primary_node, self.target_node]
6428         nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6429                      if node not in nodes_keep]
6430         self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6431         self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6432
6433     else:
6434       secondary_nodes = instance.secondary_nodes
6435       if not secondary_nodes:
6436         raise errors.ConfigurationError("No secondary node but using"
6437                                         " %s disk template" %
6438                                         instance.disk_template)
6439       target_node = secondary_nodes[0]
6440       if self.lu.op.iallocator or (self.lu.op.target_node and
6441                                    self.lu.op.target_node != target_node):
6442         raise errors.OpPrereqError("Instances with disk template %s cannot"
6443                                    " be migrated over to arbitrary nodes"
6444                                    " (neither an iallocator nor a target"
6445                                    " node can be passed)" %
6446                                    instance.disk_template, errors.ECODE_INVAL)
6447
6448     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6449
6450     # check memory requirements on the secondary node
6451     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6452                          instance.name, i_be[constants.BE_MEMORY],
6453                          instance.hypervisor)
6454
6455     # check bridge existance
6456     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6457
6458     if not self.cleanup:
6459       _CheckNodeNotDrained(self.lu, target_node)
6460       result = self.rpc.call_instance_migratable(instance.primary_node,
6461                                                  instance)
6462       result.Raise("Can't migrate, please use failover",
6463                    prereq=True, ecode=errors.ECODE_STATE)
6464
6465
6466   def _RunAllocator(self):
6467     """Run the allocator based on input opcode.
6468
6469     """
6470     ial = IAllocator(self.cfg, self.rpc,
6471                      mode=constants.IALLOCATOR_MODE_RELOC,
6472                      name=self.instance_name,
6473                      # TODO See why hail breaks with a single node below
6474                      relocate_from=[self.instance.primary_node,
6475                                     self.instance.primary_node],
6476                      )
6477
6478     ial.Run(self.iallocator)
6479
6480     if not ial.success:
6481       raise errors.OpPrereqError("Can't compute nodes using"
6482                                  " iallocator '%s': %s" %
6483                                  (self.iallocator, ial.info),
6484                                  errors.ECODE_NORES)
6485     if len(ial.result) != ial.required_nodes:
6486       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6487                                  " of nodes (%s), required %s" %
6488                                  (self.iallocator, len(ial.result),
6489                                   ial.required_nodes), errors.ECODE_FAULT)
6490     self.target_node = ial.result[0]
6491     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6492                  self.instance_name, self.iallocator,
6493                  utils.CommaJoin(ial.result))
6494
6495     if self.lu.op.live is not None and self.lu.op.mode is not None:
6496       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6497                                  " parameters are accepted",
6498                                  errors.ECODE_INVAL)
6499     if self.lu.op.live is not None:
6500       if self.lu.op.live:
6501         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6502       else:
6503         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6504       # reset the 'live' parameter to None so that repeated
6505       # invocations of CheckPrereq do not raise an exception
6506       self.lu.op.live = None
6507     elif self.lu.op.mode is None:
6508       # read the default value from the hypervisor
6509       i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
6510       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6511
6512     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6513
6514   def _WaitUntilSync(self):
6515     """Poll with custom rpc for disk sync.
6516
6517     This uses our own step-based rpc call.
6518
6519     """
6520     self.feedback_fn("* wait until resync is done")
6521     all_done = False
6522     while not all_done:
6523       all_done = True
6524       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6525                                             self.nodes_ip,
6526                                             self.instance.disks)
6527       min_percent = 100
6528       for node, nres in result.items():
6529         nres.Raise("Cannot resync disks on node %s" % node)
6530         node_done, node_percent = nres.payload
6531         all_done = all_done and node_done
6532         if node_percent is not None:
6533           min_percent = min(min_percent, node_percent)
6534       if not all_done:
6535         if min_percent < 100:
6536           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6537         time.sleep(2)
6538
6539   def _EnsureSecondary(self, node):
6540     """Demote a node to secondary.
6541
6542     """
6543     self.feedback_fn("* switching node %s to secondary mode" % node)
6544
6545     for dev in self.instance.disks:
6546       self.cfg.SetDiskID(dev, node)
6547
6548     result = self.rpc.call_blockdev_close(node, self.instance.name,
6549                                           self.instance.disks)
6550     result.Raise("Cannot change disk to secondary on node %s" % node)
6551
6552   def _GoStandalone(self):
6553     """Disconnect from the network.
6554
6555     """
6556     self.feedback_fn("* changing into standalone mode")
6557     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6558                                                self.instance.disks)
6559     for node, nres in result.items():
6560       nres.Raise("Cannot disconnect disks node %s" % node)
6561
6562   def _GoReconnect(self, multimaster):
6563     """Reconnect to the network.
6564
6565     """
6566     if multimaster:
6567       msg = "dual-master"
6568     else:
6569       msg = "single-master"
6570     self.feedback_fn("* changing disks into %s mode" % msg)
6571     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6572                                            self.instance.disks,
6573                                            self.instance.name, multimaster)
6574     for node, nres in result.items():
6575       nres.Raise("Cannot change disks config on node %s" % node)
6576
6577   def _ExecCleanup(self):
6578     """Try to cleanup after a failed migration.
6579
6580     The cleanup is done by:
6581       - check that the instance is running only on one node
6582         (and update the config if needed)
6583       - change disks on its secondary node to secondary
6584       - wait until disks are fully synchronized
6585       - disconnect from the network
6586       - change disks into single-master mode
6587       - wait again until disks are fully synchronized
6588
6589     """
6590     instance = self.instance
6591     target_node = self.target_node
6592     source_node = self.source_node
6593
6594     # check running on only one node
6595     self.feedback_fn("* checking where the instance actually runs"
6596                      " (if this hangs, the hypervisor might be in"
6597                      " a bad state)")
6598     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6599     for node, result in ins_l.items():
6600       result.Raise("Can't contact node %s" % node)
6601
6602     runningon_source = instance.name in ins_l[source_node].payload
6603     runningon_target = instance.name in ins_l[target_node].payload
6604
6605     if runningon_source and runningon_target:
6606       raise errors.OpExecError("Instance seems to be running on two nodes,"
6607                                " or the hypervisor is confused. You will have"
6608                                " to ensure manually that it runs only on one"
6609                                " and restart this operation.")
6610
6611     if not (runningon_source or runningon_target):
6612       raise errors.OpExecError("Instance does not seem to be running at all."
6613                                " In this case, it's safer to repair by"
6614                                " running 'gnt-instance stop' to ensure disk"
6615                                " shutdown, and then restarting it.")
6616
6617     if runningon_target:
6618       # the migration has actually succeeded, we need to update the config
6619       self.feedback_fn("* instance running on secondary node (%s),"
6620                        " updating config" % target_node)
6621       instance.primary_node = target_node
6622       self.cfg.Update(instance, self.feedback_fn)
6623       demoted_node = source_node
6624     else:
6625       self.feedback_fn("* instance confirmed to be running on its"
6626                        " primary node (%s)" % source_node)
6627       demoted_node = target_node
6628
6629     if instance.disk_template in constants.DTS_INT_MIRROR:
6630       self._EnsureSecondary(demoted_node)
6631       try:
6632         self._WaitUntilSync()
6633       except errors.OpExecError:
6634         # we ignore here errors, since if the device is standalone, it
6635         # won't be able to sync
6636         pass
6637       self._GoStandalone()
6638       self._GoReconnect(False)
6639       self._WaitUntilSync()
6640
6641     self.feedback_fn("* done")
6642
6643   def _RevertDiskStatus(self):
6644     """Try to revert the disk status after a failed migration.
6645
6646     """
6647     target_node = self.target_node
6648     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6649       return
6650
6651     try:
6652       self._EnsureSecondary(target_node)
6653       self._GoStandalone()
6654       self._GoReconnect(False)
6655       self._WaitUntilSync()
6656     except errors.OpExecError, err:
6657       self.lu.LogWarning("Migration failed and I can't reconnect the"
6658                          " drives: error '%s'\n"
6659                          "Please look and recover the instance status" %
6660                          str(err))
6661
6662   def _AbortMigration(self):
6663     """Call the hypervisor code to abort a started migration.
6664
6665     """
6666     instance = self.instance
6667     target_node = self.target_node
6668     migration_info = self.migration_info
6669
6670     abort_result = self.rpc.call_finalize_migration(target_node,
6671                                                     instance,
6672                                                     migration_info,
6673                                                     False)
6674     abort_msg = abort_result.fail_msg
6675     if abort_msg:
6676       logging.error("Aborting migration failed on target node %s: %s",
6677                     target_node, abort_msg)
6678       # Don't raise an exception here, as we stil have to try to revert the
6679       # disk status, even if this step failed.
6680
6681   def _ExecMigration(self):
6682     """Migrate an instance.
6683
6684     The migrate is done by:
6685       - change the disks into dual-master mode
6686       - wait until disks are fully synchronized again
6687       - migrate the instance
6688       - change disks on the new secondary node (the old primary) to secondary
6689       - wait until disks are fully synchronized
6690       - change disks into single-master mode
6691
6692     """
6693     instance = self.instance
6694     target_node = self.target_node
6695     source_node = self.source_node
6696
6697     self.feedback_fn("* checking disk consistency between source and target")
6698     for dev in instance.disks:
6699       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6700         raise errors.OpExecError("Disk %s is degraded or not fully"
6701                                  " synchronized on target node,"
6702                                  " aborting migrate." % dev.iv_name)
6703
6704     # First get the migration information from the remote node
6705     result = self.rpc.call_migration_info(source_node, instance)
6706     msg = result.fail_msg
6707     if msg:
6708       log_err = ("Failed fetching source migration information from %s: %s" %
6709                  (source_node, msg))
6710       logging.error(log_err)
6711       raise errors.OpExecError(log_err)
6712
6713     self.migration_info = migration_info = result.payload
6714
6715     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6716       # Then switch the disks to master/master mode
6717       self._EnsureSecondary(target_node)
6718       self._GoStandalone()
6719       self._GoReconnect(True)
6720       self._WaitUntilSync()
6721
6722     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6723     result = self.rpc.call_accept_instance(target_node,
6724                                            instance,
6725                                            migration_info,
6726                                            self.nodes_ip[target_node])
6727
6728     msg = result.fail_msg
6729     if msg:
6730       logging.error("Instance pre-migration failed, trying to revert"
6731                     " disk status: %s", msg)
6732       self.feedback_fn("Pre-migration failed, aborting")
6733       self._AbortMigration()
6734       self._RevertDiskStatus()
6735       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6736                                (instance.name, msg))
6737
6738     self.feedback_fn("* migrating instance to %s" % target_node)
6739     time.sleep(10)
6740     result = self.rpc.call_instance_migrate(source_node, instance,
6741                                             self.nodes_ip[target_node],
6742                                             self.live)
6743     msg = result.fail_msg
6744     if msg:
6745       logging.error("Instance migration failed, trying to revert"
6746                     " disk status: %s", msg)
6747       self.feedback_fn("Migration failed, aborting")
6748       self._AbortMigration()
6749       self._RevertDiskStatus()
6750       raise errors.OpExecError("Could not migrate instance %s: %s" %
6751                                (instance.name, msg))
6752     time.sleep(10)
6753
6754     instance.primary_node = target_node
6755     # distribute new instance config to the other nodes
6756     self.cfg.Update(instance, self.feedback_fn)
6757
6758     result = self.rpc.call_finalize_migration(target_node,
6759                                               instance,
6760                                               migration_info,
6761                                               True)
6762     msg = result.fail_msg
6763     if msg:
6764       logging.error("Instance migration succeeded, but finalization failed:"
6765                     " %s", msg)
6766       raise errors.OpExecError("Could not finalize instance migration: %s" %
6767                                msg)
6768
6769     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6770       self._EnsureSecondary(source_node)
6771       self._WaitUntilSync()
6772       self._GoStandalone()
6773       self._GoReconnect(False)
6774       self._WaitUntilSync()
6775
6776     self.feedback_fn("* done")
6777
6778   def Exec(self, feedback_fn):
6779     """Perform the migration.
6780
6781     """
6782     feedback_fn("Migrating instance %s" % self.instance.name)
6783
6784     self.feedback_fn = feedback_fn
6785
6786     self.source_node = self.instance.primary_node
6787
6788     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6789     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6790       self.target_node = self.instance.secondary_nodes[0]
6791       # Otherwise self.target_node has been populated either
6792       # directly, or through an iallocator.
6793
6794     self.all_nodes = [self.source_node, self.target_node]
6795     self.nodes_ip = {
6796       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6797       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6798       }
6799
6800     if self.cleanup:
6801       return self._ExecCleanup()
6802     else:
6803       return self._ExecMigration()
6804
6805
6806 def _CreateBlockDev(lu, node, instance, device, force_create,
6807                     info, force_open):
6808   """Create a tree of block devices on a given node.
6809
6810   If this device type has to be created on secondaries, create it and
6811   all its children.
6812
6813   If not, just recurse to children keeping the same 'force' value.
6814
6815   @param lu: the lu on whose behalf we execute
6816   @param node: the node on which to create the device
6817   @type instance: L{objects.Instance}
6818   @param instance: the instance which owns the device
6819   @type device: L{objects.Disk}
6820   @param device: the device to create
6821   @type force_create: boolean
6822   @param force_create: whether to force creation of this device; this
6823       will be change to True whenever we find a device which has
6824       CreateOnSecondary() attribute
6825   @param info: the extra 'metadata' we should attach to the device
6826       (this will be represented as a LVM tag)
6827   @type force_open: boolean
6828   @param force_open: this parameter will be passes to the
6829       L{backend.BlockdevCreate} function where it specifies
6830       whether we run on primary or not, and it affects both
6831       the child assembly and the device own Open() execution
6832
6833   """
6834   if device.CreateOnSecondary():
6835     force_create = True
6836
6837   if device.children:
6838     for child in device.children:
6839       _CreateBlockDev(lu, node, instance, child, force_create,
6840                       info, force_open)
6841
6842   if not force_create:
6843     return
6844
6845   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6846
6847
6848 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6849   """Create a single block device on a given node.
6850
6851   This will not recurse over children of the device, so they must be
6852   created in advance.
6853
6854   @param lu: the lu on whose behalf we execute
6855   @param node: the node on which to create the device
6856   @type instance: L{objects.Instance}
6857   @param instance: the instance which owns the device
6858   @type device: L{objects.Disk}
6859   @param device: the device to create
6860   @param info: the extra 'metadata' we should attach to the device
6861       (this will be represented as a LVM tag)
6862   @type force_open: boolean
6863   @param force_open: this parameter will be passes to the
6864       L{backend.BlockdevCreate} function where it specifies
6865       whether we run on primary or not, and it affects both
6866       the child assembly and the device own Open() execution
6867
6868   """
6869   lu.cfg.SetDiskID(device, node)
6870   result = lu.rpc.call_blockdev_create(node, device, device.size,
6871                                        instance.name, force_open, info)
6872   result.Raise("Can't create block device %s on"
6873                " node %s for instance %s" % (device, node, instance.name))
6874   if device.physical_id is None:
6875     device.physical_id = result.payload
6876
6877
6878 def _GenerateUniqueNames(lu, exts):
6879   """Generate a suitable LV name.
6880
6881   This will generate a logical volume name for the given instance.
6882
6883   """
6884   results = []
6885   for val in exts:
6886     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6887     results.append("%s%s" % (new_id, val))
6888   return results
6889
6890
6891 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6892                          p_minor, s_minor):
6893   """Generate a drbd8 device complete with its children.
6894
6895   """
6896   port = lu.cfg.AllocatePort()
6897   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6898   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6899                           logical_id=(vgname, names[0]))
6900   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6901                           logical_id=(vgname, names[1]))
6902   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6903                           logical_id=(primary, secondary, port,
6904                                       p_minor, s_minor,
6905                                       shared_secret),
6906                           children=[dev_data, dev_meta],
6907                           iv_name=iv_name)
6908   return drbd_dev
6909
6910
6911 def _GenerateDiskTemplate(lu, template_name,
6912                           instance_name, primary_node,
6913                           secondary_nodes, disk_info,
6914                           file_storage_dir, file_driver,
6915                           base_index, feedback_fn):
6916   """Generate the entire disk layout for a given template type.
6917
6918   """
6919   #TODO: compute space requirements
6920
6921   vgname = lu.cfg.GetVGName()
6922   disk_count = len(disk_info)
6923   disks = []
6924   if template_name == constants.DT_DISKLESS:
6925     pass
6926   elif template_name == constants.DT_PLAIN:
6927     if len(secondary_nodes) != 0:
6928       raise errors.ProgrammerError("Wrong template configuration")
6929
6930     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6931                                       for i in range(disk_count)])
6932     for idx, disk in enumerate(disk_info):
6933       disk_index = idx + base_index
6934       vg = disk.get("vg", vgname)
6935       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6936       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6937                               logical_id=(vg, names[idx]),
6938                               iv_name="disk/%d" % disk_index,
6939                               mode=disk["mode"])
6940       disks.append(disk_dev)
6941   elif template_name == constants.DT_DRBD8:
6942     if len(secondary_nodes) != 1:
6943       raise errors.ProgrammerError("Wrong template configuration")
6944     remote_node = secondary_nodes[0]
6945     minors = lu.cfg.AllocateDRBDMinor(
6946       [primary_node, remote_node] * len(disk_info), instance_name)
6947
6948     names = []
6949     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6950                                                for i in range(disk_count)]):
6951       names.append(lv_prefix + "_data")
6952       names.append(lv_prefix + "_meta")
6953     for idx, disk in enumerate(disk_info):
6954       disk_index = idx + base_index
6955       vg = disk.get("vg", vgname)
6956       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6957                                       disk["size"], vg, names[idx*2:idx*2+2],
6958                                       "disk/%d" % disk_index,
6959                                       minors[idx*2], minors[idx*2+1])
6960       disk_dev.mode = disk["mode"]
6961       disks.append(disk_dev)
6962   elif template_name == constants.DT_FILE:
6963     if len(secondary_nodes) != 0:
6964       raise errors.ProgrammerError("Wrong template configuration")
6965
6966     opcodes.RequireFileStorage()
6967
6968     for idx, disk in enumerate(disk_info):
6969       disk_index = idx + base_index
6970       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6971                               iv_name="disk/%d" % disk_index,
6972                               logical_id=(file_driver,
6973                                           "%s/disk%d" % (file_storage_dir,
6974                                                          disk_index)),
6975                               mode=disk["mode"])
6976       disks.append(disk_dev)
6977   elif template_name == constants.DT_SHARED_FILE:
6978     if len(secondary_nodes) != 0:
6979       raise errors.ProgrammerError("Wrong template configuration")
6980
6981     opcodes.RequireSharedFileStorage()
6982
6983     for idx, disk in enumerate(disk_info):
6984       disk_index = idx + base_index
6985       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6986                               iv_name="disk/%d" % disk_index,
6987                               logical_id=(file_driver,
6988                                           "%s/disk%d" % (file_storage_dir,
6989                                                          disk_index)),
6990                               mode=disk["mode"])
6991       disks.append(disk_dev)
6992   elif template_name == constants.DT_BLOCK:
6993     if len(secondary_nodes) != 0:
6994       raise errors.ProgrammerError("Wrong template configuration")
6995
6996     for idx, disk in enumerate(disk_info):
6997       disk_index = idx + base_index
6998       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
6999                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7000                                           disk["adopt"]),
7001                               iv_name="disk/%d" % disk_index,
7002                               mode=disk["mode"])
7003       disks.append(disk_dev)
7004
7005   else:
7006     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7007   return disks
7008
7009
7010 def _GetInstanceInfoText(instance):
7011   """Compute that text that should be added to the disk's metadata.
7012
7013   """
7014   return "originstname+%s" % instance.name
7015
7016
7017 def _CalcEta(time_taken, written, total_size):
7018   """Calculates the ETA based on size written and total size.
7019
7020   @param time_taken: The time taken so far
7021   @param written: amount written so far
7022   @param total_size: The total size of data to be written
7023   @return: The remaining time in seconds
7024
7025   """
7026   avg_time = time_taken / float(written)
7027   return (total_size - written) * avg_time
7028
7029
7030 def _WipeDisks(lu, instance):
7031   """Wipes instance disks.
7032
7033   @type lu: L{LogicalUnit}
7034   @param lu: the logical unit on whose behalf we execute
7035   @type instance: L{objects.Instance}
7036   @param instance: the instance whose disks we should create
7037   @return: the success of the wipe
7038
7039   """
7040   node = instance.primary_node
7041
7042   for device in instance.disks:
7043     lu.cfg.SetDiskID(device, node)
7044
7045   logging.info("Pause sync of instance %s disks", instance.name)
7046   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7047
7048   for idx, success in enumerate(result.payload):
7049     if not success:
7050       logging.warn("pause-sync of instance %s for disks %d failed",
7051                    instance.name, idx)
7052
7053   try:
7054     for idx, device in enumerate(instance.disks):
7055       lu.LogInfo("* Wiping disk %d", idx)
7056       logging.info("Wiping disk %d for instance %s, node %s",
7057                    idx, instance.name, node)
7058
7059       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7060       # MAX_WIPE_CHUNK at max
7061       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7062                             constants.MIN_WIPE_CHUNK_PERCENT)
7063
7064       offset = 0
7065       size = device.size
7066       last_output = 0
7067       start_time = time.time()
7068
7069       while offset < size:
7070         wipe_size = min(wipe_chunk_size, size - offset)
7071         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7072         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7073                      (idx, offset, wipe_size))
7074         now = time.time()
7075         offset += wipe_size
7076         if now - last_output >= 60:
7077           eta = _CalcEta(now - start_time, offset, size)
7078           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7079                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7080           last_output = now
7081   finally:
7082     logging.info("Resume sync of instance %s disks", instance.name)
7083
7084     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7085
7086     for idx, success in enumerate(result.payload):
7087       if not success:
7088         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7089                       " look at the status and troubleshoot the issue.", idx)
7090         logging.warn("resume-sync of instance %s for disks %d failed",
7091                      instance.name, idx)
7092
7093
7094 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7095   """Create all disks for an instance.
7096
7097   This abstracts away some work from AddInstance.
7098
7099   @type lu: L{LogicalUnit}
7100   @param lu: the logical unit on whose behalf we execute
7101   @type instance: L{objects.Instance}
7102   @param instance: the instance whose disks we should create
7103   @type to_skip: list
7104   @param to_skip: list of indices to skip
7105   @type target_node: string
7106   @param target_node: if passed, overrides the target node for creation
7107   @rtype: boolean
7108   @return: the success of the creation
7109
7110   """
7111   info = _GetInstanceInfoText(instance)
7112   if target_node is None:
7113     pnode = instance.primary_node
7114     all_nodes = instance.all_nodes
7115   else:
7116     pnode = target_node
7117     all_nodes = [pnode]
7118
7119   if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7120     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7121     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7122
7123     result.Raise("Failed to create directory '%s' on"
7124                  " node %s" % (file_storage_dir, pnode))
7125
7126   # Note: this needs to be kept in sync with adding of disks in
7127   # LUInstanceSetParams
7128   for idx, device in enumerate(instance.disks):
7129     if to_skip and idx in to_skip:
7130       continue
7131     logging.info("Creating volume %s for instance %s",
7132                  device.iv_name, instance.name)
7133     #HARDCODE
7134     for node in all_nodes:
7135       f_create = node == pnode
7136       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7137
7138
7139 def _RemoveDisks(lu, instance, target_node=None):
7140   """Remove all disks for an instance.
7141
7142   This abstracts away some work from `AddInstance()` and
7143   `RemoveInstance()`. Note that in case some of the devices couldn't
7144   be removed, the removal will continue with the other ones (compare
7145   with `_CreateDisks()`).
7146
7147   @type lu: L{LogicalUnit}
7148   @param lu: the logical unit on whose behalf we execute
7149   @type instance: L{objects.Instance}
7150   @param instance: the instance whose disks we should remove
7151   @type target_node: string
7152   @param target_node: used to override the node on which to remove the disks
7153   @rtype: boolean
7154   @return: the success of the removal
7155
7156   """
7157   logging.info("Removing block devices for instance %s", instance.name)
7158
7159   all_result = True
7160   for device in instance.disks:
7161     if target_node:
7162       edata = [(target_node, device)]
7163     else:
7164       edata = device.ComputeNodeTree(instance.primary_node)
7165     for node, disk in edata:
7166       lu.cfg.SetDiskID(disk, node)
7167       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7168       if msg:
7169         lu.LogWarning("Could not remove block device %s on node %s,"
7170                       " continuing anyway: %s", device.iv_name, node, msg)
7171         all_result = False
7172
7173   if instance.disk_template == constants.DT_FILE:
7174     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7175     if target_node:
7176       tgt = target_node
7177     else:
7178       tgt = instance.primary_node
7179     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7180     if result.fail_msg:
7181       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7182                     file_storage_dir, instance.primary_node, result.fail_msg)
7183       all_result = False
7184
7185   return all_result
7186
7187
7188 def _ComputeDiskSizePerVG(disk_template, disks):
7189   """Compute disk size requirements in the volume group
7190
7191   """
7192   def _compute(disks, payload):
7193     """Universal algorithm
7194
7195     """
7196     vgs = {}
7197     for disk in disks:
7198       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
7199
7200     return vgs
7201
7202   # Required free disk space as a function of disk and swap space
7203   req_size_dict = {
7204     constants.DT_DISKLESS: {},
7205     constants.DT_PLAIN: _compute(disks, 0),
7206     # 128 MB are added for drbd metadata for each disk
7207     constants.DT_DRBD8: _compute(disks, 128),
7208     constants.DT_FILE: {},
7209     constants.DT_SHARED_FILE: {},
7210   }
7211
7212   if disk_template not in req_size_dict:
7213     raise errors.ProgrammerError("Disk template '%s' size requirement"
7214                                  " is unknown" %  disk_template)
7215
7216   return req_size_dict[disk_template]
7217
7218
7219 def _ComputeDiskSize(disk_template, disks):
7220   """Compute disk size requirements in the volume group
7221
7222   """
7223   # Required free disk space as a function of disk and swap space
7224   req_size_dict = {
7225     constants.DT_DISKLESS: None,
7226     constants.DT_PLAIN: sum(d["size"] for d in disks),
7227     # 128 MB are added for drbd metadata for each disk
7228     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
7229     constants.DT_FILE: None,
7230     constants.DT_SHARED_FILE: 0,
7231     constants.DT_BLOCK: 0,
7232   }
7233
7234   if disk_template not in req_size_dict:
7235     raise errors.ProgrammerError("Disk template '%s' size requirement"
7236                                  " is unknown" %  disk_template)
7237
7238   return req_size_dict[disk_template]
7239
7240
7241 def _FilterVmNodes(lu, nodenames):
7242   """Filters out non-vm_capable nodes from a list.
7243
7244   @type lu: L{LogicalUnit}
7245   @param lu: the logical unit for which we check
7246   @type nodenames: list
7247   @param nodenames: the list of nodes on which we should check
7248   @rtype: list
7249   @return: the list of vm-capable nodes
7250
7251   """
7252   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7253   return [name for name in nodenames if name not in vm_nodes]
7254
7255
7256 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7257   """Hypervisor parameter validation.
7258
7259   This function abstract the hypervisor parameter validation to be
7260   used in both instance create and instance modify.
7261
7262   @type lu: L{LogicalUnit}
7263   @param lu: the logical unit for which we check
7264   @type nodenames: list
7265   @param nodenames: the list of nodes on which we should check
7266   @type hvname: string
7267   @param hvname: the name of the hypervisor we should use
7268   @type hvparams: dict
7269   @param hvparams: the parameters which we need to check
7270   @raise errors.OpPrereqError: if the parameters are not valid
7271
7272   """
7273   nodenames = _FilterVmNodes(lu, nodenames)
7274   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7275                                                   hvname,
7276                                                   hvparams)
7277   for node in nodenames:
7278     info = hvinfo[node]
7279     if info.offline:
7280       continue
7281     info.Raise("Hypervisor parameter validation failed on node %s" % node)
7282
7283
7284 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7285   """OS parameters validation.
7286
7287   @type lu: L{LogicalUnit}
7288   @param lu: the logical unit for which we check
7289   @type required: boolean
7290   @param required: whether the validation should fail if the OS is not
7291       found
7292   @type nodenames: list
7293   @param nodenames: the list of nodes on which we should check
7294   @type osname: string
7295   @param osname: the name of the hypervisor we should use
7296   @type osparams: dict
7297   @param osparams: the parameters which we need to check
7298   @raise errors.OpPrereqError: if the parameters are not valid
7299
7300   """
7301   nodenames = _FilterVmNodes(lu, nodenames)
7302   result = lu.rpc.call_os_validate(required, nodenames, osname,
7303                                    [constants.OS_VALIDATE_PARAMETERS],
7304                                    osparams)
7305   for node, nres in result.items():
7306     # we don't check for offline cases since this should be run only
7307     # against the master node and/or an instance's nodes
7308     nres.Raise("OS Parameters validation failed on node %s" % node)
7309     if not nres.payload:
7310       lu.LogInfo("OS %s not found on node %s, validation skipped",
7311                  osname, node)
7312
7313
7314 class LUInstanceCreate(LogicalUnit):
7315   """Create an instance.
7316
7317   """
7318   HPATH = "instance-add"
7319   HTYPE = constants.HTYPE_INSTANCE
7320   REQ_BGL = False
7321
7322   def CheckArguments(self):
7323     """Check arguments.
7324
7325     """
7326     # do not require name_check to ease forward/backward compatibility
7327     # for tools
7328     if self.op.no_install and self.op.start:
7329       self.LogInfo("No-installation mode selected, disabling startup")
7330       self.op.start = False
7331     # validate/normalize the instance name
7332     self.op.instance_name = \
7333       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7334
7335     if self.op.ip_check and not self.op.name_check:
7336       # TODO: make the ip check more flexible and not depend on the name check
7337       raise errors.OpPrereqError("Cannot do ip check without a name check",
7338                                  errors.ECODE_INVAL)
7339
7340     # check nics' parameter names
7341     for nic in self.op.nics:
7342       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7343
7344     # check disks. parameter names and consistent adopt/no-adopt strategy
7345     has_adopt = has_no_adopt = False
7346     for disk in self.op.disks:
7347       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7348       if "adopt" in disk:
7349         has_adopt = True
7350       else:
7351         has_no_adopt = True
7352     if has_adopt and has_no_adopt:
7353       raise errors.OpPrereqError("Either all disks are adopted or none is",
7354                                  errors.ECODE_INVAL)
7355     if has_adopt:
7356       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7357         raise errors.OpPrereqError("Disk adoption is not supported for the"
7358                                    " '%s' disk template" %
7359                                    self.op.disk_template,
7360                                    errors.ECODE_INVAL)
7361       if self.op.iallocator is not None:
7362         raise errors.OpPrereqError("Disk adoption not allowed with an"
7363                                    " iallocator script", errors.ECODE_INVAL)
7364       if self.op.mode == constants.INSTANCE_IMPORT:
7365         raise errors.OpPrereqError("Disk adoption not allowed for"
7366                                    " instance import", errors.ECODE_INVAL)
7367     else:
7368       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7369         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7370                                    " but no 'adopt' parameter given" %
7371                                    self.op.disk_template,
7372                                    errors.ECODE_INVAL)
7373
7374     self.adopt_disks = has_adopt
7375
7376     # instance name verification
7377     if self.op.name_check:
7378       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7379       self.op.instance_name = self.hostname1.name
7380       # used in CheckPrereq for ip ping check
7381       self.check_ip = self.hostname1.ip
7382     else:
7383       self.check_ip = None
7384
7385     # file storage checks
7386     if (self.op.file_driver and
7387         not self.op.file_driver in constants.FILE_DRIVER):
7388       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7389                                  self.op.file_driver, errors.ECODE_INVAL)
7390
7391     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7392       raise errors.OpPrereqError("File storage directory path not absolute",
7393                                  errors.ECODE_INVAL)
7394
7395     ### Node/iallocator related checks
7396     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7397
7398     if self.op.pnode is not None:
7399       if self.op.disk_template in constants.DTS_INT_MIRROR:
7400         if self.op.snode is None:
7401           raise errors.OpPrereqError("The networked disk templates need"
7402                                      " a mirror node", errors.ECODE_INVAL)
7403       elif self.op.snode:
7404         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7405                         " template")
7406         self.op.snode = None
7407
7408     self._cds = _GetClusterDomainSecret()
7409
7410     if self.op.mode == constants.INSTANCE_IMPORT:
7411       # On import force_variant must be True, because if we forced it at
7412       # initial install, our only chance when importing it back is that it
7413       # works again!
7414       self.op.force_variant = True
7415
7416       if self.op.no_install:
7417         self.LogInfo("No-installation mode has no effect during import")
7418
7419     elif self.op.mode == constants.INSTANCE_CREATE:
7420       if self.op.os_type is None:
7421         raise errors.OpPrereqError("No guest OS specified",
7422                                    errors.ECODE_INVAL)
7423       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7424         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7425                                    " installation" % self.op.os_type,
7426                                    errors.ECODE_STATE)
7427       if self.op.disk_template is None:
7428         raise errors.OpPrereqError("No disk template specified",
7429                                    errors.ECODE_INVAL)
7430
7431     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7432       # Check handshake to ensure both clusters have the same domain secret
7433       src_handshake = self.op.source_handshake
7434       if not src_handshake:
7435         raise errors.OpPrereqError("Missing source handshake",
7436                                    errors.ECODE_INVAL)
7437
7438       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7439                                                            src_handshake)
7440       if errmsg:
7441         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7442                                    errors.ECODE_INVAL)
7443
7444       # Load and check source CA
7445       self.source_x509_ca_pem = self.op.source_x509_ca
7446       if not self.source_x509_ca_pem:
7447         raise errors.OpPrereqError("Missing source X509 CA",
7448                                    errors.ECODE_INVAL)
7449
7450       try:
7451         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7452                                                     self._cds)
7453       except OpenSSL.crypto.Error, err:
7454         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7455                                    (err, ), errors.ECODE_INVAL)
7456
7457       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7458       if errcode is not None:
7459         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7460                                    errors.ECODE_INVAL)
7461
7462       self.source_x509_ca = cert
7463
7464       src_instance_name = self.op.source_instance_name
7465       if not src_instance_name:
7466         raise errors.OpPrereqError("Missing source instance name",
7467                                    errors.ECODE_INVAL)
7468
7469       self.source_instance_name = \
7470           netutils.GetHostname(name=src_instance_name).name
7471
7472     else:
7473       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7474                                  self.op.mode, errors.ECODE_INVAL)
7475
7476   def ExpandNames(self):
7477     """ExpandNames for CreateInstance.
7478
7479     Figure out the right locks for instance creation.
7480
7481     """
7482     self.needed_locks = {}
7483
7484     instance_name = self.op.instance_name
7485     # this is just a preventive check, but someone might still add this
7486     # instance in the meantime, and creation will fail at lock-add time
7487     if instance_name in self.cfg.GetInstanceList():
7488       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7489                                  instance_name, errors.ECODE_EXISTS)
7490
7491     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7492
7493     if self.op.iallocator:
7494       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7495     else:
7496       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7497       nodelist = [self.op.pnode]
7498       if self.op.snode is not None:
7499         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7500         nodelist.append(self.op.snode)
7501       self.needed_locks[locking.LEVEL_NODE] = nodelist
7502
7503     # in case of import lock the source node too
7504     if self.op.mode == constants.INSTANCE_IMPORT:
7505       src_node = self.op.src_node
7506       src_path = self.op.src_path
7507
7508       if src_path is None:
7509         self.op.src_path = src_path = self.op.instance_name
7510
7511       if src_node is None:
7512         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7513         self.op.src_node = None
7514         if os.path.isabs(src_path):
7515           raise errors.OpPrereqError("Importing an instance from an absolute"
7516                                      " path requires a source node option.",
7517                                      errors.ECODE_INVAL)
7518       else:
7519         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7520         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7521           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7522         if not os.path.isabs(src_path):
7523           self.op.src_path = src_path = \
7524             utils.PathJoin(constants.EXPORT_DIR, src_path)
7525
7526   def _RunAllocator(self):
7527     """Run the allocator based on input opcode.
7528
7529     """
7530     nics = [n.ToDict() for n in self.nics]
7531     ial = IAllocator(self.cfg, self.rpc,
7532                      mode=constants.IALLOCATOR_MODE_ALLOC,
7533                      name=self.op.instance_name,
7534                      disk_template=self.op.disk_template,
7535                      tags=[],
7536                      os=self.op.os_type,
7537                      vcpus=self.be_full[constants.BE_VCPUS],
7538                      mem_size=self.be_full[constants.BE_MEMORY],
7539                      disks=self.disks,
7540                      nics=nics,
7541                      hypervisor=self.op.hypervisor,
7542                      )
7543
7544     ial.Run(self.op.iallocator)
7545
7546     if not ial.success:
7547       raise errors.OpPrereqError("Can't compute nodes using"
7548                                  " iallocator '%s': %s" %
7549                                  (self.op.iallocator, ial.info),
7550                                  errors.ECODE_NORES)
7551     if len(ial.result) != ial.required_nodes:
7552       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7553                                  " of nodes (%s), required %s" %
7554                                  (self.op.iallocator, len(ial.result),
7555                                   ial.required_nodes), errors.ECODE_FAULT)
7556     self.op.pnode = ial.result[0]
7557     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7558                  self.op.instance_name, self.op.iallocator,
7559                  utils.CommaJoin(ial.result))
7560     if ial.required_nodes == 2:
7561       self.op.snode = ial.result[1]
7562
7563   def BuildHooksEnv(self):
7564     """Build hooks env.
7565
7566     This runs on master, primary and secondary nodes of the instance.
7567
7568     """
7569     env = {
7570       "ADD_MODE": self.op.mode,
7571       }
7572     if self.op.mode == constants.INSTANCE_IMPORT:
7573       env["SRC_NODE"] = self.op.src_node
7574       env["SRC_PATH"] = self.op.src_path
7575       env["SRC_IMAGES"] = self.src_images
7576
7577     env.update(_BuildInstanceHookEnv(
7578       name=self.op.instance_name,
7579       primary_node=self.op.pnode,
7580       secondary_nodes=self.secondaries,
7581       status=self.op.start,
7582       os_type=self.op.os_type,
7583       memory=self.be_full[constants.BE_MEMORY],
7584       vcpus=self.be_full[constants.BE_VCPUS],
7585       nics=_NICListToTuple(self, self.nics),
7586       disk_template=self.op.disk_template,
7587       disks=[(d["size"], d["mode"]) for d in self.disks],
7588       bep=self.be_full,
7589       hvp=self.hv_full,
7590       hypervisor_name=self.op.hypervisor,
7591     ))
7592
7593     return env
7594
7595   def BuildHooksNodes(self):
7596     """Build hooks nodes.
7597
7598     """
7599     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7600     return nl, nl
7601
7602   def _ReadExportInfo(self):
7603     """Reads the export information from disk.
7604
7605     It will override the opcode source node and path with the actual
7606     information, if these two were not specified before.
7607
7608     @return: the export information
7609
7610     """
7611     assert self.op.mode == constants.INSTANCE_IMPORT
7612
7613     src_node = self.op.src_node
7614     src_path = self.op.src_path
7615
7616     if src_node is None:
7617       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7618       exp_list = self.rpc.call_export_list(locked_nodes)
7619       found = False
7620       for node in exp_list:
7621         if exp_list[node].fail_msg:
7622           continue
7623         if src_path in exp_list[node].payload:
7624           found = True
7625           self.op.src_node = src_node = node
7626           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7627                                                        src_path)
7628           break
7629       if not found:
7630         raise errors.OpPrereqError("No export found for relative path %s" %
7631                                     src_path, errors.ECODE_INVAL)
7632
7633     _CheckNodeOnline(self, src_node)
7634     result = self.rpc.call_export_info(src_node, src_path)
7635     result.Raise("No export or invalid export found in dir %s" % src_path)
7636
7637     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7638     if not export_info.has_section(constants.INISECT_EXP):
7639       raise errors.ProgrammerError("Corrupted export config",
7640                                    errors.ECODE_ENVIRON)
7641
7642     ei_version = export_info.get(constants.INISECT_EXP, "version")
7643     if (int(ei_version) != constants.EXPORT_VERSION):
7644       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7645                                  (ei_version, constants.EXPORT_VERSION),
7646                                  errors.ECODE_ENVIRON)
7647     return export_info
7648
7649   def _ReadExportParams(self, einfo):
7650     """Use export parameters as defaults.
7651
7652     In case the opcode doesn't specify (as in override) some instance
7653     parameters, then try to use them from the export information, if
7654     that declares them.
7655
7656     """
7657     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7658
7659     if self.op.disk_template is None:
7660       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7661         self.op.disk_template = einfo.get(constants.INISECT_INS,
7662                                           "disk_template")
7663       else:
7664         raise errors.OpPrereqError("No disk template specified and the export"
7665                                    " is missing the disk_template information",
7666                                    errors.ECODE_INVAL)
7667
7668     if not self.op.disks:
7669       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7670         disks = []
7671         # TODO: import the disk iv_name too
7672         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7673           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7674           disks.append({"size": disk_sz})
7675         self.op.disks = disks
7676       else:
7677         raise errors.OpPrereqError("No disk info specified and the export"
7678                                    " is missing the disk information",
7679                                    errors.ECODE_INVAL)
7680
7681     if (not self.op.nics and
7682         einfo.has_option(constants.INISECT_INS, "nic_count")):
7683       nics = []
7684       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7685         ndict = {}
7686         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7687           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7688           ndict[name] = v
7689         nics.append(ndict)
7690       self.op.nics = nics
7691
7692     if (self.op.hypervisor is None and
7693         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7694       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7695     if einfo.has_section(constants.INISECT_HYP):
7696       # use the export parameters but do not override the ones
7697       # specified by the user
7698       for name, value in einfo.items(constants.INISECT_HYP):
7699         if name not in self.op.hvparams:
7700           self.op.hvparams[name] = value
7701
7702     if einfo.has_section(constants.INISECT_BEP):
7703       # use the parameters, without overriding
7704       for name, value in einfo.items(constants.INISECT_BEP):
7705         if name not in self.op.beparams:
7706           self.op.beparams[name] = value
7707     else:
7708       # try to read the parameters old style, from the main section
7709       for name in constants.BES_PARAMETERS:
7710         if (name not in self.op.beparams and
7711             einfo.has_option(constants.INISECT_INS, name)):
7712           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7713
7714     if einfo.has_section(constants.INISECT_OSP):
7715       # use the parameters, without overriding
7716       for name, value in einfo.items(constants.INISECT_OSP):
7717         if name not in self.op.osparams:
7718           self.op.osparams[name] = value
7719
7720   def _RevertToDefaults(self, cluster):
7721     """Revert the instance parameters to the default values.
7722
7723     """
7724     # hvparams
7725     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7726     for name in self.op.hvparams.keys():
7727       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7728         del self.op.hvparams[name]
7729     # beparams
7730     be_defs = cluster.SimpleFillBE({})
7731     for name in self.op.beparams.keys():
7732       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7733         del self.op.beparams[name]
7734     # nic params
7735     nic_defs = cluster.SimpleFillNIC({})
7736     for nic in self.op.nics:
7737       for name in constants.NICS_PARAMETERS:
7738         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7739           del nic[name]
7740     # osparams
7741     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7742     for name in self.op.osparams.keys():
7743       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7744         del self.op.osparams[name]
7745
7746   def CheckPrereq(self):
7747     """Check prerequisites.
7748
7749     """
7750     if self.op.mode == constants.INSTANCE_IMPORT:
7751       export_info = self._ReadExportInfo()
7752       self._ReadExportParams(export_info)
7753
7754     if (not self.cfg.GetVGName() and
7755         self.op.disk_template not in constants.DTS_NOT_LVM):
7756       raise errors.OpPrereqError("Cluster does not support lvm-based"
7757                                  " instances", errors.ECODE_STATE)
7758
7759     if self.op.hypervisor is None:
7760       self.op.hypervisor = self.cfg.GetHypervisorType()
7761
7762     cluster = self.cfg.GetClusterInfo()
7763     enabled_hvs = cluster.enabled_hypervisors
7764     if self.op.hypervisor not in enabled_hvs:
7765       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7766                                  " cluster (%s)" % (self.op.hypervisor,
7767                                   ",".join(enabled_hvs)),
7768                                  errors.ECODE_STATE)
7769
7770     # check hypervisor parameter syntax (locally)
7771     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7772     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7773                                       self.op.hvparams)
7774     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7775     hv_type.CheckParameterSyntax(filled_hvp)
7776     self.hv_full = filled_hvp
7777     # check that we don't specify global parameters on an instance
7778     _CheckGlobalHvParams(self.op.hvparams)
7779
7780     # fill and remember the beparams dict
7781     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7782     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7783
7784     # build os parameters
7785     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7786
7787     # now that hvp/bep are in final format, let's reset to defaults,
7788     # if told to do so
7789     if self.op.identify_defaults:
7790       self._RevertToDefaults(cluster)
7791
7792     # NIC buildup
7793     self.nics = []
7794     for idx, nic in enumerate(self.op.nics):
7795       nic_mode_req = nic.get("mode", None)
7796       nic_mode = nic_mode_req
7797       if nic_mode is None:
7798         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7799
7800       # in routed mode, for the first nic, the default ip is 'auto'
7801       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7802         default_ip_mode = constants.VALUE_AUTO
7803       else:
7804         default_ip_mode = constants.VALUE_NONE
7805
7806       # ip validity checks
7807       ip = nic.get("ip", default_ip_mode)
7808       if ip is None or ip.lower() == constants.VALUE_NONE:
7809         nic_ip = None
7810       elif ip.lower() == constants.VALUE_AUTO:
7811         if not self.op.name_check:
7812           raise errors.OpPrereqError("IP address set to auto but name checks"
7813                                      " have been skipped",
7814                                      errors.ECODE_INVAL)
7815         nic_ip = self.hostname1.ip
7816       else:
7817         if not netutils.IPAddress.IsValid(ip):
7818           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7819                                      errors.ECODE_INVAL)
7820         nic_ip = ip
7821
7822       # TODO: check the ip address for uniqueness
7823       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7824         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7825                                    errors.ECODE_INVAL)
7826
7827       # MAC address verification
7828       mac = nic.get("mac", constants.VALUE_AUTO)
7829       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7830         mac = utils.NormalizeAndValidateMac(mac)
7831
7832         try:
7833           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7834         except errors.ReservationError:
7835           raise errors.OpPrereqError("MAC address %s already in use"
7836                                      " in cluster" % mac,
7837                                      errors.ECODE_NOTUNIQUE)
7838
7839       #  Build nic parameters
7840       link = nic.get(constants.INIC_LINK, None)
7841       nicparams = {}
7842       if nic_mode_req:
7843         nicparams[constants.NIC_MODE] = nic_mode_req
7844       if link:
7845         nicparams[constants.NIC_LINK] = link
7846
7847       check_params = cluster.SimpleFillNIC(nicparams)
7848       objects.NIC.CheckParameterSyntax(check_params)
7849       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7850
7851     # disk checks/pre-build
7852     self.disks = []
7853     for disk in self.op.disks:
7854       mode = disk.get("mode", constants.DISK_RDWR)
7855       if mode not in constants.DISK_ACCESS_SET:
7856         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7857                                    mode, errors.ECODE_INVAL)
7858       size = disk.get("size", None)
7859       if size is None:
7860         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7861       try:
7862         size = int(size)
7863       except (TypeError, ValueError):
7864         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7865                                    errors.ECODE_INVAL)
7866       vg = disk.get("vg", self.cfg.GetVGName())
7867       new_disk = {"size": size, "mode": mode, "vg": vg}
7868       if "adopt" in disk:
7869         new_disk["adopt"] = disk["adopt"]
7870       self.disks.append(new_disk)
7871
7872     if self.op.mode == constants.INSTANCE_IMPORT:
7873
7874       # Check that the new instance doesn't have less disks than the export
7875       instance_disks = len(self.disks)
7876       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7877       if instance_disks < export_disks:
7878         raise errors.OpPrereqError("Not enough disks to import."
7879                                    " (instance: %d, export: %d)" %
7880                                    (instance_disks, export_disks),
7881                                    errors.ECODE_INVAL)
7882
7883       disk_images = []
7884       for idx in range(export_disks):
7885         option = 'disk%d_dump' % idx
7886         if export_info.has_option(constants.INISECT_INS, option):
7887           # FIXME: are the old os-es, disk sizes, etc. useful?
7888           export_name = export_info.get(constants.INISECT_INS, option)
7889           image = utils.PathJoin(self.op.src_path, export_name)
7890           disk_images.append(image)
7891         else:
7892           disk_images.append(False)
7893
7894       self.src_images = disk_images
7895
7896       old_name = export_info.get(constants.INISECT_INS, 'name')
7897       try:
7898         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7899       except (TypeError, ValueError), err:
7900         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7901                                    " an integer: %s" % str(err),
7902                                    errors.ECODE_STATE)
7903       if self.op.instance_name == old_name:
7904         for idx, nic in enumerate(self.nics):
7905           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7906             nic_mac_ini = 'nic%d_mac' % idx
7907             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7908
7909     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7910
7911     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7912     if self.op.ip_check:
7913       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7914         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7915                                    (self.check_ip, self.op.instance_name),
7916                                    errors.ECODE_NOTUNIQUE)
7917
7918     #### mac address generation
7919     # By generating here the mac address both the allocator and the hooks get
7920     # the real final mac address rather than the 'auto' or 'generate' value.
7921     # There is a race condition between the generation and the instance object
7922     # creation, which means that we know the mac is valid now, but we're not
7923     # sure it will be when we actually add the instance. If things go bad
7924     # adding the instance will abort because of a duplicate mac, and the
7925     # creation job will fail.
7926     for nic in self.nics:
7927       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7928         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7929
7930     #### allocator run
7931
7932     if self.op.iallocator is not None:
7933       self._RunAllocator()
7934
7935     #### node related checks
7936
7937     # check primary node
7938     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7939     assert self.pnode is not None, \
7940       "Cannot retrieve locked node %s" % self.op.pnode
7941     if pnode.offline:
7942       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7943                                  pnode.name, errors.ECODE_STATE)
7944     if pnode.drained:
7945       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7946                                  pnode.name, errors.ECODE_STATE)
7947     if not pnode.vm_capable:
7948       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7949                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7950
7951     self.secondaries = []
7952
7953     # mirror node verification
7954     if self.op.disk_template in constants.DTS_INT_MIRROR:
7955       if self.op.snode == pnode.name:
7956         raise errors.OpPrereqError("The secondary node cannot be the"
7957                                    " primary node.", errors.ECODE_INVAL)
7958       _CheckNodeOnline(self, self.op.snode)
7959       _CheckNodeNotDrained(self, self.op.snode)
7960       _CheckNodeVmCapable(self, self.op.snode)
7961       self.secondaries.append(self.op.snode)
7962
7963     nodenames = [pnode.name] + self.secondaries
7964
7965     if not self.adopt_disks:
7966       # Check lv size requirements, if not adopting
7967       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7968       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7969
7970     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
7971       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7972       if len(all_lvs) != len(self.disks):
7973         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7974                                    errors.ECODE_INVAL)
7975       for lv_name in all_lvs:
7976         try:
7977           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7978           # to ReserveLV uses the same syntax
7979           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7980         except errors.ReservationError:
7981           raise errors.OpPrereqError("LV named %s used by another instance" %
7982                                      lv_name, errors.ECODE_NOTUNIQUE)
7983
7984       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7985       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7986
7987       node_lvs = self.rpc.call_lv_list([pnode.name],
7988                                        vg_names.payload.keys())[pnode.name]
7989       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7990       node_lvs = node_lvs.payload
7991
7992       delta = all_lvs.difference(node_lvs.keys())
7993       if delta:
7994         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7995                                    utils.CommaJoin(delta),
7996                                    errors.ECODE_INVAL)
7997       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7998       if online_lvs:
7999         raise errors.OpPrereqError("Online logical volumes found, cannot"
8000                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8001                                    errors.ECODE_STATE)
8002       # update the size of disk based on what is found
8003       for dsk in self.disks:
8004         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
8005
8006     elif self.op.disk_template == constants.DT_BLOCK:
8007       # Normalize and de-duplicate device paths
8008       all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
8009       if len(all_disks) != len(self.disks):
8010         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8011                                    errors.ECODE_INVAL)
8012       baddisks = [d for d in all_disks
8013                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8014       if baddisks:
8015         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8016                                    " cannot be adopted" %
8017                                    (", ".join(baddisks),
8018                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8019                                    errors.ECODE_INVAL)
8020
8021       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8022                                             list(all_disks))[pnode.name]
8023       node_disks.Raise("Cannot get block device information from node %s" %
8024                        pnode.name)
8025       node_disks = node_disks.payload
8026       delta = all_disks.difference(node_disks.keys())
8027       if delta:
8028         raise errors.OpPrereqError("Missing block device(s): %s" %
8029                                    utils.CommaJoin(delta),
8030                                    errors.ECODE_INVAL)
8031       for dsk in self.disks:
8032         dsk["size"] = int(float(node_disks[dsk["adopt"]]))
8033
8034     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8035
8036     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8037     # check OS parameters (remotely)
8038     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8039
8040     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8041
8042     # memory check on primary node
8043     if self.op.start:
8044       _CheckNodeFreeMemory(self, self.pnode.name,
8045                            "creating instance %s" % self.op.instance_name,
8046                            self.be_full[constants.BE_MEMORY],
8047                            self.op.hypervisor)
8048
8049     self.dry_run_result = list(nodenames)
8050
8051   def Exec(self, feedback_fn):
8052     """Create and add the instance to the cluster.
8053
8054     """
8055     instance = self.op.instance_name
8056     pnode_name = self.pnode.name
8057
8058     ht_kind = self.op.hypervisor
8059     if ht_kind in constants.HTS_REQ_PORT:
8060       network_port = self.cfg.AllocatePort()
8061     else:
8062       network_port = None
8063
8064     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8065       # this is needed because os.path.join does not accept None arguments
8066       if self.op.file_storage_dir is None:
8067         string_file_storage_dir = ""
8068       else:
8069         string_file_storage_dir = self.op.file_storage_dir
8070
8071       # build the full file storage dir path
8072       if self.op.disk_template == constants.DT_SHARED_FILE:
8073         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8074       else:
8075         get_fsd_fn = self.cfg.GetFileStorageDir
8076
8077       file_storage_dir = utils.PathJoin(get_fsd_fn(),
8078                                         string_file_storage_dir, instance)
8079     else:
8080       file_storage_dir = ""
8081
8082     disks = _GenerateDiskTemplate(self,
8083                                   self.op.disk_template,
8084                                   instance, pnode_name,
8085                                   self.secondaries,
8086                                   self.disks,
8087                                   file_storage_dir,
8088                                   self.op.file_driver,
8089                                   0,
8090                                   feedback_fn)
8091
8092     iobj = objects.Instance(name=instance, os=self.op.os_type,
8093                             primary_node=pnode_name,
8094                             nics=self.nics, disks=disks,
8095                             disk_template=self.op.disk_template,
8096                             admin_up=False,
8097                             network_port=network_port,
8098                             beparams=self.op.beparams,
8099                             hvparams=self.op.hvparams,
8100                             hypervisor=self.op.hypervisor,
8101                             osparams=self.op.osparams,
8102                             )
8103
8104     if self.adopt_disks:
8105       if self.op.disk_template == constants.DT_PLAIN:
8106         # rename LVs to the newly-generated names; we need to construct
8107         # 'fake' LV disks with the old data, plus the new unique_id
8108         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8109         rename_to = []
8110         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8111           rename_to.append(t_dsk.logical_id)
8112           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
8113           self.cfg.SetDiskID(t_dsk, pnode_name)
8114         result = self.rpc.call_blockdev_rename(pnode_name,
8115                                                zip(tmp_disks, rename_to))
8116         result.Raise("Failed to rename adoped LVs")
8117     else:
8118       feedback_fn("* creating instance disks...")
8119       try:
8120         _CreateDisks(self, iobj)
8121       except errors.OpExecError:
8122         self.LogWarning("Device creation failed, reverting...")
8123         try:
8124           _RemoveDisks(self, iobj)
8125         finally:
8126           self.cfg.ReleaseDRBDMinors(instance)
8127           raise
8128
8129       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8130         feedback_fn("* wiping instance disks...")
8131         try:
8132           _WipeDisks(self, iobj)
8133         except errors.OpExecError:
8134           self.LogWarning("Device wiping failed, reverting...")
8135           try:
8136             _RemoveDisks(self, iobj)
8137           finally:
8138             self.cfg.ReleaseDRBDMinors(instance)
8139             raise
8140
8141     feedback_fn("adding instance %s to cluster config" % instance)
8142
8143     self.cfg.AddInstance(iobj, self.proc.GetECId())
8144
8145     # Declare that we don't want to remove the instance lock anymore, as we've
8146     # added the instance to the config
8147     del self.remove_locks[locking.LEVEL_INSTANCE]
8148     # Unlock all the nodes
8149     if self.op.mode == constants.INSTANCE_IMPORT:
8150       nodes_keep = [self.op.src_node]
8151       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8152                        if node != self.op.src_node]
8153       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8154       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8155     else:
8156       self.context.glm.release(locking.LEVEL_NODE)
8157       del self.acquired_locks[locking.LEVEL_NODE]
8158
8159     if self.op.wait_for_sync:
8160       disk_abort = not _WaitForSync(self, iobj)
8161     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8162       # make sure the disks are not degraded (still sync-ing is ok)
8163       time.sleep(15)
8164       feedback_fn("* checking mirrors status")
8165       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8166     else:
8167       disk_abort = False
8168
8169     if disk_abort:
8170       _RemoveDisks(self, iobj)
8171       self.cfg.RemoveInstance(iobj.name)
8172       # Make sure the instance lock gets removed
8173       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8174       raise errors.OpExecError("There are some degraded disks for"
8175                                " this instance")
8176
8177     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8178       if self.op.mode == constants.INSTANCE_CREATE:
8179         if not self.op.no_install:
8180           feedback_fn("* running the instance OS create scripts...")
8181           # FIXME: pass debug option from opcode to backend
8182           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8183                                                  self.op.debug_level)
8184           result.Raise("Could not add os for instance %s"
8185                        " on node %s" % (instance, pnode_name))
8186
8187       elif self.op.mode == constants.INSTANCE_IMPORT:
8188         feedback_fn("* running the instance OS import scripts...")
8189
8190         transfers = []
8191
8192         for idx, image in enumerate(self.src_images):
8193           if not image:
8194             continue
8195
8196           # FIXME: pass debug option from opcode to backend
8197           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8198                                              constants.IEIO_FILE, (image, ),
8199                                              constants.IEIO_SCRIPT,
8200                                              (iobj.disks[idx], idx),
8201                                              None)
8202           transfers.append(dt)
8203
8204         import_result = \
8205           masterd.instance.TransferInstanceData(self, feedback_fn,
8206                                                 self.op.src_node, pnode_name,
8207                                                 self.pnode.secondary_ip,
8208                                                 iobj, transfers)
8209         if not compat.all(import_result):
8210           self.LogWarning("Some disks for instance %s on node %s were not"
8211                           " imported successfully" % (instance, pnode_name))
8212
8213       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8214         feedback_fn("* preparing remote import...")
8215         # The source cluster will stop the instance before attempting to make a
8216         # connection. In some cases stopping an instance can take a long time,
8217         # hence the shutdown timeout is added to the connection timeout.
8218         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8219                            self.op.source_shutdown_timeout)
8220         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8221
8222         assert iobj.primary_node == self.pnode.name
8223         disk_results = \
8224           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8225                                         self.source_x509_ca,
8226                                         self._cds, timeouts)
8227         if not compat.all(disk_results):
8228           # TODO: Should the instance still be started, even if some disks
8229           # failed to import (valid for local imports, too)?
8230           self.LogWarning("Some disks for instance %s on node %s were not"
8231                           " imported successfully" % (instance, pnode_name))
8232
8233         # Run rename script on newly imported instance
8234         assert iobj.name == instance
8235         feedback_fn("Running rename script for %s" % instance)
8236         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8237                                                    self.source_instance_name,
8238                                                    self.op.debug_level)
8239         if result.fail_msg:
8240           self.LogWarning("Failed to run rename script for %s on node"
8241                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8242
8243       else:
8244         # also checked in the prereq part
8245         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8246                                      % self.op.mode)
8247
8248     if self.op.start:
8249       iobj.admin_up = True
8250       self.cfg.Update(iobj, feedback_fn)
8251       logging.info("Starting instance %s on node %s", instance, pnode_name)
8252       feedback_fn("* starting instance...")
8253       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8254       result.Raise("Could not start instance")
8255
8256     return list(iobj.all_nodes)
8257
8258
8259 class LUInstanceConsole(NoHooksLU):
8260   """Connect to an instance's console.
8261
8262   This is somewhat special in that it returns the command line that
8263   you need to run on the master node in order to connect to the
8264   console.
8265
8266   """
8267   REQ_BGL = False
8268
8269   def ExpandNames(self):
8270     self._ExpandAndLockInstance()
8271
8272   def CheckPrereq(self):
8273     """Check prerequisites.
8274
8275     This checks that the instance is in the cluster.
8276
8277     """
8278     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8279     assert self.instance is not None, \
8280       "Cannot retrieve locked instance %s" % self.op.instance_name
8281     _CheckNodeOnline(self, self.instance.primary_node)
8282
8283   def Exec(self, feedback_fn):
8284     """Connect to the console of an instance
8285
8286     """
8287     instance = self.instance
8288     node = instance.primary_node
8289
8290     node_insts = self.rpc.call_instance_list([node],
8291                                              [instance.hypervisor])[node]
8292     node_insts.Raise("Can't get node information from %s" % node)
8293
8294     if instance.name not in node_insts.payload:
8295       if instance.admin_up:
8296         state = constants.INSTST_ERRORDOWN
8297       else:
8298         state = constants.INSTST_ADMINDOWN
8299       raise errors.OpExecError("Instance %s is not running (state %s)" %
8300                                (instance.name, state))
8301
8302     logging.debug("Connecting to console of %s on %s", instance.name, node)
8303
8304     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8305
8306
8307 def _GetInstanceConsole(cluster, instance):
8308   """Returns console information for an instance.
8309
8310   @type cluster: L{objects.Cluster}
8311   @type instance: L{objects.Instance}
8312   @rtype: dict
8313
8314   """
8315   hyper = hypervisor.GetHypervisor(instance.hypervisor)
8316   # beparams and hvparams are passed separately, to avoid editing the
8317   # instance and then saving the defaults in the instance itself.
8318   hvparams = cluster.FillHV(instance)
8319   beparams = cluster.FillBE(instance)
8320   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8321
8322   assert console.instance == instance.name
8323   assert console.Validate()
8324
8325   return console.ToDict()
8326
8327
8328 class LUInstanceReplaceDisks(LogicalUnit):
8329   """Replace the disks of an instance.
8330
8331   """
8332   HPATH = "mirrors-replace"
8333   HTYPE = constants.HTYPE_INSTANCE
8334   REQ_BGL = False
8335
8336   def CheckArguments(self):
8337     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8338                                   self.op.iallocator)
8339
8340   def ExpandNames(self):
8341     self._ExpandAndLockInstance()
8342
8343     if self.op.iallocator is not None:
8344       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8345
8346     elif self.op.remote_node is not None:
8347       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8348       self.op.remote_node = remote_node
8349
8350       # Warning: do not remove the locking of the new secondary here
8351       # unless DRBD8.AddChildren is changed to work in parallel;
8352       # currently it doesn't since parallel invocations of
8353       # FindUnusedMinor will conflict
8354       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8355       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8356
8357     else:
8358       self.needed_locks[locking.LEVEL_NODE] = []
8359       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8360
8361     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8362                                    self.op.iallocator, self.op.remote_node,
8363                                    self.op.disks, False, self.op.early_release)
8364
8365     self.tasklets = [self.replacer]
8366
8367   def DeclareLocks(self, level):
8368     # If we're not already locking all nodes in the set we have to declare the
8369     # instance's primary/secondary nodes.
8370     if (level == locking.LEVEL_NODE and
8371         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8372       self._LockInstancesNodes()
8373
8374   def BuildHooksEnv(self):
8375     """Build hooks env.
8376
8377     This runs on the master, the primary and all the secondaries.
8378
8379     """
8380     instance = self.replacer.instance
8381     env = {
8382       "MODE": self.op.mode,
8383       "NEW_SECONDARY": self.op.remote_node,
8384       "OLD_SECONDARY": instance.secondary_nodes[0],
8385       }
8386     env.update(_BuildInstanceHookEnvByObject(self, instance))
8387     return env
8388
8389   def BuildHooksNodes(self):
8390     """Build hooks nodes.
8391
8392     """
8393     instance = self.replacer.instance
8394     nl = [
8395       self.cfg.GetMasterNode(),
8396       instance.primary_node,
8397       ]
8398     if self.op.remote_node is not None:
8399       nl.append(self.op.remote_node)
8400     return nl, nl
8401
8402
8403 class TLReplaceDisks(Tasklet):
8404   """Replaces disks for an instance.
8405
8406   Note: Locking is not within the scope of this class.
8407
8408   """
8409   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8410                disks, delay_iallocator, early_release):
8411     """Initializes this class.
8412
8413     """
8414     Tasklet.__init__(self, lu)
8415
8416     # Parameters
8417     self.instance_name = instance_name
8418     self.mode = mode
8419     self.iallocator_name = iallocator_name
8420     self.remote_node = remote_node
8421     self.disks = disks
8422     self.delay_iallocator = delay_iallocator
8423     self.early_release = early_release
8424
8425     # Runtime data
8426     self.instance = None
8427     self.new_node = None
8428     self.target_node = None
8429     self.other_node = None
8430     self.remote_node_info = None
8431     self.node_secondary_ip = None
8432
8433   @staticmethod
8434   def CheckArguments(mode, remote_node, iallocator):
8435     """Helper function for users of this class.
8436
8437     """
8438     # check for valid parameter combination
8439     if mode == constants.REPLACE_DISK_CHG:
8440       if remote_node is None and iallocator is None:
8441         raise errors.OpPrereqError("When changing the secondary either an"
8442                                    " iallocator script must be used or the"
8443                                    " new node given", errors.ECODE_INVAL)
8444
8445       if remote_node is not None and iallocator is not None:
8446         raise errors.OpPrereqError("Give either the iallocator or the new"
8447                                    " secondary, not both", errors.ECODE_INVAL)
8448
8449     elif remote_node is not None or iallocator is not None:
8450       # Not replacing the secondary
8451       raise errors.OpPrereqError("The iallocator and new node options can"
8452                                  " only be used when changing the"
8453                                  " secondary node", errors.ECODE_INVAL)
8454
8455   @staticmethod
8456   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8457     """Compute a new secondary node using an IAllocator.
8458
8459     """
8460     ial = IAllocator(lu.cfg, lu.rpc,
8461                      mode=constants.IALLOCATOR_MODE_RELOC,
8462                      name=instance_name,
8463                      relocate_from=relocate_from)
8464
8465     ial.Run(iallocator_name)
8466
8467     if not ial.success:
8468       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8469                                  " %s" % (iallocator_name, ial.info),
8470                                  errors.ECODE_NORES)
8471
8472     if len(ial.result) != ial.required_nodes:
8473       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8474                                  " of nodes (%s), required %s" %
8475                                  (iallocator_name,
8476                                   len(ial.result), ial.required_nodes),
8477                                  errors.ECODE_FAULT)
8478
8479     remote_node_name = ial.result[0]
8480
8481     lu.LogInfo("Selected new secondary for instance '%s': %s",
8482                instance_name, remote_node_name)
8483
8484     return remote_node_name
8485
8486   def _FindFaultyDisks(self, node_name):
8487     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8488                                     node_name, True)
8489
8490   def _CheckDisksActivated(self, instance):
8491     """Checks if the instance disks are activated.
8492
8493     @param instance: The instance to check disks
8494     @return: True if they are activated, False otherwise
8495
8496     """
8497     nodes = instance.all_nodes
8498
8499     for idx, dev in enumerate(instance.disks):
8500       for node in nodes:
8501         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8502         self.cfg.SetDiskID(dev, node)
8503
8504         result = self.rpc.call_blockdev_find(node, dev)
8505
8506         if result.offline:
8507           continue
8508         elif result.fail_msg or not result.payload:
8509           return False
8510
8511     return True
8512
8513
8514   def CheckPrereq(self):
8515     """Check prerequisites.
8516
8517     This checks that the instance is in the cluster.
8518
8519     """
8520     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8521     assert instance is not None, \
8522       "Cannot retrieve locked instance %s" % self.instance_name
8523
8524     if instance.disk_template != constants.DT_DRBD8:
8525       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8526                                  " instances", errors.ECODE_INVAL)
8527
8528     if len(instance.secondary_nodes) != 1:
8529       raise errors.OpPrereqError("The instance has a strange layout,"
8530                                  " expected one secondary but found %d" %
8531                                  len(instance.secondary_nodes),
8532                                  errors.ECODE_FAULT)
8533
8534     if not self.delay_iallocator:
8535       self._CheckPrereq2()
8536
8537   def _CheckPrereq2(self):
8538     """Check prerequisites, second part.
8539
8540     This function should always be part of CheckPrereq. It was separated and is
8541     now called from Exec because during node evacuation iallocator was only
8542     called with an unmodified cluster model, not taking planned changes into
8543     account.
8544
8545     """
8546     instance = self.instance
8547     secondary_node = instance.secondary_nodes[0]
8548
8549     if self.iallocator_name is None:
8550       remote_node = self.remote_node
8551     else:
8552       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8553                                        instance.name, instance.secondary_nodes)
8554
8555     if remote_node is not None:
8556       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8557       assert self.remote_node_info is not None, \
8558         "Cannot retrieve locked node %s" % remote_node
8559     else:
8560       self.remote_node_info = None
8561
8562     if remote_node == self.instance.primary_node:
8563       raise errors.OpPrereqError("The specified node is the primary node of"
8564                                  " the instance.", errors.ECODE_INVAL)
8565
8566     if remote_node == secondary_node:
8567       raise errors.OpPrereqError("The specified node is already the"
8568                                  " secondary node of the instance.",
8569                                  errors.ECODE_INVAL)
8570
8571     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8572                                     constants.REPLACE_DISK_CHG):
8573       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8574                                  errors.ECODE_INVAL)
8575
8576     if self.mode == constants.REPLACE_DISK_AUTO:
8577       if not self._CheckDisksActivated(instance):
8578         raise errors.OpPrereqError("Please run activate-disks on instance %s"
8579                                    " first" % self.instance_name,
8580                                    errors.ECODE_STATE)
8581       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8582       faulty_secondary = self._FindFaultyDisks(secondary_node)
8583
8584       if faulty_primary and faulty_secondary:
8585         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8586                                    " one node and can not be repaired"
8587                                    " automatically" % self.instance_name,
8588                                    errors.ECODE_STATE)
8589
8590       if faulty_primary:
8591         self.disks = faulty_primary
8592         self.target_node = instance.primary_node
8593         self.other_node = secondary_node
8594         check_nodes = [self.target_node, self.other_node]
8595       elif faulty_secondary:
8596         self.disks = faulty_secondary
8597         self.target_node = secondary_node
8598         self.other_node = instance.primary_node
8599         check_nodes = [self.target_node, self.other_node]
8600       else:
8601         self.disks = []
8602         check_nodes = []
8603
8604     else:
8605       # Non-automatic modes
8606       if self.mode == constants.REPLACE_DISK_PRI:
8607         self.target_node = instance.primary_node
8608         self.other_node = secondary_node
8609         check_nodes = [self.target_node, self.other_node]
8610
8611       elif self.mode == constants.REPLACE_DISK_SEC:
8612         self.target_node = secondary_node
8613         self.other_node = instance.primary_node
8614         check_nodes = [self.target_node, self.other_node]
8615
8616       elif self.mode == constants.REPLACE_DISK_CHG:
8617         self.new_node = remote_node
8618         self.other_node = instance.primary_node
8619         self.target_node = secondary_node
8620         check_nodes = [self.new_node, self.other_node]
8621
8622         _CheckNodeNotDrained(self.lu, remote_node)
8623         _CheckNodeVmCapable(self.lu, remote_node)
8624
8625         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8626         assert old_node_info is not None
8627         if old_node_info.offline and not self.early_release:
8628           # doesn't make sense to delay the release
8629           self.early_release = True
8630           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8631                           " early-release mode", secondary_node)
8632
8633       else:
8634         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8635                                      self.mode)
8636
8637       # If not specified all disks should be replaced
8638       if not self.disks:
8639         self.disks = range(len(self.instance.disks))
8640
8641     for node in check_nodes:
8642       _CheckNodeOnline(self.lu, node)
8643
8644     # Check whether disks are valid
8645     for disk_idx in self.disks:
8646       instance.FindDisk(disk_idx)
8647
8648     # Get secondary node IP addresses
8649     node_2nd_ip = {}
8650
8651     for node_name in [self.target_node, self.other_node, self.new_node]:
8652       if node_name is not None:
8653         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8654
8655     self.node_secondary_ip = node_2nd_ip
8656
8657   def Exec(self, feedback_fn):
8658     """Execute disk replacement.
8659
8660     This dispatches the disk replacement to the appropriate handler.
8661
8662     """
8663     if self.delay_iallocator:
8664       self._CheckPrereq2()
8665
8666     if not self.disks:
8667       feedback_fn("No disks need replacement")
8668       return
8669
8670     feedback_fn("Replacing disk(s) %s for %s" %
8671                 (utils.CommaJoin(self.disks), self.instance.name))
8672
8673     activate_disks = (not self.instance.admin_up)
8674
8675     # Activate the instance disks if we're replacing them on a down instance
8676     if activate_disks:
8677       _StartInstanceDisks(self.lu, self.instance, True)
8678
8679     try:
8680       # Should we replace the secondary node?
8681       if self.new_node is not None:
8682         fn = self._ExecDrbd8Secondary
8683       else:
8684         fn = self._ExecDrbd8DiskOnly
8685
8686       return fn(feedback_fn)
8687
8688     finally:
8689       # Deactivate the instance disks if we're replacing them on a
8690       # down instance
8691       if activate_disks:
8692         _SafeShutdownInstanceDisks(self.lu, self.instance)
8693
8694   def _CheckVolumeGroup(self, nodes):
8695     self.lu.LogInfo("Checking volume groups")
8696
8697     vgname = self.cfg.GetVGName()
8698
8699     # Make sure volume group exists on all involved nodes
8700     results = self.rpc.call_vg_list(nodes)
8701     if not results:
8702       raise errors.OpExecError("Can't list volume groups on the nodes")
8703
8704     for node in nodes:
8705       res = results[node]
8706       res.Raise("Error checking node %s" % node)
8707       if vgname not in res.payload:
8708         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8709                                  (vgname, node))
8710
8711   def _CheckDisksExistence(self, nodes):
8712     # Check disk existence
8713     for idx, dev in enumerate(self.instance.disks):
8714       if idx not in self.disks:
8715         continue
8716
8717       for node in nodes:
8718         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8719         self.cfg.SetDiskID(dev, node)
8720
8721         result = self.rpc.call_blockdev_find(node, dev)
8722
8723         msg = result.fail_msg
8724         if msg or not result.payload:
8725           if not msg:
8726             msg = "disk not found"
8727           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8728                                    (idx, node, msg))
8729
8730   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8731     for idx, dev in enumerate(self.instance.disks):
8732       if idx not in self.disks:
8733         continue
8734
8735       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8736                       (idx, node_name))
8737
8738       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8739                                    ldisk=ldisk):
8740         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8741                                  " replace disks for instance %s" %
8742                                  (node_name, self.instance.name))
8743
8744   def _CreateNewStorage(self, node_name):
8745     vgname = self.cfg.GetVGName()
8746     iv_names = {}
8747
8748     for idx, dev in enumerate(self.instance.disks):
8749       if idx not in self.disks:
8750         continue
8751
8752       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8753
8754       self.cfg.SetDiskID(dev, node_name)
8755
8756       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8757       names = _GenerateUniqueNames(self.lu, lv_names)
8758
8759       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8760                              logical_id=(vgname, names[0]))
8761       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8762                              logical_id=(vgname, names[1]))
8763
8764       new_lvs = [lv_data, lv_meta]
8765       old_lvs = dev.children
8766       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8767
8768       # we pass force_create=True to force the LVM creation
8769       for new_lv in new_lvs:
8770         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8771                         _GetInstanceInfoText(self.instance), False)
8772
8773     return iv_names
8774
8775   def _CheckDevices(self, node_name, iv_names):
8776     for name, (dev, _, _) in iv_names.iteritems():
8777       self.cfg.SetDiskID(dev, node_name)
8778
8779       result = self.rpc.call_blockdev_find(node_name, dev)
8780
8781       msg = result.fail_msg
8782       if msg or not result.payload:
8783         if not msg:
8784           msg = "disk not found"
8785         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8786                                  (name, msg))
8787
8788       if result.payload.is_degraded:
8789         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8790
8791   def _RemoveOldStorage(self, node_name, iv_names):
8792     for name, (_, old_lvs, _) in iv_names.iteritems():
8793       self.lu.LogInfo("Remove logical volumes for %s" % name)
8794
8795       for lv in old_lvs:
8796         self.cfg.SetDiskID(lv, node_name)
8797
8798         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8799         if msg:
8800           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8801                              hint="remove unused LVs manually")
8802
8803   def _ReleaseNodeLock(self, node_name):
8804     """Releases the lock for a given node."""
8805     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8806
8807   def _ExecDrbd8DiskOnly(self, feedback_fn):
8808     """Replace a disk on the primary or secondary for DRBD 8.
8809
8810     The algorithm for replace is quite complicated:
8811
8812       1. for each disk to be replaced:
8813
8814         1. create new LVs on the target node with unique names
8815         1. detach old LVs from the drbd device
8816         1. rename old LVs to name_replaced.<time_t>
8817         1. rename new LVs to old LVs
8818         1. attach the new LVs (with the old names now) to the drbd device
8819
8820       1. wait for sync across all devices
8821
8822       1. for each modified disk:
8823
8824         1. remove old LVs (which have the name name_replaces.<time_t>)
8825
8826     Failures are not very well handled.
8827
8828     """
8829     steps_total = 6
8830
8831     # Step: check device activation
8832     self.lu.LogStep(1, steps_total, "Check device existence")
8833     self._CheckDisksExistence([self.other_node, self.target_node])
8834     self._CheckVolumeGroup([self.target_node, self.other_node])
8835
8836     # Step: check other node consistency
8837     self.lu.LogStep(2, steps_total, "Check peer consistency")
8838     self._CheckDisksConsistency(self.other_node,
8839                                 self.other_node == self.instance.primary_node,
8840                                 False)
8841
8842     # Step: create new storage
8843     self.lu.LogStep(3, steps_total, "Allocate new storage")
8844     iv_names = self._CreateNewStorage(self.target_node)
8845
8846     # Step: for each lv, detach+rename*2+attach
8847     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8848     for dev, old_lvs, new_lvs in iv_names.itervalues():
8849       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8850
8851       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8852                                                      old_lvs)
8853       result.Raise("Can't detach drbd from local storage on node"
8854                    " %s for device %s" % (self.target_node, dev.iv_name))
8855       #dev.children = []
8856       #cfg.Update(instance)
8857
8858       # ok, we created the new LVs, so now we know we have the needed
8859       # storage; as such, we proceed on the target node to rename
8860       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8861       # using the assumption that logical_id == physical_id (which in
8862       # turn is the unique_id on that node)
8863
8864       # FIXME(iustin): use a better name for the replaced LVs
8865       temp_suffix = int(time.time())
8866       ren_fn = lambda d, suff: (d.physical_id[0],
8867                                 d.physical_id[1] + "_replaced-%s" % suff)
8868
8869       # Build the rename list based on what LVs exist on the node
8870       rename_old_to_new = []
8871       for to_ren in old_lvs:
8872         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8873         if not result.fail_msg and result.payload:
8874           # device exists
8875           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8876
8877       self.lu.LogInfo("Renaming the old LVs on the target node")
8878       result = self.rpc.call_blockdev_rename(self.target_node,
8879                                              rename_old_to_new)
8880       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8881
8882       # Now we rename the new LVs to the old LVs
8883       self.lu.LogInfo("Renaming the new LVs on the target node")
8884       rename_new_to_old = [(new, old.physical_id)
8885                            for old, new in zip(old_lvs, new_lvs)]
8886       result = self.rpc.call_blockdev_rename(self.target_node,
8887                                              rename_new_to_old)
8888       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8889
8890       for old, new in zip(old_lvs, new_lvs):
8891         new.logical_id = old.logical_id
8892         self.cfg.SetDiskID(new, self.target_node)
8893
8894       for disk in old_lvs:
8895         disk.logical_id = ren_fn(disk, temp_suffix)
8896         self.cfg.SetDiskID(disk, self.target_node)
8897
8898       # Now that the new lvs have the old name, we can add them to the device
8899       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8900       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8901                                                   new_lvs)
8902       msg = result.fail_msg
8903       if msg:
8904         for new_lv in new_lvs:
8905           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8906                                                new_lv).fail_msg
8907           if msg2:
8908             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8909                                hint=("cleanup manually the unused logical"
8910                                      "volumes"))
8911         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8912
8913       dev.children = new_lvs
8914
8915       self.cfg.Update(self.instance, feedback_fn)
8916
8917     cstep = 5
8918     if self.early_release:
8919       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8920       cstep += 1
8921       self._RemoveOldStorage(self.target_node, iv_names)
8922       # WARNING: we release both node locks here, do not do other RPCs
8923       # than WaitForSync to the primary node
8924       self._ReleaseNodeLock([self.target_node, self.other_node])
8925
8926     # Wait for sync
8927     # This can fail as the old devices are degraded and _WaitForSync
8928     # does a combined result over all disks, so we don't check its return value
8929     self.lu.LogStep(cstep, steps_total, "Sync devices")
8930     cstep += 1
8931     _WaitForSync(self.lu, self.instance)
8932
8933     # Check all devices manually
8934     self._CheckDevices(self.instance.primary_node, iv_names)
8935
8936     # Step: remove old storage
8937     if not self.early_release:
8938       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8939       cstep += 1
8940       self._RemoveOldStorage(self.target_node, iv_names)
8941
8942   def _ExecDrbd8Secondary(self, feedback_fn):
8943     """Replace the secondary node for DRBD 8.
8944
8945     The algorithm for replace is quite complicated:
8946       - for all disks of the instance:
8947         - create new LVs on the new node with same names
8948         - shutdown the drbd device on the old secondary
8949         - disconnect the drbd network on the primary
8950         - create the drbd device on the new secondary
8951         - network attach the drbd on the primary, using an artifice:
8952           the drbd code for Attach() will connect to the network if it
8953           finds a device which is connected to the good local disks but
8954           not network enabled
8955       - wait for sync across all devices
8956       - remove all disks from the old secondary
8957
8958     Failures are not very well handled.
8959
8960     """
8961     steps_total = 6
8962
8963     # Step: check device activation
8964     self.lu.LogStep(1, steps_total, "Check device existence")
8965     self._CheckDisksExistence([self.instance.primary_node])
8966     self._CheckVolumeGroup([self.instance.primary_node])
8967
8968     # Step: check other node consistency
8969     self.lu.LogStep(2, steps_total, "Check peer consistency")
8970     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8971
8972     # Step: create new storage
8973     self.lu.LogStep(3, steps_total, "Allocate new storage")
8974     for idx, dev in enumerate(self.instance.disks):
8975       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8976                       (self.new_node, idx))
8977       # we pass force_create=True to force LVM creation
8978       for new_lv in dev.children:
8979         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8980                         _GetInstanceInfoText(self.instance), False)
8981
8982     # Step 4: dbrd minors and drbd setups changes
8983     # after this, we must manually remove the drbd minors on both the
8984     # error and the success paths
8985     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8986     minors = self.cfg.AllocateDRBDMinor([self.new_node
8987                                          for dev in self.instance.disks],
8988                                         self.instance.name)
8989     logging.debug("Allocated minors %r", minors)
8990
8991     iv_names = {}
8992     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8993       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8994                       (self.new_node, idx))
8995       # create new devices on new_node; note that we create two IDs:
8996       # one without port, so the drbd will be activated without
8997       # networking information on the new node at this stage, and one
8998       # with network, for the latter activation in step 4
8999       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9000       if self.instance.primary_node == o_node1:
9001         p_minor = o_minor1
9002       else:
9003         assert self.instance.primary_node == o_node2, "Three-node instance?"
9004         p_minor = o_minor2
9005
9006       new_alone_id = (self.instance.primary_node, self.new_node, None,
9007                       p_minor, new_minor, o_secret)
9008       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9009                     p_minor, new_minor, o_secret)
9010
9011       iv_names[idx] = (dev, dev.children, new_net_id)
9012       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9013                     new_net_id)
9014       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9015                               logical_id=new_alone_id,
9016                               children=dev.children,
9017                               size=dev.size)
9018       try:
9019         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9020                               _GetInstanceInfoText(self.instance), False)
9021       except errors.GenericError:
9022         self.cfg.ReleaseDRBDMinors(self.instance.name)
9023         raise
9024
9025     # We have new devices, shutdown the drbd on the old secondary
9026     for idx, dev in enumerate(self.instance.disks):
9027       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9028       self.cfg.SetDiskID(dev, self.target_node)
9029       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9030       if msg:
9031         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9032                            "node: %s" % (idx, msg),
9033                            hint=("Please cleanup this device manually as"
9034                                  " soon as possible"))
9035
9036     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9037     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9038                                                self.node_secondary_ip,
9039                                                self.instance.disks)\
9040                                               [self.instance.primary_node]
9041
9042     msg = result.fail_msg
9043     if msg:
9044       # detaches didn't succeed (unlikely)
9045       self.cfg.ReleaseDRBDMinors(self.instance.name)
9046       raise errors.OpExecError("Can't detach the disks from the network on"
9047                                " old node: %s" % (msg,))
9048
9049     # if we managed to detach at least one, we update all the disks of
9050     # the instance to point to the new secondary
9051     self.lu.LogInfo("Updating instance configuration")
9052     for dev, _, new_logical_id in iv_names.itervalues():
9053       dev.logical_id = new_logical_id
9054       self.cfg.SetDiskID(dev, self.instance.primary_node)
9055
9056     self.cfg.Update(self.instance, feedback_fn)
9057
9058     # and now perform the drbd attach
9059     self.lu.LogInfo("Attaching primary drbds to new secondary"
9060                     " (standalone => connected)")
9061     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9062                                             self.new_node],
9063                                            self.node_secondary_ip,
9064                                            self.instance.disks,
9065                                            self.instance.name,
9066                                            False)
9067     for to_node, to_result in result.items():
9068       msg = to_result.fail_msg
9069       if msg:
9070         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9071                            to_node, msg,
9072                            hint=("please do a gnt-instance info to see the"
9073                                  " status of disks"))
9074     cstep = 5
9075     if self.early_release:
9076       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9077       cstep += 1
9078       self._RemoveOldStorage(self.target_node, iv_names)
9079       # WARNING: we release all node locks here, do not do other RPCs
9080       # than WaitForSync to the primary node
9081       self._ReleaseNodeLock([self.instance.primary_node,
9082                              self.target_node,
9083                              self.new_node])
9084
9085     # Wait for sync
9086     # This can fail as the old devices are degraded and _WaitForSync
9087     # does a combined result over all disks, so we don't check its return value
9088     self.lu.LogStep(cstep, steps_total, "Sync devices")
9089     cstep += 1
9090     _WaitForSync(self.lu, self.instance)
9091
9092     # Check all devices manually
9093     self._CheckDevices(self.instance.primary_node, iv_names)
9094
9095     # Step: remove old storage
9096     if not self.early_release:
9097       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9098       self._RemoveOldStorage(self.target_node, iv_names)
9099
9100
9101 class LURepairNodeStorage(NoHooksLU):
9102   """Repairs the volume group on a node.
9103
9104   """
9105   REQ_BGL = False
9106
9107   def CheckArguments(self):
9108     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9109
9110     storage_type = self.op.storage_type
9111
9112     if (constants.SO_FIX_CONSISTENCY not in
9113         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9114       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9115                                  " repaired" % storage_type,
9116                                  errors.ECODE_INVAL)
9117
9118   def ExpandNames(self):
9119     self.needed_locks = {
9120       locking.LEVEL_NODE: [self.op.node_name],
9121       }
9122
9123   def _CheckFaultyDisks(self, instance, node_name):
9124     """Ensure faulty disks abort the opcode or at least warn."""
9125     try:
9126       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9127                                   node_name, True):
9128         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9129                                    " node '%s'" % (instance.name, node_name),
9130                                    errors.ECODE_STATE)
9131     except errors.OpPrereqError, err:
9132       if self.op.ignore_consistency:
9133         self.proc.LogWarning(str(err.args[0]))
9134       else:
9135         raise
9136
9137   def CheckPrereq(self):
9138     """Check prerequisites.
9139
9140     """
9141     # Check whether any instance on this node has faulty disks
9142     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9143       if not inst.admin_up:
9144         continue
9145       check_nodes = set(inst.all_nodes)
9146       check_nodes.discard(self.op.node_name)
9147       for inst_node_name in check_nodes:
9148         self._CheckFaultyDisks(inst, inst_node_name)
9149
9150   def Exec(self, feedback_fn):
9151     feedback_fn("Repairing storage unit '%s' on %s ..." %
9152                 (self.op.name, self.op.node_name))
9153
9154     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9155     result = self.rpc.call_storage_execute(self.op.node_name,
9156                                            self.op.storage_type, st_args,
9157                                            self.op.name,
9158                                            constants.SO_FIX_CONSISTENCY)
9159     result.Raise("Failed to repair storage unit '%s' on %s" %
9160                  (self.op.name, self.op.node_name))
9161
9162
9163 class LUNodeEvacStrategy(NoHooksLU):
9164   """Computes the node evacuation strategy.
9165
9166   """
9167   REQ_BGL = False
9168
9169   def CheckArguments(self):
9170     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9171
9172   def ExpandNames(self):
9173     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9174     self.needed_locks = locks = {}
9175     if self.op.remote_node is None:
9176       locks[locking.LEVEL_NODE] = locking.ALL_SET
9177     else:
9178       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9179       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9180
9181   def Exec(self, feedback_fn):
9182     if self.op.remote_node is not None:
9183       instances = []
9184       for node in self.op.nodes:
9185         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9186       result = []
9187       for i in instances:
9188         if i.primary_node == self.op.remote_node:
9189           raise errors.OpPrereqError("Node %s is the primary node of"
9190                                      " instance %s, cannot use it as"
9191                                      " secondary" %
9192                                      (self.op.remote_node, i.name),
9193                                      errors.ECODE_INVAL)
9194         result.append([i.name, self.op.remote_node])
9195     else:
9196       ial = IAllocator(self.cfg, self.rpc,
9197                        mode=constants.IALLOCATOR_MODE_MEVAC,
9198                        evac_nodes=self.op.nodes)
9199       ial.Run(self.op.iallocator, validate=True)
9200       if not ial.success:
9201         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9202                                  errors.ECODE_NORES)
9203       result = ial.result
9204     return result
9205
9206
9207 class LUInstanceGrowDisk(LogicalUnit):
9208   """Grow a disk of an instance.
9209
9210   """
9211   HPATH = "disk-grow"
9212   HTYPE = constants.HTYPE_INSTANCE
9213   REQ_BGL = False
9214
9215   def ExpandNames(self):
9216     self._ExpandAndLockInstance()
9217     self.needed_locks[locking.LEVEL_NODE] = []
9218     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9219
9220   def DeclareLocks(self, level):
9221     if level == locking.LEVEL_NODE:
9222       self._LockInstancesNodes()
9223
9224   def BuildHooksEnv(self):
9225     """Build hooks env.
9226
9227     This runs on the master, the primary and all the secondaries.
9228
9229     """
9230     env = {
9231       "DISK": self.op.disk,
9232       "AMOUNT": self.op.amount,
9233       }
9234     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9235     return env
9236
9237   def BuildHooksNodes(self):
9238     """Build hooks nodes.
9239
9240     """
9241     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9242     return (nl, nl)
9243
9244   def CheckPrereq(self):
9245     """Check prerequisites.
9246
9247     This checks that the instance is in the cluster.
9248
9249     """
9250     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9251     assert instance is not None, \
9252       "Cannot retrieve locked instance %s" % self.op.instance_name
9253     nodenames = list(instance.all_nodes)
9254     for node in nodenames:
9255       _CheckNodeOnline(self, node)
9256
9257     self.instance = instance
9258
9259     if instance.disk_template not in constants.DTS_GROWABLE:
9260       raise errors.OpPrereqError("Instance's disk layout does not support"
9261                                  " growing.", errors.ECODE_INVAL)
9262
9263     self.disk = instance.FindDisk(self.op.disk)
9264
9265     if instance.disk_template not in (constants.DT_FILE,
9266                                       constants.DT_SHARED_FILE):
9267       # TODO: check the free disk space for file, when that feature will be
9268       # supported
9269       _CheckNodesFreeDiskPerVG(self, nodenames,
9270                                self.disk.ComputeGrowth(self.op.amount))
9271
9272   def Exec(self, feedback_fn):
9273     """Execute disk grow.
9274
9275     """
9276     instance = self.instance
9277     disk = self.disk
9278
9279     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9280     if not disks_ok:
9281       raise errors.OpExecError("Cannot activate block device to grow")
9282
9283     for node in instance.all_nodes:
9284       self.cfg.SetDiskID(disk, node)
9285       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9286       result.Raise("Grow request failed to node %s" % node)
9287
9288       # TODO: Rewrite code to work properly
9289       # DRBD goes into sync mode for a short amount of time after executing the
9290       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9291       # calling "resize" in sync mode fails. Sleeping for a short amount of
9292       # time is a work-around.
9293       time.sleep(5)
9294
9295     disk.RecordGrow(self.op.amount)
9296     self.cfg.Update(instance, feedback_fn)
9297     if self.op.wait_for_sync:
9298       disk_abort = not _WaitForSync(self, instance, disks=[disk])
9299       if disk_abort:
9300         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9301                              " status.\nPlease check the instance.")
9302       if not instance.admin_up:
9303         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9304     elif not instance.admin_up:
9305       self.proc.LogWarning("Not shutting down the disk even if the instance is"
9306                            " not supposed to be running because no wait for"
9307                            " sync mode was requested.")
9308
9309
9310 class LUInstanceQueryData(NoHooksLU):
9311   """Query runtime instance data.
9312
9313   """
9314   REQ_BGL = False
9315
9316   def ExpandNames(self):
9317     self.needed_locks = {}
9318     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9319
9320     if self.op.instances:
9321       self.wanted_names = []
9322       for name in self.op.instances:
9323         full_name = _ExpandInstanceName(self.cfg, name)
9324         self.wanted_names.append(full_name)
9325       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9326     else:
9327       self.wanted_names = None
9328       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9329
9330     self.needed_locks[locking.LEVEL_NODE] = []
9331     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9332
9333   def DeclareLocks(self, level):
9334     if level == locking.LEVEL_NODE:
9335       self._LockInstancesNodes()
9336
9337   def CheckPrereq(self):
9338     """Check prerequisites.
9339
9340     This only checks the optional instance list against the existing names.
9341
9342     """
9343     if self.wanted_names is None:
9344       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9345
9346     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9347                              in self.wanted_names]
9348
9349   def _ComputeBlockdevStatus(self, node, instance_name, dev):
9350     """Returns the status of a block device
9351
9352     """
9353     if self.op.static or not node:
9354       return None
9355
9356     self.cfg.SetDiskID(dev, node)
9357
9358     result = self.rpc.call_blockdev_find(node, dev)
9359     if result.offline:
9360       return None
9361
9362     result.Raise("Can't compute disk status for %s" % instance_name)
9363
9364     status = result.payload
9365     if status is None:
9366       return None
9367
9368     return (status.dev_path, status.major, status.minor,
9369             status.sync_percent, status.estimated_time,
9370             status.is_degraded, status.ldisk_status)
9371
9372   def _ComputeDiskStatus(self, instance, snode, dev):
9373     """Compute block device status.
9374
9375     """
9376     if dev.dev_type in constants.LDS_DRBD:
9377       # we change the snode then (otherwise we use the one passed in)
9378       if dev.logical_id[0] == instance.primary_node:
9379         snode = dev.logical_id[1]
9380       else:
9381         snode = dev.logical_id[0]
9382
9383     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9384                                               instance.name, dev)
9385     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9386
9387     if dev.children:
9388       dev_children = [self._ComputeDiskStatus(instance, snode, child)
9389                       for child in dev.children]
9390     else:
9391       dev_children = []
9392
9393     data = {
9394       "iv_name": dev.iv_name,
9395       "dev_type": dev.dev_type,
9396       "logical_id": dev.logical_id,
9397       "physical_id": dev.physical_id,
9398       "pstatus": dev_pstatus,
9399       "sstatus": dev_sstatus,
9400       "children": dev_children,
9401       "mode": dev.mode,
9402       "size": dev.size,
9403       }
9404
9405     return data
9406
9407   def Exec(self, feedback_fn):
9408     """Gather and return data"""
9409     result = {}
9410
9411     cluster = self.cfg.GetClusterInfo()
9412
9413     for instance in self.wanted_instances:
9414       if not self.op.static:
9415         remote_info = self.rpc.call_instance_info(instance.primary_node,
9416                                                   instance.name,
9417                                                   instance.hypervisor)
9418         remote_info.Raise("Error checking node %s" % instance.primary_node)
9419         remote_info = remote_info.payload
9420         if remote_info and "state" in remote_info:
9421           remote_state = "up"
9422         else:
9423           remote_state = "down"
9424       else:
9425         remote_state = None
9426       if instance.admin_up:
9427         config_state = "up"
9428       else:
9429         config_state = "down"
9430
9431       disks = [self._ComputeDiskStatus(instance, None, device)
9432                for device in instance.disks]
9433
9434       idict = {
9435         "name": instance.name,
9436         "config_state": config_state,
9437         "run_state": remote_state,
9438         "pnode": instance.primary_node,
9439         "snodes": instance.secondary_nodes,
9440         "os": instance.os,
9441         # this happens to be the same format used for hooks
9442         "nics": _NICListToTuple(self, instance.nics),
9443         "disk_template": instance.disk_template,
9444         "disks": disks,
9445         "hypervisor": instance.hypervisor,
9446         "network_port": instance.network_port,
9447         "hv_instance": instance.hvparams,
9448         "hv_actual": cluster.FillHV(instance, skip_globals=True),
9449         "be_instance": instance.beparams,
9450         "be_actual": cluster.FillBE(instance),
9451         "os_instance": instance.osparams,
9452         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9453         "serial_no": instance.serial_no,
9454         "mtime": instance.mtime,
9455         "ctime": instance.ctime,
9456         "uuid": instance.uuid,
9457         }
9458
9459       result[instance.name] = idict
9460
9461     return result
9462
9463
9464 class LUInstanceSetParams(LogicalUnit):
9465   """Modifies an instances's parameters.
9466
9467   """
9468   HPATH = "instance-modify"
9469   HTYPE = constants.HTYPE_INSTANCE
9470   REQ_BGL = False
9471
9472   def CheckArguments(self):
9473     if not (self.op.nics or self.op.disks or self.op.disk_template or
9474             self.op.hvparams or self.op.beparams or self.op.os_name):
9475       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9476
9477     if self.op.hvparams:
9478       _CheckGlobalHvParams(self.op.hvparams)
9479
9480     # Disk validation
9481     disk_addremove = 0
9482     for disk_op, disk_dict in self.op.disks:
9483       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9484       if disk_op == constants.DDM_REMOVE:
9485         disk_addremove += 1
9486         continue
9487       elif disk_op == constants.DDM_ADD:
9488         disk_addremove += 1
9489       else:
9490         if not isinstance(disk_op, int):
9491           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9492         if not isinstance(disk_dict, dict):
9493           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9494           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9495
9496       if disk_op == constants.DDM_ADD:
9497         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9498         if mode not in constants.DISK_ACCESS_SET:
9499           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9500                                      errors.ECODE_INVAL)
9501         size = disk_dict.get('size', None)
9502         if size is None:
9503           raise errors.OpPrereqError("Required disk parameter size missing",
9504                                      errors.ECODE_INVAL)
9505         try:
9506           size = int(size)
9507         except (TypeError, ValueError), err:
9508           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9509                                      str(err), errors.ECODE_INVAL)
9510         disk_dict['size'] = size
9511       else:
9512         # modification of disk
9513         if 'size' in disk_dict:
9514           raise errors.OpPrereqError("Disk size change not possible, use"
9515                                      " grow-disk", errors.ECODE_INVAL)
9516
9517     if disk_addremove > 1:
9518       raise errors.OpPrereqError("Only one disk add or remove operation"
9519                                  " supported at a time", errors.ECODE_INVAL)
9520
9521     if self.op.disks and self.op.disk_template is not None:
9522       raise errors.OpPrereqError("Disk template conversion and other disk"
9523                                  " changes not supported at the same time",
9524                                  errors.ECODE_INVAL)
9525
9526     if (self.op.disk_template and
9527         self.op.disk_template in constants.DTS_INT_MIRROR and
9528         self.op.remote_node is None):
9529       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9530                                  " one requires specifying a secondary node",
9531                                  errors.ECODE_INVAL)
9532
9533     # NIC validation
9534     nic_addremove = 0
9535     for nic_op, nic_dict in self.op.nics:
9536       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9537       if nic_op == constants.DDM_REMOVE:
9538         nic_addremove += 1
9539         continue
9540       elif nic_op == constants.DDM_ADD:
9541         nic_addremove += 1
9542       else:
9543         if not isinstance(nic_op, int):
9544           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9545         if not isinstance(nic_dict, dict):
9546           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9547           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9548
9549       # nic_dict should be a dict
9550       nic_ip = nic_dict.get('ip', None)
9551       if nic_ip is not None:
9552         if nic_ip.lower() == constants.VALUE_NONE:
9553           nic_dict['ip'] = None
9554         else:
9555           if not netutils.IPAddress.IsValid(nic_ip):
9556             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9557                                        errors.ECODE_INVAL)
9558
9559       nic_bridge = nic_dict.get('bridge', None)
9560       nic_link = nic_dict.get('link', None)
9561       if nic_bridge and nic_link:
9562         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9563                                    " at the same time", errors.ECODE_INVAL)
9564       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9565         nic_dict['bridge'] = None
9566       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9567         nic_dict['link'] = None
9568
9569       if nic_op == constants.DDM_ADD:
9570         nic_mac = nic_dict.get('mac', None)
9571         if nic_mac is None:
9572           nic_dict['mac'] = constants.VALUE_AUTO
9573
9574       if 'mac' in nic_dict:
9575         nic_mac = nic_dict['mac']
9576         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9577           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9578
9579         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9580           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9581                                      " modifying an existing nic",
9582                                      errors.ECODE_INVAL)
9583
9584     if nic_addremove > 1:
9585       raise errors.OpPrereqError("Only one NIC add or remove operation"
9586                                  " supported at a time", errors.ECODE_INVAL)
9587
9588   def ExpandNames(self):
9589     self._ExpandAndLockInstance()
9590     self.needed_locks[locking.LEVEL_NODE] = []
9591     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9592
9593   def DeclareLocks(self, level):
9594     if level == locking.LEVEL_NODE:
9595       self._LockInstancesNodes()
9596       if self.op.disk_template and self.op.remote_node:
9597         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9598         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9599
9600   def BuildHooksEnv(self):
9601     """Build hooks env.
9602
9603     This runs on the master, primary and secondaries.
9604
9605     """
9606     args = dict()
9607     if constants.BE_MEMORY in self.be_new:
9608       args['memory'] = self.be_new[constants.BE_MEMORY]
9609     if constants.BE_VCPUS in self.be_new:
9610       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9611     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9612     # information at all.
9613     if self.op.nics:
9614       args['nics'] = []
9615       nic_override = dict(self.op.nics)
9616       for idx, nic in enumerate(self.instance.nics):
9617         if idx in nic_override:
9618           this_nic_override = nic_override[idx]
9619         else:
9620           this_nic_override = {}
9621         if 'ip' in this_nic_override:
9622           ip = this_nic_override['ip']
9623         else:
9624           ip = nic.ip
9625         if 'mac' in this_nic_override:
9626           mac = this_nic_override['mac']
9627         else:
9628           mac = nic.mac
9629         if idx in self.nic_pnew:
9630           nicparams = self.nic_pnew[idx]
9631         else:
9632           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9633         mode = nicparams[constants.NIC_MODE]
9634         link = nicparams[constants.NIC_LINK]
9635         args['nics'].append((ip, mac, mode, link))
9636       if constants.DDM_ADD in nic_override:
9637         ip = nic_override[constants.DDM_ADD].get('ip', None)
9638         mac = nic_override[constants.DDM_ADD]['mac']
9639         nicparams = self.nic_pnew[constants.DDM_ADD]
9640         mode = nicparams[constants.NIC_MODE]
9641         link = nicparams[constants.NIC_LINK]
9642         args['nics'].append((ip, mac, mode, link))
9643       elif constants.DDM_REMOVE in nic_override:
9644         del args['nics'][-1]
9645
9646     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9647     if self.op.disk_template:
9648       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9649
9650     return env
9651
9652   def BuildHooksNodes(self):
9653     """Build hooks nodes.
9654
9655     """
9656     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9657     return (nl, nl)
9658
9659   def CheckPrereq(self):
9660     """Check prerequisites.
9661
9662     This only checks the instance list against the existing names.
9663
9664     """
9665     # checking the new params on the primary/secondary nodes
9666
9667     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9668     cluster = self.cluster = self.cfg.GetClusterInfo()
9669     assert self.instance is not None, \
9670       "Cannot retrieve locked instance %s" % self.op.instance_name
9671     pnode = instance.primary_node
9672     nodelist = list(instance.all_nodes)
9673
9674     # OS change
9675     if self.op.os_name and not self.op.force:
9676       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9677                       self.op.force_variant)
9678       instance_os = self.op.os_name
9679     else:
9680       instance_os = instance.os
9681
9682     if self.op.disk_template:
9683       if instance.disk_template == self.op.disk_template:
9684         raise errors.OpPrereqError("Instance already has disk template %s" %
9685                                    instance.disk_template, errors.ECODE_INVAL)
9686
9687       if (instance.disk_template,
9688           self.op.disk_template) not in self._DISK_CONVERSIONS:
9689         raise errors.OpPrereqError("Unsupported disk template conversion from"
9690                                    " %s to %s" % (instance.disk_template,
9691                                                   self.op.disk_template),
9692                                    errors.ECODE_INVAL)
9693       _CheckInstanceDown(self, instance, "cannot change disk template")
9694       if self.op.disk_template in constants.DTS_INT_MIRROR:
9695         if self.op.remote_node == pnode:
9696           raise errors.OpPrereqError("Given new secondary node %s is the same"
9697                                      " as the primary node of the instance" %
9698                                      self.op.remote_node, errors.ECODE_STATE)
9699         _CheckNodeOnline(self, self.op.remote_node)
9700         _CheckNodeNotDrained(self, self.op.remote_node)
9701         # FIXME: here we assume that the old instance type is DT_PLAIN
9702         assert instance.disk_template == constants.DT_PLAIN
9703         disks = [{"size": d.size, "vg": d.logical_id[0]}
9704                  for d in instance.disks]
9705         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9706         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9707
9708     # hvparams processing
9709     if self.op.hvparams:
9710       hv_type = instance.hypervisor
9711       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9712       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9713       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9714
9715       # local check
9716       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9717       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9718       self.hv_new = hv_new # the new actual values
9719       self.hv_inst = i_hvdict # the new dict (without defaults)
9720     else:
9721       self.hv_new = self.hv_inst = {}
9722
9723     # beparams processing
9724     if self.op.beparams:
9725       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9726                                    use_none=True)
9727       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9728       be_new = cluster.SimpleFillBE(i_bedict)
9729       self.be_new = be_new # the new actual values
9730       self.be_inst = i_bedict # the new dict (without defaults)
9731     else:
9732       self.be_new = self.be_inst = {}
9733
9734     # osparams processing
9735     if self.op.osparams:
9736       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9737       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9738       self.os_inst = i_osdict # the new dict (without defaults)
9739     else:
9740       self.os_inst = {}
9741
9742     self.warn = []
9743
9744     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9745       mem_check_list = [pnode]
9746       if be_new[constants.BE_AUTO_BALANCE]:
9747         # either we changed auto_balance to yes or it was from before
9748         mem_check_list.extend(instance.secondary_nodes)
9749       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9750                                                   instance.hypervisor)
9751       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9752                                          instance.hypervisor)
9753       pninfo = nodeinfo[pnode]
9754       msg = pninfo.fail_msg
9755       if msg:
9756         # Assume the primary node is unreachable and go ahead
9757         self.warn.append("Can't get info from primary node %s: %s" %
9758                          (pnode,  msg))
9759       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9760         self.warn.append("Node data from primary node %s doesn't contain"
9761                          " free memory information" % pnode)
9762       elif instance_info.fail_msg:
9763         self.warn.append("Can't get instance runtime information: %s" %
9764                         instance_info.fail_msg)
9765       else:
9766         if instance_info.payload:
9767           current_mem = int(instance_info.payload['memory'])
9768         else:
9769           # Assume instance not running
9770           # (there is a slight race condition here, but it's not very probable,
9771           # and we have no other way to check)
9772           current_mem = 0
9773         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9774                     pninfo.payload['memory_free'])
9775         if miss_mem > 0:
9776           raise errors.OpPrereqError("This change will prevent the instance"
9777                                      " from starting, due to %d MB of memory"
9778                                      " missing on its primary node" % miss_mem,
9779                                      errors.ECODE_NORES)
9780
9781       if be_new[constants.BE_AUTO_BALANCE]:
9782         for node, nres in nodeinfo.items():
9783           if node not in instance.secondary_nodes:
9784             continue
9785           msg = nres.fail_msg
9786           if msg:
9787             self.warn.append("Can't get info from secondary node %s: %s" %
9788                              (node, msg))
9789           elif not isinstance(nres.payload.get('memory_free', None), int):
9790             self.warn.append("Secondary node %s didn't return free"
9791                              " memory information" % node)
9792           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9793             self.warn.append("Not enough memory to failover instance to"
9794                              " secondary node %s" % node)
9795
9796     # NIC processing
9797     self.nic_pnew = {}
9798     self.nic_pinst = {}
9799     for nic_op, nic_dict in self.op.nics:
9800       if nic_op == constants.DDM_REMOVE:
9801         if not instance.nics:
9802           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9803                                      errors.ECODE_INVAL)
9804         continue
9805       if nic_op != constants.DDM_ADD:
9806         # an existing nic
9807         if not instance.nics:
9808           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9809                                      " no NICs" % nic_op,
9810                                      errors.ECODE_INVAL)
9811         if nic_op < 0 or nic_op >= len(instance.nics):
9812           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9813                                      " are 0 to %d" %
9814                                      (nic_op, len(instance.nics) - 1),
9815                                      errors.ECODE_INVAL)
9816         old_nic_params = instance.nics[nic_op].nicparams
9817         old_nic_ip = instance.nics[nic_op].ip
9818       else:
9819         old_nic_params = {}
9820         old_nic_ip = None
9821
9822       update_params_dict = dict([(key, nic_dict[key])
9823                                  for key in constants.NICS_PARAMETERS
9824                                  if key in nic_dict])
9825
9826       if 'bridge' in nic_dict:
9827         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9828
9829       new_nic_params = _GetUpdatedParams(old_nic_params,
9830                                          update_params_dict)
9831       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9832       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9833       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9834       self.nic_pinst[nic_op] = new_nic_params
9835       self.nic_pnew[nic_op] = new_filled_nic_params
9836       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9837
9838       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9839         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9840         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9841         if msg:
9842           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9843           if self.op.force:
9844             self.warn.append(msg)
9845           else:
9846             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9847       if new_nic_mode == constants.NIC_MODE_ROUTED:
9848         if 'ip' in nic_dict:
9849           nic_ip = nic_dict['ip']
9850         else:
9851           nic_ip = old_nic_ip
9852         if nic_ip is None:
9853           raise errors.OpPrereqError('Cannot set the nic ip to None'
9854                                      ' on a routed nic', errors.ECODE_INVAL)
9855       if 'mac' in nic_dict:
9856         nic_mac = nic_dict['mac']
9857         if nic_mac is None:
9858           raise errors.OpPrereqError('Cannot set the nic mac to None',
9859                                      errors.ECODE_INVAL)
9860         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9861           # otherwise generate the mac
9862           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9863         else:
9864           # or validate/reserve the current one
9865           try:
9866             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9867           except errors.ReservationError:
9868             raise errors.OpPrereqError("MAC address %s already in use"
9869                                        " in cluster" % nic_mac,
9870                                        errors.ECODE_NOTUNIQUE)
9871
9872     # DISK processing
9873     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9874       raise errors.OpPrereqError("Disk operations not supported for"
9875                                  " diskless instances",
9876                                  errors.ECODE_INVAL)
9877     for disk_op, _ in self.op.disks:
9878       if disk_op == constants.DDM_REMOVE:
9879         if len(instance.disks) == 1:
9880           raise errors.OpPrereqError("Cannot remove the last disk of"
9881                                      " an instance", errors.ECODE_INVAL)
9882         _CheckInstanceDown(self, instance, "cannot remove disks")
9883
9884       if (disk_op == constants.DDM_ADD and
9885           len(instance.disks) >= constants.MAX_DISKS):
9886         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9887                                    " add more" % constants.MAX_DISKS,
9888                                    errors.ECODE_STATE)
9889       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9890         # an existing disk
9891         if disk_op < 0 or disk_op >= len(instance.disks):
9892           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9893                                      " are 0 to %d" %
9894                                      (disk_op, len(instance.disks)),
9895                                      errors.ECODE_INVAL)
9896
9897     return
9898
9899   def _ConvertPlainToDrbd(self, feedback_fn):
9900     """Converts an instance from plain to drbd.
9901
9902     """
9903     feedback_fn("Converting template to drbd")
9904     instance = self.instance
9905     pnode = instance.primary_node
9906     snode = self.op.remote_node
9907
9908     # create a fake disk info for _GenerateDiskTemplate
9909     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9910     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9911                                       instance.name, pnode, [snode],
9912                                       disk_info, None, None, 0, feedback_fn)
9913     info = _GetInstanceInfoText(instance)
9914     feedback_fn("Creating aditional volumes...")
9915     # first, create the missing data and meta devices
9916     for disk in new_disks:
9917       # unfortunately this is... not too nice
9918       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9919                             info, True)
9920       for child in disk.children:
9921         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9922     # at this stage, all new LVs have been created, we can rename the
9923     # old ones
9924     feedback_fn("Renaming original volumes...")
9925     rename_list = [(o, n.children[0].logical_id)
9926                    for (o, n) in zip(instance.disks, new_disks)]
9927     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9928     result.Raise("Failed to rename original LVs")
9929
9930     feedback_fn("Initializing DRBD devices...")
9931     # all child devices are in place, we can now create the DRBD devices
9932     for disk in new_disks:
9933       for node in [pnode, snode]:
9934         f_create = node == pnode
9935         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9936
9937     # at this point, the instance has been modified
9938     instance.disk_template = constants.DT_DRBD8
9939     instance.disks = new_disks
9940     self.cfg.Update(instance, feedback_fn)
9941
9942     # disks are created, waiting for sync
9943     disk_abort = not _WaitForSync(self, instance)
9944     if disk_abort:
9945       raise errors.OpExecError("There are some degraded disks for"
9946                                " this instance, please cleanup manually")
9947
9948   def _ConvertDrbdToPlain(self, feedback_fn):
9949     """Converts an instance from drbd to plain.
9950
9951     """
9952     instance = self.instance
9953     assert len(instance.secondary_nodes) == 1
9954     pnode = instance.primary_node
9955     snode = instance.secondary_nodes[0]
9956     feedback_fn("Converting template to plain")
9957
9958     old_disks = instance.disks
9959     new_disks = [d.children[0] for d in old_disks]
9960
9961     # copy over size and mode
9962     for parent, child in zip(old_disks, new_disks):
9963       child.size = parent.size
9964       child.mode = parent.mode
9965
9966     # update instance structure
9967     instance.disks = new_disks
9968     instance.disk_template = constants.DT_PLAIN
9969     self.cfg.Update(instance, feedback_fn)
9970
9971     feedback_fn("Removing volumes on the secondary node...")
9972     for disk in old_disks:
9973       self.cfg.SetDiskID(disk, snode)
9974       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9975       if msg:
9976         self.LogWarning("Could not remove block device %s on node %s,"
9977                         " continuing anyway: %s", disk.iv_name, snode, msg)
9978
9979     feedback_fn("Removing unneeded volumes on the primary node...")
9980     for idx, disk in enumerate(old_disks):
9981       meta = disk.children[1]
9982       self.cfg.SetDiskID(meta, pnode)
9983       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9984       if msg:
9985         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9986                         " continuing anyway: %s", idx, pnode, msg)
9987
9988   def Exec(self, feedback_fn):
9989     """Modifies an instance.
9990
9991     All parameters take effect only at the next restart of the instance.
9992
9993     """
9994     # Process here the warnings from CheckPrereq, as we don't have a
9995     # feedback_fn there.
9996     for warn in self.warn:
9997       feedback_fn("WARNING: %s" % warn)
9998
9999     result = []
10000     instance = self.instance
10001     # disk changes
10002     for disk_op, disk_dict in self.op.disks:
10003       if disk_op == constants.DDM_REMOVE:
10004         # remove the last disk
10005         device = instance.disks.pop()
10006         device_idx = len(instance.disks)
10007         for node, disk in device.ComputeNodeTree(instance.primary_node):
10008           self.cfg.SetDiskID(disk, node)
10009           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10010           if msg:
10011             self.LogWarning("Could not remove disk/%d on node %s: %s,"
10012                             " continuing anyway", device_idx, node, msg)
10013         result.append(("disk/%d" % device_idx, "remove"))
10014       elif disk_op == constants.DDM_ADD:
10015         # add a new disk
10016         if instance.disk_template in (constants.DT_FILE,
10017                                         constants.DT_SHARED_FILE):
10018           file_driver, file_path = instance.disks[0].logical_id
10019           file_path = os.path.dirname(file_path)
10020         else:
10021           file_driver = file_path = None
10022         disk_idx_base = len(instance.disks)
10023         new_disk = _GenerateDiskTemplate(self,
10024                                          instance.disk_template,
10025                                          instance.name, instance.primary_node,
10026                                          instance.secondary_nodes,
10027                                          [disk_dict],
10028                                          file_path,
10029                                          file_driver,
10030                                          disk_idx_base, feedback_fn)[0]
10031         instance.disks.append(new_disk)
10032         info = _GetInstanceInfoText(instance)
10033
10034         logging.info("Creating volume %s for instance %s",
10035                      new_disk.iv_name, instance.name)
10036         # Note: this needs to be kept in sync with _CreateDisks
10037         #HARDCODE
10038         for node in instance.all_nodes:
10039           f_create = node == instance.primary_node
10040           try:
10041             _CreateBlockDev(self, node, instance, new_disk,
10042                             f_create, info, f_create)
10043           except errors.OpExecError, err:
10044             self.LogWarning("Failed to create volume %s (%s) on"
10045                             " node %s: %s",
10046                             new_disk.iv_name, new_disk, node, err)
10047         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10048                        (new_disk.size, new_disk.mode)))
10049       else:
10050         # change a given disk
10051         instance.disks[disk_op].mode = disk_dict['mode']
10052         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
10053
10054     if self.op.disk_template:
10055       r_shut = _ShutdownInstanceDisks(self, instance)
10056       if not r_shut:
10057         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10058                                  " proceed with disk template conversion")
10059       mode = (instance.disk_template, self.op.disk_template)
10060       try:
10061         self._DISK_CONVERSIONS[mode](self, feedback_fn)
10062       except:
10063         self.cfg.ReleaseDRBDMinors(instance.name)
10064         raise
10065       result.append(("disk_template", self.op.disk_template))
10066
10067     # NIC changes
10068     for nic_op, nic_dict in self.op.nics:
10069       if nic_op == constants.DDM_REMOVE:
10070         # remove the last nic
10071         del instance.nics[-1]
10072         result.append(("nic.%d" % len(instance.nics), "remove"))
10073       elif nic_op == constants.DDM_ADD:
10074         # mac and bridge should be set, by now
10075         mac = nic_dict['mac']
10076         ip = nic_dict.get('ip', None)
10077         nicparams = self.nic_pinst[constants.DDM_ADD]
10078         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10079         instance.nics.append(new_nic)
10080         result.append(("nic.%d" % (len(instance.nics) - 1),
10081                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
10082                        (new_nic.mac, new_nic.ip,
10083                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10084                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10085                        )))
10086       else:
10087         for key in 'mac', 'ip':
10088           if key in nic_dict:
10089             setattr(instance.nics[nic_op], key, nic_dict[key])
10090         if nic_op in self.nic_pinst:
10091           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10092         for key, val in nic_dict.iteritems():
10093           result.append(("nic.%s/%d" % (key, nic_op), val))
10094
10095     # hvparams changes
10096     if self.op.hvparams:
10097       instance.hvparams = self.hv_inst
10098       for key, val in self.op.hvparams.iteritems():
10099         result.append(("hv/%s" % key, val))
10100
10101     # beparams changes
10102     if self.op.beparams:
10103       instance.beparams = self.be_inst
10104       for key, val in self.op.beparams.iteritems():
10105         result.append(("be/%s" % key, val))
10106
10107     # OS change
10108     if self.op.os_name:
10109       instance.os = self.op.os_name
10110
10111     # osparams changes
10112     if self.op.osparams:
10113       instance.osparams = self.os_inst
10114       for key, val in self.op.osparams.iteritems():
10115         result.append(("os/%s" % key, val))
10116
10117     self.cfg.Update(instance, feedback_fn)
10118
10119     return result
10120
10121   _DISK_CONVERSIONS = {
10122     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10123     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10124     }
10125
10126
10127 class LUBackupQuery(NoHooksLU):
10128   """Query the exports list
10129
10130   """
10131   REQ_BGL = False
10132
10133   def ExpandNames(self):
10134     self.needed_locks = {}
10135     self.share_locks[locking.LEVEL_NODE] = 1
10136     if not self.op.nodes:
10137       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10138     else:
10139       self.needed_locks[locking.LEVEL_NODE] = \
10140         _GetWantedNodes(self, self.op.nodes)
10141
10142   def Exec(self, feedback_fn):
10143     """Compute the list of all the exported system images.
10144
10145     @rtype: dict
10146     @return: a dictionary with the structure node->(export-list)
10147         where export-list is a list of the instances exported on
10148         that node.
10149
10150     """
10151     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10152     rpcresult = self.rpc.call_export_list(self.nodes)
10153     result = {}
10154     for node in rpcresult:
10155       if rpcresult[node].fail_msg:
10156         result[node] = False
10157       else:
10158         result[node] = rpcresult[node].payload
10159
10160     return result
10161
10162
10163 class LUBackupPrepare(NoHooksLU):
10164   """Prepares an instance for an export and returns useful information.
10165
10166   """
10167   REQ_BGL = False
10168
10169   def ExpandNames(self):
10170     self._ExpandAndLockInstance()
10171
10172   def CheckPrereq(self):
10173     """Check prerequisites.
10174
10175     """
10176     instance_name = self.op.instance_name
10177
10178     self.instance = self.cfg.GetInstanceInfo(instance_name)
10179     assert self.instance is not None, \
10180           "Cannot retrieve locked instance %s" % self.op.instance_name
10181     _CheckNodeOnline(self, self.instance.primary_node)
10182
10183     self._cds = _GetClusterDomainSecret()
10184
10185   def Exec(self, feedback_fn):
10186     """Prepares an instance for an export.
10187
10188     """
10189     instance = self.instance
10190
10191     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10192       salt = utils.GenerateSecret(8)
10193
10194       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10195       result = self.rpc.call_x509_cert_create(instance.primary_node,
10196                                               constants.RIE_CERT_VALIDITY)
10197       result.Raise("Can't create X509 key and certificate on %s" % result.node)
10198
10199       (name, cert_pem) = result.payload
10200
10201       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10202                                              cert_pem)
10203
10204       return {
10205         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10206         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10207                           salt),
10208         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10209         }
10210
10211     return None
10212
10213
10214 class LUBackupExport(LogicalUnit):
10215   """Export an instance to an image in the cluster.
10216
10217   """
10218   HPATH = "instance-export"
10219   HTYPE = constants.HTYPE_INSTANCE
10220   REQ_BGL = False
10221
10222   def CheckArguments(self):
10223     """Check the arguments.
10224
10225     """
10226     self.x509_key_name = self.op.x509_key_name
10227     self.dest_x509_ca_pem = self.op.destination_x509_ca
10228
10229     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10230       if not self.x509_key_name:
10231         raise errors.OpPrereqError("Missing X509 key name for encryption",
10232                                    errors.ECODE_INVAL)
10233
10234       if not self.dest_x509_ca_pem:
10235         raise errors.OpPrereqError("Missing destination X509 CA",
10236                                    errors.ECODE_INVAL)
10237
10238   def ExpandNames(self):
10239     self._ExpandAndLockInstance()
10240
10241     # Lock all nodes for local exports
10242     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10243       # FIXME: lock only instance primary and destination node
10244       #
10245       # Sad but true, for now we have do lock all nodes, as we don't know where
10246       # the previous export might be, and in this LU we search for it and
10247       # remove it from its current node. In the future we could fix this by:
10248       #  - making a tasklet to search (share-lock all), then create the
10249       #    new one, then one to remove, after
10250       #  - removing the removal operation altogether
10251       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10252
10253   def DeclareLocks(self, level):
10254     """Last minute lock declaration."""
10255     # All nodes are locked anyway, so nothing to do here.
10256
10257   def BuildHooksEnv(self):
10258     """Build hooks env.
10259
10260     This will run on the master, primary node and target node.
10261
10262     """
10263     env = {
10264       "EXPORT_MODE": self.op.mode,
10265       "EXPORT_NODE": self.op.target_node,
10266       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10267       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10268       # TODO: Generic function for boolean env variables
10269       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10270       }
10271
10272     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10273
10274     return env
10275
10276   def BuildHooksNodes(self):
10277     """Build hooks nodes.
10278
10279     """
10280     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10281
10282     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10283       nl.append(self.op.target_node)
10284
10285     return (nl, nl)
10286
10287   def CheckPrereq(self):
10288     """Check prerequisites.
10289
10290     This checks that the instance and node names are valid.
10291
10292     """
10293     instance_name = self.op.instance_name
10294
10295     self.instance = self.cfg.GetInstanceInfo(instance_name)
10296     assert self.instance is not None, \
10297           "Cannot retrieve locked instance %s" % self.op.instance_name
10298     _CheckNodeOnline(self, self.instance.primary_node)
10299
10300     if (self.op.remove_instance and self.instance.admin_up and
10301         not self.op.shutdown):
10302       raise errors.OpPrereqError("Can not remove instance without shutting it"
10303                                  " down before")
10304
10305     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10306       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10307       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10308       assert self.dst_node is not None
10309
10310       _CheckNodeOnline(self, self.dst_node.name)
10311       _CheckNodeNotDrained(self, self.dst_node.name)
10312
10313       self._cds = None
10314       self.dest_disk_info = None
10315       self.dest_x509_ca = None
10316
10317     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10318       self.dst_node = None
10319
10320       if len(self.op.target_node) != len(self.instance.disks):
10321         raise errors.OpPrereqError(("Received destination information for %s"
10322                                     " disks, but instance %s has %s disks") %
10323                                    (len(self.op.target_node), instance_name,
10324                                     len(self.instance.disks)),
10325                                    errors.ECODE_INVAL)
10326
10327       cds = _GetClusterDomainSecret()
10328
10329       # Check X509 key name
10330       try:
10331         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10332       except (TypeError, ValueError), err:
10333         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10334
10335       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10336         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10337                                    errors.ECODE_INVAL)
10338
10339       # Load and verify CA
10340       try:
10341         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10342       except OpenSSL.crypto.Error, err:
10343         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10344                                    (err, ), errors.ECODE_INVAL)
10345
10346       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10347       if errcode is not None:
10348         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10349                                    (msg, ), errors.ECODE_INVAL)
10350
10351       self.dest_x509_ca = cert
10352
10353       # Verify target information
10354       disk_info = []
10355       for idx, disk_data in enumerate(self.op.target_node):
10356         try:
10357           (host, port, magic) = \
10358             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10359         except errors.GenericError, err:
10360           raise errors.OpPrereqError("Target info for disk %s: %s" %
10361                                      (idx, err), errors.ECODE_INVAL)
10362
10363         disk_info.append((host, port, magic))
10364
10365       assert len(disk_info) == len(self.op.target_node)
10366       self.dest_disk_info = disk_info
10367
10368     else:
10369       raise errors.ProgrammerError("Unhandled export mode %r" %
10370                                    self.op.mode)
10371
10372     # instance disk type verification
10373     # TODO: Implement export support for file-based disks
10374     for disk in self.instance.disks:
10375       if disk.dev_type == constants.LD_FILE:
10376         raise errors.OpPrereqError("Export not supported for instances with"
10377                                    " file-based disks", errors.ECODE_INVAL)
10378
10379   def _CleanupExports(self, feedback_fn):
10380     """Removes exports of current instance from all other nodes.
10381
10382     If an instance in a cluster with nodes A..D was exported to node C, its
10383     exports will be removed from the nodes A, B and D.
10384
10385     """
10386     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10387
10388     nodelist = self.cfg.GetNodeList()
10389     nodelist.remove(self.dst_node.name)
10390
10391     # on one-node clusters nodelist will be empty after the removal
10392     # if we proceed the backup would be removed because OpBackupQuery
10393     # substitutes an empty list with the full cluster node list.
10394     iname = self.instance.name
10395     if nodelist:
10396       feedback_fn("Removing old exports for instance %s" % iname)
10397       exportlist = self.rpc.call_export_list(nodelist)
10398       for node in exportlist:
10399         if exportlist[node].fail_msg:
10400           continue
10401         if iname in exportlist[node].payload:
10402           msg = self.rpc.call_export_remove(node, iname).fail_msg
10403           if msg:
10404             self.LogWarning("Could not remove older export for instance %s"
10405                             " on node %s: %s", iname, node, msg)
10406
10407   def Exec(self, feedback_fn):
10408     """Export an instance to an image in the cluster.
10409
10410     """
10411     assert self.op.mode in constants.EXPORT_MODES
10412
10413     instance = self.instance
10414     src_node = instance.primary_node
10415
10416     if self.op.shutdown:
10417       # shutdown the instance, but not the disks
10418       feedback_fn("Shutting down instance %s" % instance.name)
10419       result = self.rpc.call_instance_shutdown(src_node, instance,
10420                                                self.op.shutdown_timeout)
10421       # TODO: Maybe ignore failures if ignore_remove_failures is set
10422       result.Raise("Could not shutdown instance %s on"
10423                    " node %s" % (instance.name, src_node))
10424
10425     # set the disks ID correctly since call_instance_start needs the
10426     # correct drbd minor to create the symlinks
10427     for disk in instance.disks:
10428       self.cfg.SetDiskID(disk, src_node)
10429
10430     activate_disks = (not instance.admin_up)
10431
10432     if activate_disks:
10433       # Activate the instance disks if we'exporting a stopped instance
10434       feedback_fn("Activating disks for %s" % instance.name)
10435       _StartInstanceDisks(self, instance, None)
10436
10437     try:
10438       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10439                                                      instance)
10440
10441       helper.CreateSnapshots()
10442       try:
10443         if (self.op.shutdown and instance.admin_up and
10444             not self.op.remove_instance):
10445           assert not activate_disks
10446           feedback_fn("Starting instance %s" % instance.name)
10447           result = self.rpc.call_instance_start(src_node, instance, None, None)
10448           msg = result.fail_msg
10449           if msg:
10450             feedback_fn("Failed to start instance: %s" % msg)
10451             _ShutdownInstanceDisks(self, instance)
10452             raise errors.OpExecError("Could not start instance: %s" % msg)
10453
10454         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10455           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10456         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10457           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10458           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10459
10460           (key_name, _, _) = self.x509_key_name
10461
10462           dest_ca_pem = \
10463             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10464                                             self.dest_x509_ca)
10465
10466           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10467                                                      key_name, dest_ca_pem,
10468                                                      timeouts)
10469       finally:
10470         helper.Cleanup()
10471
10472       # Check for backwards compatibility
10473       assert len(dresults) == len(instance.disks)
10474       assert compat.all(isinstance(i, bool) for i in dresults), \
10475              "Not all results are boolean: %r" % dresults
10476
10477     finally:
10478       if activate_disks:
10479         feedback_fn("Deactivating disks for %s" % instance.name)
10480         _ShutdownInstanceDisks(self, instance)
10481
10482     if not (compat.all(dresults) and fin_resu):
10483       failures = []
10484       if not fin_resu:
10485         failures.append("export finalization")
10486       if not compat.all(dresults):
10487         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10488                                if not dsk)
10489         failures.append("disk export: disk(s) %s" % fdsk)
10490
10491       raise errors.OpExecError("Export failed, errors in %s" %
10492                                utils.CommaJoin(failures))
10493
10494     # At this point, the export was successful, we can cleanup/finish
10495
10496     # Remove instance if requested
10497     if self.op.remove_instance:
10498       feedback_fn("Removing instance %s" % instance.name)
10499       _RemoveInstance(self, feedback_fn, instance,
10500                       self.op.ignore_remove_failures)
10501
10502     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10503       self._CleanupExports(feedback_fn)
10504
10505     return fin_resu, dresults
10506
10507
10508 class LUBackupRemove(NoHooksLU):
10509   """Remove exports related to the named instance.
10510
10511   """
10512   REQ_BGL = False
10513
10514   def ExpandNames(self):
10515     self.needed_locks = {}
10516     # We need all nodes to be locked in order for RemoveExport to work, but we
10517     # don't need to lock the instance itself, as nothing will happen to it (and
10518     # we can remove exports also for a removed instance)
10519     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10520
10521   def Exec(self, feedback_fn):
10522     """Remove any export.
10523
10524     """
10525     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10526     # If the instance was not found we'll try with the name that was passed in.
10527     # This will only work if it was an FQDN, though.
10528     fqdn_warn = False
10529     if not instance_name:
10530       fqdn_warn = True
10531       instance_name = self.op.instance_name
10532
10533     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10534     exportlist = self.rpc.call_export_list(locked_nodes)
10535     found = False
10536     for node in exportlist:
10537       msg = exportlist[node].fail_msg
10538       if msg:
10539         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10540         continue
10541       if instance_name in exportlist[node].payload:
10542         found = True
10543         result = self.rpc.call_export_remove(node, instance_name)
10544         msg = result.fail_msg
10545         if msg:
10546           logging.error("Could not remove export for instance %s"
10547                         " on node %s: %s", instance_name, node, msg)
10548
10549     if fqdn_warn and not found:
10550       feedback_fn("Export not found. If trying to remove an export belonging"
10551                   " to a deleted instance please use its Fully Qualified"
10552                   " Domain Name.")
10553
10554
10555 class LUGroupAdd(LogicalUnit):
10556   """Logical unit for creating node groups.
10557
10558   """
10559   HPATH = "group-add"
10560   HTYPE = constants.HTYPE_GROUP
10561   REQ_BGL = False
10562
10563   def ExpandNames(self):
10564     # We need the new group's UUID here so that we can create and acquire the
10565     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10566     # that it should not check whether the UUID exists in the configuration.
10567     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10568     self.needed_locks = {}
10569     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10570
10571   def CheckPrereq(self):
10572     """Check prerequisites.
10573
10574     This checks that the given group name is not an existing node group
10575     already.
10576
10577     """
10578     try:
10579       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10580     except errors.OpPrereqError:
10581       pass
10582     else:
10583       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10584                                  " node group (UUID: %s)" %
10585                                  (self.op.group_name, existing_uuid),
10586                                  errors.ECODE_EXISTS)
10587
10588     if self.op.ndparams:
10589       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10590
10591   def BuildHooksEnv(self):
10592     """Build hooks env.
10593
10594     """
10595     return {
10596       "GROUP_NAME": self.op.group_name,
10597       }
10598
10599   def BuildHooksNodes(self):
10600     """Build hooks nodes.
10601
10602     """
10603     mn = self.cfg.GetMasterNode()
10604     return ([mn], [mn])
10605
10606   def Exec(self, feedback_fn):
10607     """Add the node group to the cluster.
10608
10609     """
10610     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10611                                   uuid=self.group_uuid,
10612                                   alloc_policy=self.op.alloc_policy,
10613                                   ndparams=self.op.ndparams)
10614
10615     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10616     del self.remove_locks[locking.LEVEL_NODEGROUP]
10617
10618
10619 class LUGroupAssignNodes(NoHooksLU):
10620   """Logical unit for assigning nodes to groups.
10621
10622   """
10623   REQ_BGL = False
10624
10625   def ExpandNames(self):
10626     # These raise errors.OpPrereqError on their own:
10627     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10628     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10629
10630     # We want to lock all the affected nodes and groups. We have readily
10631     # available the list of nodes, and the *destination* group. To gather the
10632     # list of "source" groups, we need to fetch node information.
10633     self.node_data = self.cfg.GetAllNodesInfo()
10634     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10635     affected_groups.add(self.group_uuid)
10636
10637     self.needed_locks = {
10638       locking.LEVEL_NODEGROUP: list(affected_groups),
10639       locking.LEVEL_NODE: self.op.nodes,
10640       }
10641
10642   def CheckPrereq(self):
10643     """Check prerequisites.
10644
10645     """
10646     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10647     instance_data = self.cfg.GetAllInstancesInfo()
10648
10649     if self.group is None:
10650       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10651                                (self.op.group_name, self.group_uuid))
10652
10653     (new_splits, previous_splits) = \
10654       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10655                                              for node in self.op.nodes],
10656                                             self.node_data, instance_data)
10657
10658     if new_splits:
10659       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10660
10661       if not self.op.force:
10662         raise errors.OpExecError("The following instances get split by this"
10663                                  " change and --force was not given: %s" %
10664                                  fmt_new_splits)
10665       else:
10666         self.LogWarning("This operation will split the following instances: %s",
10667                         fmt_new_splits)
10668
10669         if previous_splits:
10670           self.LogWarning("In addition, these already-split instances continue"
10671                           " to be spit across groups: %s",
10672                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10673
10674   def Exec(self, feedback_fn):
10675     """Assign nodes to a new group.
10676
10677     """
10678     for node in self.op.nodes:
10679       self.node_data[node].group = self.group_uuid
10680
10681     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10682
10683   @staticmethod
10684   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10685     """Check for split instances after a node assignment.
10686
10687     This method considers a series of node assignments as an atomic operation,
10688     and returns information about split instances after applying the set of
10689     changes.
10690
10691     In particular, it returns information about newly split instances, and
10692     instances that were already split, and remain so after the change.
10693
10694     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10695     considered.
10696
10697     @type changes: list of (node_name, new_group_uuid) pairs.
10698     @param changes: list of node assignments to consider.
10699     @param node_data: a dict with data for all nodes
10700     @param instance_data: a dict with all instances to consider
10701     @rtype: a two-tuple
10702     @return: a list of instances that were previously okay and result split as a
10703       consequence of this change, and a list of instances that were previously
10704       split and this change does not fix.
10705
10706     """
10707     changed_nodes = dict((node, group) for node, group in changes
10708                          if node_data[node].group != group)
10709
10710     all_split_instances = set()
10711     previously_split_instances = set()
10712
10713     def InstanceNodes(instance):
10714       return [instance.primary_node] + list(instance.secondary_nodes)
10715
10716     for inst in instance_data.values():
10717       if inst.disk_template not in constants.DTS_INT_MIRROR:
10718         continue
10719
10720       instance_nodes = InstanceNodes(inst)
10721
10722       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10723         previously_split_instances.add(inst.name)
10724
10725       if len(set(changed_nodes.get(node, node_data[node].group)
10726                  for node in instance_nodes)) > 1:
10727         all_split_instances.add(inst.name)
10728
10729     return (list(all_split_instances - previously_split_instances),
10730             list(previously_split_instances & all_split_instances))
10731
10732
10733 class _GroupQuery(_QueryBase):
10734   FIELDS = query.GROUP_FIELDS
10735
10736   def ExpandNames(self, lu):
10737     lu.needed_locks = {}
10738
10739     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10740     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10741
10742     if not self.names:
10743       self.wanted = [name_to_uuid[name]
10744                      for name in utils.NiceSort(name_to_uuid.keys())]
10745     else:
10746       # Accept names to be either names or UUIDs.
10747       missing = []
10748       self.wanted = []
10749       all_uuid = frozenset(self._all_groups.keys())
10750
10751       for name in self.names:
10752         if name in all_uuid:
10753           self.wanted.append(name)
10754         elif name in name_to_uuid:
10755           self.wanted.append(name_to_uuid[name])
10756         else:
10757           missing.append(name)
10758
10759       if missing:
10760         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10761                                    errors.ECODE_NOENT)
10762
10763   def DeclareLocks(self, lu, level):
10764     pass
10765
10766   def _GetQueryData(self, lu):
10767     """Computes the list of node groups and their attributes.
10768
10769     """
10770     do_nodes = query.GQ_NODE in self.requested_data
10771     do_instances = query.GQ_INST in self.requested_data
10772
10773     group_to_nodes = None
10774     group_to_instances = None
10775
10776     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10777     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10778     # latter GetAllInstancesInfo() is not enough, for we have to go through
10779     # instance->node. Hence, we will need to process nodes even if we only need
10780     # instance information.
10781     if do_nodes or do_instances:
10782       all_nodes = lu.cfg.GetAllNodesInfo()
10783       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10784       node_to_group = {}
10785
10786       for node in all_nodes.values():
10787         if node.group in group_to_nodes:
10788           group_to_nodes[node.group].append(node.name)
10789           node_to_group[node.name] = node.group
10790
10791       if do_instances:
10792         all_instances = lu.cfg.GetAllInstancesInfo()
10793         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10794
10795         for instance in all_instances.values():
10796           node = instance.primary_node
10797           if node in node_to_group:
10798             group_to_instances[node_to_group[node]].append(instance.name)
10799
10800         if not do_nodes:
10801           # Do not pass on node information if it was not requested.
10802           group_to_nodes = None
10803
10804     return query.GroupQueryData([self._all_groups[uuid]
10805                                  for uuid in self.wanted],
10806                                 group_to_nodes, group_to_instances)
10807
10808
10809 class LUGroupQuery(NoHooksLU):
10810   """Logical unit for querying node groups.
10811
10812   """
10813   REQ_BGL = False
10814
10815   def CheckArguments(self):
10816     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10817                           self.op.output_fields, False)
10818
10819   def ExpandNames(self):
10820     self.gq.ExpandNames(self)
10821
10822   def Exec(self, feedback_fn):
10823     return self.gq.OldStyleQuery(self)
10824
10825
10826 class LUGroupSetParams(LogicalUnit):
10827   """Modifies the parameters of a node group.
10828
10829   """
10830   HPATH = "group-modify"
10831   HTYPE = constants.HTYPE_GROUP
10832   REQ_BGL = False
10833
10834   def CheckArguments(self):
10835     all_changes = [
10836       self.op.ndparams,
10837       self.op.alloc_policy,
10838       ]
10839
10840     if all_changes.count(None) == len(all_changes):
10841       raise errors.OpPrereqError("Please pass at least one modification",
10842                                  errors.ECODE_INVAL)
10843
10844   def ExpandNames(self):
10845     # This raises errors.OpPrereqError on its own:
10846     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10847
10848     self.needed_locks = {
10849       locking.LEVEL_NODEGROUP: [self.group_uuid],
10850       }
10851
10852   def CheckPrereq(self):
10853     """Check prerequisites.
10854
10855     """
10856     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10857
10858     if self.group is None:
10859       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10860                                (self.op.group_name, self.group_uuid))
10861
10862     if self.op.ndparams:
10863       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10864       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10865       self.new_ndparams = new_ndparams
10866
10867   def BuildHooksEnv(self):
10868     """Build hooks env.
10869
10870     """
10871     return {
10872       "GROUP_NAME": self.op.group_name,
10873       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10874       }
10875
10876   def BuildHooksNodes(self):
10877     """Build hooks nodes.
10878
10879     """
10880     mn = self.cfg.GetMasterNode()
10881     return ([mn], [mn])
10882
10883   def Exec(self, feedback_fn):
10884     """Modifies the node group.
10885
10886     """
10887     result = []
10888
10889     if self.op.ndparams:
10890       self.group.ndparams = self.new_ndparams
10891       result.append(("ndparams", str(self.group.ndparams)))
10892
10893     if self.op.alloc_policy:
10894       self.group.alloc_policy = self.op.alloc_policy
10895
10896     self.cfg.Update(self.group, feedback_fn)
10897     return result
10898
10899
10900
10901 class LUGroupRemove(LogicalUnit):
10902   HPATH = "group-remove"
10903   HTYPE = constants.HTYPE_GROUP
10904   REQ_BGL = False
10905
10906   def ExpandNames(self):
10907     # This will raises errors.OpPrereqError on its own:
10908     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10909     self.needed_locks = {
10910       locking.LEVEL_NODEGROUP: [self.group_uuid],
10911       }
10912
10913   def CheckPrereq(self):
10914     """Check prerequisites.
10915
10916     This checks that the given group name exists as a node group, that is
10917     empty (i.e., contains no nodes), and that is not the last group of the
10918     cluster.
10919
10920     """
10921     # Verify that the group is empty.
10922     group_nodes = [node.name
10923                    for node in self.cfg.GetAllNodesInfo().values()
10924                    if node.group == self.group_uuid]
10925
10926     if group_nodes:
10927       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10928                                  " nodes: %s" %
10929                                  (self.op.group_name,
10930                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10931                                  errors.ECODE_STATE)
10932
10933     # Verify the cluster would not be left group-less.
10934     if len(self.cfg.GetNodeGroupList()) == 1:
10935       raise errors.OpPrereqError("Group '%s' is the only group,"
10936                                  " cannot be removed" %
10937                                  self.op.group_name,
10938                                  errors.ECODE_STATE)
10939
10940   def BuildHooksEnv(self):
10941     """Build hooks env.
10942
10943     """
10944     return {
10945       "GROUP_NAME": self.op.group_name,
10946       }
10947
10948   def BuildHooksNodes(self):
10949     """Build hooks nodes.
10950
10951     """
10952     mn = self.cfg.GetMasterNode()
10953     return ([mn], [mn])
10954
10955   def Exec(self, feedback_fn):
10956     """Remove the node group.
10957
10958     """
10959     try:
10960       self.cfg.RemoveNodeGroup(self.group_uuid)
10961     except errors.ConfigurationError:
10962       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10963                                (self.op.group_name, self.group_uuid))
10964
10965     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10966
10967
10968 class LUGroupRename(LogicalUnit):
10969   HPATH = "group-rename"
10970   HTYPE = constants.HTYPE_GROUP
10971   REQ_BGL = False
10972
10973   def ExpandNames(self):
10974     # This raises errors.OpPrereqError on its own:
10975     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10976
10977     self.needed_locks = {
10978       locking.LEVEL_NODEGROUP: [self.group_uuid],
10979       }
10980
10981   def CheckPrereq(self):
10982     """Check prerequisites.
10983
10984     Ensures requested new name is not yet used.
10985
10986     """
10987     try:
10988       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10989     except errors.OpPrereqError:
10990       pass
10991     else:
10992       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10993                                  " node group (UUID: %s)" %
10994                                  (self.op.new_name, new_name_uuid),
10995                                  errors.ECODE_EXISTS)
10996
10997   def BuildHooksEnv(self):
10998     """Build hooks env.
10999
11000     """
11001     return {
11002       "OLD_NAME": self.op.group_name,
11003       "NEW_NAME": self.op.new_name,
11004       }
11005
11006   def BuildHooksNodes(self):
11007     """Build hooks nodes.
11008
11009     """
11010     mn = self.cfg.GetMasterNode()
11011
11012     all_nodes = self.cfg.GetAllNodesInfo()
11013     all_nodes.pop(mn, None)
11014
11015     run_nodes = [mn]
11016     run_nodes.extend(node.name for node in all_nodes.values()
11017                      if node.group == self.group_uuid)
11018
11019     return (run_nodes, run_nodes)
11020
11021   def Exec(self, feedback_fn):
11022     """Rename the node group.
11023
11024     """
11025     group = self.cfg.GetNodeGroup(self.group_uuid)
11026
11027     if group is None:
11028       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11029                                (self.op.group_name, self.group_uuid))
11030
11031     group.name = self.op.new_name
11032     self.cfg.Update(group, feedback_fn)
11033
11034     return self.op.new_name
11035
11036
11037 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11038   """Generic tags LU.
11039
11040   This is an abstract class which is the parent of all the other tags LUs.
11041
11042   """
11043
11044   def ExpandNames(self):
11045     self.needed_locks = {}
11046     if self.op.kind == constants.TAG_NODE:
11047       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11048       self.needed_locks[locking.LEVEL_NODE] = self.op.name
11049     elif self.op.kind == constants.TAG_INSTANCE:
11050       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11051       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11052
11053     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11054     # not possible to acquire the BGL based on opcode parameters)
11055
11056   def CheckPrereq(self):
11057     """Check prerequisites.
11058
11059     """
11060     if self.op.kind == constants.TAG_CLUSTER:
11061       self.target = self.cfg.GetClusterInfo()
11062     elif self.op.kind == constants.TAG_NODE:
11063       self.target = self.cfg.GetNodeInfo(self.op.name)
11064     elif self.op.kind == constants.TAG_INSTANCE:
11065       self.target = self.cfg.GetInstanceInfo(self.op.name)
11066     else:
11067       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11068                                  str(self.op.kind), errors.ECODE_INVAL)
11069
11070
11071 class LUTagsGet(TagsLU):
11072   """Returns the tags of a given object.
11073
11074   """
11075   REQ_BGL = False
11076
11077   def ExpandNames(self):
11078     TagsLU.ExpandNames(self)
11079
11080     # Share locks as this is only a read operation
11081     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11082
11083   def Exec(self, feedback_fn):
11084     """Returns the tag list.
11085
11086     """
11087     return list(self.target.GetTags())
11088
11089
11090 class LUTagsSearch(NoHooksLU):
11091   """Searches the tags for a given pattern.
11092
11093   """
11094   REQ_BGL = False
11095
11096   def ExpandNames(self):
11097     self.needed_locks = {}
11098
11099   def CheckPrereq(self):
11100     """Check prerequisites.
11101
11102     This checks the pattern passed for validity by compiling it.
11103
11104     """
11105     try:
11106       self.re = re.compile(self.op.pattern)
11107     except re.error, err:
11108       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11109                                  (self.op.pattern, err), errors.ECODE_INVAL)
11110
11111   def Exec(self, feedback_fn):
11112     """Returns the tag list.
11113
11114     """
11115     cfg = self.cfg
11116     tgts = [("/cluster", cfg.GetClusterInfo())]
11117     ilist = cfg.GetAllInstancesInfo().values()
11118     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11119     nlist = cfg.GetAllNodesInfo().values()
11120     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11121     results = []
11122     for path, target in tgts:
11123       for tag in target.GetTags():
11124         if self.re.search(tag):
11125           results.append((path, tag))
11126     return results
11127
11128
11129 class LUTagsSet(TagsLU):
11130   """Sets a tag on a given object.
11131
11132   """
11133   REQ_BGL = False
11134
11135   def CheckPrereq(self):
11136     """Check prerequisites.
11137
11138     This checks the type and length of the tag name and value.
11139
11140     """
11141     TagsLU.CheckPrereq(self)
11142     for tag in self.op.tags:
11143       objects.TaggableObject.ValidateTag(tag)
11144
11145   def Exec(self, feedback_fn):
11146     """Sets the tag.
11147
11148     """
11149     try:
11150       for tag in self.op.tags:
11151         self.target.AddTag(tag)
11152     except errors.TagError, err:
11153       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11154     self.cfg.Update(self.target, feedback_fn)
11155
11156
11157 class LUTagsDel(TagsLU):
11158   """Delete a list of tags from a given object.
11159
11160   """
11161   REQ_BGL = False
11162
11163   def CheckPrereq(self):
11164     """Check prerequisites.
11165
11166     This checks that we have the given tag.
11167
11168     """
11169     TagsLU.CheckPrereq(self)
11170     for tag in self.op.tags:
11171       objects.TaggableObject.ValidateTag(tag)
11172     del_tags = frozenset(self.op.tags)
11173     cur_tags = self.target.GetTags()
11174
11175     diff_tags = del_tags - cur_tags
11176     if diff_tags:
11177       diff_names = ("'%s'" % i for i in sorted(diff_tags))
11178       raise errors.OpPrereqError("Tag(s) %s not found" %
11179                                  (utils.CommaJoin(diff_names), ),
11180                                  errors.ECODE_NOENT)
11181
11182   def Exec(self, feedback_fn):
11183     """Remove the tag from the object.
11184
11185     """
11186     for tag in self.op.tags:
11187       self.target.RemoveTag(tag)
11188     self.cfg.Update(self.target, feedback_fn)
11189
11190
11191 class LUTestDelay(NoHooksLU):
11192   """Sleep for a specified amount of time.
11193
11194   This LU sleeps on the master and/or nodes for a specified amount of
11195   time.
11196
11197   """
11198   REQ_BGL = False
11199
11200   def ExpandNames(self):
11201     """Expand names and set required locks.
11202
11203     This expands the node list, if any.
11204
11205     """
11206     self.needed_locks = {}
11207     if self.op.on_nodes:
11208       # _GetWantedNodes can be used here, but is not always appropriate to use
11209       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11210       # more information.
11211       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11212       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11213
11214   def _TestDelay(self):
11215     """Do the actual sleep.
11216
11217     """
11218     if self.op.on_master:
11219       if not utils.TestDelay(self.op.duration):
11220         raise errors.OpExecError("Error during master delay test")
11221     if self.op.on_nodes:
11222       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11223       for node, node_result in result.items():
11224         node_result.Raise("Failure during rpc call to node %s" % node)
11225
11226   def Exec(self, feedback_fn):
11227     """Execute the test delay opcode, with the wanted repetitions.
11228
11229     """
11230     if self.op.repeat == 0:
11231       self._TestDelay()
11232     else:
11233       top_value = self.op.repeat - 1
11234       for i in range(self.op.repeat):
11235         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11236         self._TestDelay()
11237
11238
11239 class LUTestJqueue(NoHooksLU):
11240   """Utility LU to test some aspects of the job queue.
11241
11242   """
11243   REQ_BGL = False
11244
11245   # Must be lower than default timeout for WaitForJobChange to see whether it
11246   # notices changed jobs
11247   _CLIENT_CONNECT_TIMEOUT = 20.0
11248   _CLIENT_CONFIRM_TIMEOUT = 60.0
11249
11250   @classmethod
11251   def _NotifyUsingSocket(cls, cb, errcls):
11252     """Opens a Unix socket and waits for another program to connect.
11253
11254     @type cb: callable
11255     @param cb: Callback to send socket name to client
11256     @type errcls: class
11257     @param errcls: Exception class to use for errors
11258
11259     """
11260     # Using a temporary directory as there's no easy way to create temporary
11261     # sockets without writing a custom loop around tempfile.mktemp and
11262     # socket.bind
11263     tmpdir = tempfile.mkdtemp()
11264     try:
11265       tmpsock = utils.PathJoin(tmpdir, "sock")
11266
11267       logging.debug("Creating temporary socket at %s", tmpsock)
11268       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11269       try:
11270         sock.bind(tmpsock)
11271         sock.listen(1)
11272
11273         # Send details to client
11274         cb(tmpsock)
11275
11276         # Wait for client to connect before continuing
11277         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11278         try:
11279           (conn, _) = sock.accept()
11280         except socket.error, err:
11281           raise errcls("Client didn't connect in time (%s)" % err)
11282       finally:
11283         sock.close()
11284     finally:
11285       # Remove as soon as client is connected
11286       shutil.rmtree(tmpdir)
11287
11288     # Wait for client to close
11289     try:
11290       try:
11291         # pylint: disable-msg=E1101
11292         # Instance of '_socketobject' has no ... member
11293         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11294         conn.recv(1)
11295       except socket.error, err:
11296         raise errcls("Client failed to confirm notification (%s)" % err)
11297     finally:
11298       conn.close()
11299
11300   def _SendNotification(self, test, arg, sockname):
11301     """Sends a notification to the client.
11302
11303     @type test: string
11304     @param test: Test name
11305     @param arg: Test argument (depends on test)
11306     @type sockname: string
11307     @param sockname: Socket path
11308
11309     """
11310     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11311
11312   def _Notify(self, prereq, test, arg):
11313     """Notifies the client of a test.
11314
11315     @type prereq: bool
11316     @param prereq: Whether this is a prereq-phase test
11317     @type test: string
11318     @param test: Test name
11319     @param arg: Test argument (depends on test)
11320
11321     """
11322     if prereq:
11323       errcls = errors.OpPrereqError
11324     else:
11325       errcls = errors.OpExecError
11326
11327     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11328                                                   test, arg),
11329                                    errcls)
11330
11331   def CheckArguments(self):
11332     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11333     self.expandnames_calls = 0
11334
11335   def ExpandNames(self):
11336     checkargs_calls = getattr(self, "checkargs_calls", 0)
11337     if checkargs_calls < 1:
11338       raise errors.ProgrammerError("CheckArguments was not called")
11339
11340     self.expandnames_calls += 1
11341
11342     if self.op.notify_waitlock:
11343       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11344
11345     self.LogInfo("Expanding names")
11346
11347     # Get lock on master node (just to get a lock, not for a particular reason)
11348     self.needed_locks = {
11349       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11350       }
11351
11352   def Exec(self, feedback_fn):
11353     if self.expandnames_calls < 1:
11354       raise errors.ProgrammerError("ExpandNames was not called")
11355
11356     if self.op.notify_exec:
11357       self._Notify(False, constants.JQT_EXEC, None)
11358
11359     self.LogInfo("Executing")
11360
11361     if self.op.log_messages:
11362       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11363       for idx, msg in enumerate(self.op.log_messages):
11364         self.LogInfo("Sending log message %s", idx + 1)
11365         feedback_fn(constants.JQT_MSGPREFIX + msg)
11366         # Report how many test messages have been sent
11367         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11368
11369     if self.op.fail:
11370       raise errors.OpExecError("Opcode failure was requested")
11371
11372     return True
11373
11374
11375 class IAllocator(object):
11376   """IAllocator framework.
11377
11378   An IAllocator instance has three sets of attributes:
11379     - cfg that is needed to query the cluster
11380     - input data (all members of the _KEYS class attribute are required)
11381     - four buffer attributes (in|out_data|text), that represent the
11382       input (to the external script) in text and data structure format,
11383       and the output from it, again in two formats
11384     - the result variables from the script (success, info, nodes) for
11385       easy usage
11386
11387   """
11388   # pylint: disable-msg=R0902
11389   # lots of instance attributes
11390   _ALLO_KEYS = [
11391     "name", "mem_size", "disks", "disk_template",
11392     "os", "tags", "nics", "vcpus", "hypervisor",
11393     ]
11394   _RELO_KEYS = [
11395     "name", "relocate_from",
11396     ]
11397   _EVAC_KEYS = [
11398     "evac_nodes",
11399     ]
11400
11401   def __init__(self, cfg, rpc, mode, **kwargs):
11402     self.cfg = cfg
11403     self.rpc = rpc
11404     # init buffer variables
11405     self.in_text = self.out_text = self.in_data = self.out_data = None
11406     # init all input fields so that pylint is happy
11407     self.mode = mode
11408     self.mem_size = self.disks = self.disk_template = None
11409     self.os = self.tags = self.nics = self.vcpus = None
11410     self.hypervisor = None
11411     self.relocate_from = None
11412     self.name = None
11413     self.evac_nodes = None
11414     # computed fields
11415     self.required_nodes = None
11416     # init result fields
11417     self.success = self.info = self.result = None
11418     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11419       keyset = self._ALLO_KEYS
11420       fn = self._AddNewInstance
11421     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11422       keyset = self._RELO_KEYS
11423       fn = self._AddRelocateInstance
11424     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11425       keyset = self._EVAC_KEYS
11426       fn = self._AddEvacuateNodes
11427     else:
11428       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11429                                    " IAllocator" % self.mode)
11430     for key in kwargs:
11431       if key not in keyset:
11432         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11433                                      " IAllocator" % key)
11434       setattr(self, key, kwargs[key])
11435
11436     for key in keyset:
11437       if key not in kwargs:
11438         raise errors.ProgrammerError("Missing input parameter '%s' to"
11439                                      " IAllocator" % key)
11440     self._BuildInputData(fn)
11441
11442   def _ComputeClusterData(self):
11443     """Compute the generic allocator input data.
11444
11445     This is the data that is independent of the actual operation.
11446
11447     """
11448     cfg = self.cfg
11449     cluster_info = cfg.GetClusterInfo()
11450     # cluster data
11451     data = {
11452       "version": constants.IALLOCATOR_VERSION,
11453       "cluster_name": cfg.GetClusterName(),
11454       "cluster_tags": list(cluster_info.GetTags()),
11455       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11456       # we don't have job IDs
11457       }
11458     ninfo = cfg.GetAllNodesInfo()
11459     iinfo = cfg.GetAllInstancesInfo().values()
11460     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11461
11462     # node data
11463     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11464
11465     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11466       hypervisor_name = self.hypervisor
11467     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11468       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11469     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11470       hypervisor_name = cluster_info.enabled_hypervisors[0]
11471
11472     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11473                                         hypervisor_name)
11474     node_iinfo = \
11475       self.rpc.call_all_instances_info(node_list,
11476                                        cluster_info.enabled_hypervisors)
11477
11478     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11479
11480     config_ndata = self._ComputeBasicNodeData(ninfo)
11481     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11482                                                  i_list, config_ndata)
11483     assert len(data["nodes"]) == len(ninfo), \
11484         "Incomplete node data computed"
11485
11486     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11487
11488     self.in_data = data
11489
11490   @staticmethod
11491   def _ComputeNodeGroupData(cfg):
11492     """Compute node groups data.
11493
11494     """
11495     ng = {}
11496     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11497       ng[guuid] = {
11498         "name": gdata.name,
11499         "alloc_policy": gdata.alloc_policy,
11500         }
11501     return ng
11502
11503   @staticmethod
11504   def _ComputeBasicNodeData(node_cfg):
11505     """Compute global node data.
11506
11507     @rtype: dict
11508     @returns: a dict of name: (node dict, node config)
11509
11510     """
11511     node_results = {}
11512     for ninfo in node_cfg.values():
11513       # fill in static (config-based) values
11514       pnr = {
11515         "tags": list(ninfo.GetTags()),
11516         "primary_ip": ninfo.primary_ip,
11517         "secondary_ip": ninfo.secondary_ip,
11518         "offline": ninfo.offline,
11519         "drained": ninfo.drained,
11520         "master_candidate": ninfo.master_candidate,
11521         "group": ninfo.group,
11522         "master_capable": ninfo.master_capable,
11523         "vm_capable": ninfo.vm_capable,
11524         }
11525
11526       node_results[ninfo.name] = pnr
11527
11528     return node_results
11529
11530   @staticmethod
11531   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11532                               node_results):
11533     """Compute global node data.
11534
11535     @param node_results: the basic node structures as filled from the config
11536
11537     """
11538     # make a copy of the current dict
11539     node_results = dict(node_results)
11540     for nname, nresult in node_data.items():
11541       assert nname in node_results, "Missing basic data for node %s" % nname
11542       ninfo = node_cfg[nname]
11543
11544       if not (ninfo.offline or ninfo.drained):
11545         nresult.Raise("Can't get data for node %s" % nname)
11546         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11547                                 nname)
11548         remote_info = nresult.payload
11549
11550         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11551                      'vg_size', 'vg_free', 'cpu_total']:
11552           if attr not in remote_info:
11553             raise errors.OpExecError("Node '%s' didn't return attribute"
11554                                      " '%s'" % (nname, attr))
11555           if not isinstance(remote_info[attr], int):
11556             raise errors.OpExecError("Node '%s' returned invalid value"
11557                                      " for '%s': %s" %
11558                                      (nname, attr, remote_info[attr]))
11559         # compute memory used by primary instances
11560         i_p_mem = i_p_up_mem = 0
11561         for iinfo, beinfo in i_list:
11562           if iinfo.primary_node == nname:
11563             i_p_mem += beinfo[constants.BE_MEMORY]
11564             if iinfo.name not in node_iinfo[nname].payload:
11565               i_used_mem = 0
11566             else:
11567               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11568             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11569             remote_info['memory_free'] -= max(0, i_mem_diff)
11570
11571             if iinfo.admin_up:
11572               i_p_up_mem += beinfo[constants.BE_MEMORY]
11573
11574         # compute memory used by instances
11575         pnr_dyn = {
11576           "total_memory": remote_info['memory_total'],
11577           "reserved_memory": remote_info['memory_dom0'],
11578           "free_memory": remote_info['memory_free'],
11579           "total_disk": remote_info['vg_size'],
11580           "free_disk": remote_info['vg_free'],
11581           "total_cpus": remote_info['cpu_total'],
11582           "i_pri_memory": i_p_mem,
11583           "i_pri_up_memory": i_p_up_mem,
11584           }
11585         pnr_dyn.update(node_results[nname])
11586         node_results[nname] = pnr_dyn
11587
11588     return node_results
11589
11590   @staticmethod
11591   def _ComputeInstanceData(cluster_info, i_list):
11592     """Compute global instance data.
11593
11594     """
11595     instance_data = {}
11596     for iinfo, beinfo in i_list:
11597       nic_data = []
11598       for nic in iinfo.nics:
11599         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11600         nic_dict = {"mac": nic.mac,
11601                     "ip": nic.ip,
11602                     "mode": filled_params[constants.NIC_MODE],
11603                     "link": filled_params[constants.NIC_LINK],
11604                    }
11605         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11606           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11607         nic_data.append(nic_dict)
11608       pir = {
11609         "tags": list(iinfo.GetTags()),
11610         "admin_up": iinfo.admin_up,
11611         "vcpus": beinfo[constants.BE_VCPUS],
11612         "memory": beinfo[constants.BE_MEMORY],
11613         "os": iinfo.os,
11614         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11615         "nics": nic_data,
11616         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11617         "disk_template": iinfo.disk_template,
11618         "hypervisor": iinfo.hypervisor,
11619         }
11620       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11621                                                  pir["disks"])
11622       instance_data[iinfo.name] = pir
11623
11624     return instance_data
11625
11626   def _AddNewInstance(self):
11627     """Add new instance data to allocator structure.
11628
11629     This in combination with _AllocatorGetClusterData will create the
11630     correct structure needed as input for the allocator.
11631
11632     The checks for the completeness of the opcode must have already been
11633     done.
11634
11635     """
11636     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11637
11638     if self.disk_template in constants.DTS_INT_MIRROR:
11639       self.required_nodes = 2
11640     else:
11641       self.required_nodes = 1
11642     request = {
11643       "name": self.name,
11644       "disk_template": self.disk_template,
11645       "tags": self.tags,
11646       "os": self.os,
11647       "vcpus": self.vcpus,
11648       "memory": self.mem_size,
11649       "disks": self.disks,
11650       "disk_space_total": disk_space,
11651       "nics": self.nics,
11652       "required_nodes": self.required_nodes,
11653       }
11654     return request
11655
11656   def _AddRelocateInstance(self):
11657     """Add relocate instance data to allocator structure.
11658
11659     This in combination with _IAllocatorGetClusterData will create the
11660     correct structure needed as input for the allocator.
11661
11662     The checks for the completeness of the opcode must have already been
11663     done.
11664
11665     """
11666     instance = self.cfg.GetInstanceInfo(self.name)
11667     if instance is None:
11668       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11669                                    " IAllocator" % self.name)
11670
11671     if instance.disk_template not in constants.DTS_MIRRORED:
11672       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11673                                  errors.ECODE_INVAL)
11674
11675     if instance.disk_template in constants.DTS_INT_MIRROR and \
11676         len(instance.secondary_nodes) != 1:
11677       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11678                                  errors.ECODE_STATE)
11679
11680     self.required_nodes = 1
11681     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11682     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11683
11684     request = {
11685       "name": self.name,
11686       "disk_space_total": disk_space,
11687       "required_nodes": self.required_nodes,
11688       "relocate_from": self.relocate_from,
11689       }
11690     return request
11691
11692   def _AddEvacuateNodes(self):
11693     """Add evacuate nodes data to allocator structure.
11694
11695     """
11696     request = {
11697       "evac_nodes": self.evac_nodes
11698       }
11699     return request
11700
11701   def _BuildInputData(self, fn):
11702     """Build input data structures.
11703
11704     """
11705     self._ComputeClusterData()
11706
11707     request = fn()
11708     request["type"] = self.mode
11709     self.in_data["request"] = request
11710
11711     self.in_text = serializer.Dump(self.in_data)
11712
11713   def Run(self, name, validate=True, call_fn=None):
11714     """Run an instance allocator and return the results.
11715
11716     """
11717     if call_fn is None:
11718       call_fn = self.rpc.call_iallocator_runner
11719
11720     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11721     result.Raise("Failure while running the iallocator script")
11722
11723     self.out_text = result.payload
11724     if validate:
11725       self._ValidateResult()
11726
11727   def _ValidateResult(self):
11728     """Process the allocator results.
11729
11730     This will process and if successful save the result in
11731     self.out_data and the other parameters.
11732
11733     """
11734     try:
11735       rdict = serializer.Load(self.out_text)
11736     except Exception, err:
11737       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11738
11739     if not isinstance(rdict, dict):
11740       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11741
11742     # TODO: remove backwards compatiblity in later versions
11743     if "nodes" in rdict and "result" not in rdict:
11744       rdict["result"] = rdict["nodes"]
11745       del rdict["nodes"]
11746
11747     for key in "success", "info", "result":
11748       if key not in rdict:
11749         raise errors.OpExecError("Can't parse iallocator results:"
11750                                  " missing key '%s'" % key)
11751       setattr(self, key, rdict[key])
11752
11753     if not isinstance(rdict["result"], list):
11754       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11755                                " is not a list")
11756     self.out_data = rdict
11757
11758
11759 class LUTestAllocator(NoHooksLU):
11760   """Run allocator tests.
11761
11762   This LU runs the allocator tests
11763
11764   """
11765   def CheckPrereq(self):
11766     """Check prerequisites.
11767
11768     This checks the opcode parameters depending on the director and mode test.
11769
11770     """
11771     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11772       for attr in ["mem_size", "disks", "disk_template",
11773                    "os", "tags", "nics", "vcpus"]:
11774         if not hasattr(self.op, attr):
11775           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11776                                      attr, errors.ECODE_INVAL)
11777       iname = self.cfg.ExpandInstanceName(self.op.name)
11778       if iname is not None:
11779         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11780                                    iname, errors.ECODE_EXISTS)
11781       if not isinstance(self.op.nics, list):
11782         raise errors.OpPrereqError("Invalid parameter 'nics'",
11783                                    errors.ECODE_INVAL)
11784       if not isinstance(self.op.disks, list):
11785         raise errors.OpPrereqError("Invalid parameter 'disks'",
11786                                    errors.ECODE_INVAL)
11787       for row in self.op.disks:
11788         if (not isinstance(row, dict) or
11789             "size" not in row or
11790             not isinstance(row["size"], int) or
11791             "mode" not in row or
11792             row["mode"] not in ['r', 'w']):
11793           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11794                                      " parameter", errors.ECODE_INVAL)
11795       if self.op.hypervisor is None:
11796         self.op.hypervisor = self.cfg.GetHypervisorType()
11797     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11798       fname = _ExpandInstanceName(self.cfg, self.op.name)
11799       self.op.name = fname
11800       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11801     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11802       if not hasattr(self.op, "evac_nodes"):
11803         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11804                                    " opcode input", errors.ECODE_INVAL)
11805     else:
11806       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11807                                  self.op.mode, errors.ECODE_INVAL)
11808
11809     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11810       if self.op.allocator is None:
11811         raise errors.OpPrereqError("Missing allocator name",
11812                                    errors.ECODE_INVAL)
11813     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11814       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11815                                  self.op.direction, errors.ECODE_INVAL)
11816
11817   def Exec(self, feedback_fn):
11818     """Run the allocator test.
11819
11820     """
11821     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11822       ial = IAllocator(self.cfg, self.rpc,
11823                        mode=self.op.mode,
11824                        name=self.op.name,
11825                        mem_size=self.op.mem_size,
11826                        disks=self.op.disks,
11827                        disk_template=self.op.disk_template,
11828                        os=self.op.os,
11829                        tags=self.op.tags,
11830                        nics=self.op.nics,
11831                        vcpus=self.op.vcpus,
11832                        hypervisor=self.op.hypervisor,
11833                        )
11834     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11835       ial = IAllocator(self.cfg, self.rpc,
11836                        mode=self.op.mode,
11837                        name=self.op.name,
11838                        relocate_from=list(self.relocate_from),
11839                        )
11840     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11841       ial = IAllocator(self.cfg, self.rpc,
11842                        mode=self.op.mode,
11843                        evac_nodes=self.op.evac_nodes)
11844     else:
11845       raise errors.ProgrammerError("Uncatched mode %s in"
11846                                    " LUTestAllocator.Exec", self.op.mode)
11847
11848     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11849       result = ial.in_text
11850     else:
11851       ial.Run(self.op.allocator, validate=False)
11852       result = ial.out_text
11853     return result
11854
11855
11856 #: Query type implementations
11857 _QUERY_IMPL = {
11858   constants.QR_INSTANCE: _InstanceQuery,
11859   constants.QR_NODE: _NodeQuery,
11860   constants.QR_GROUP: _GroupQuery,
11861   constants.QR_OS: _OsQuery,
11862   }
11863
11864 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
11865
11866
11867 def _GetQueryImplementation(name):
11868   """Returns the implemtnation for a query type.
11869
11870   @param name: Query type, must be one of L{constants.QR_VIA_OP}
11871
11872   """
11873   try:
11874     return _QUERY_IMPL[name]
11875   except KeyError:
11876     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11877                                errors.ECODE_INVAL)