code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUInstanceQueryData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
1007 def _GetNodeInstances(cfg, node_name):
1008   """Returns a list of all primary and secondary instances on a node.
1009
1010   """
1011
1012   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013
1014
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016   """Returns primary instances on a node.
1017
1018   """
1019   return _GetNodeInstancesInner(cfg,
1020                                 lambda inst: node_name == inst.primary_node)
1021
1022
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024   """Returns secondary instances on a node.
1025
1026   """
1027   return _GetNodeInstancesInner(cfg,
1028                                 lambda inst: node_name in inst.secondary_nodes)
1029
1030
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032   """Returns the arguments for a storage type.
1033
1034   """
1035   # Special case for file storage
1036   if storage_type == constants.ST_FILE:
1037     # storage.FileStorage wants a list of storage directories
1038     return [[cfg.GetFileStorageDir()]]
1039
1040   return []
1041
1042
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044   faulty = []
1045
1046   for dev in instance.disks:
1047     cfg.SetDiskID(dev, node_name)
1048
1049   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050   result.Raise("Failed to get disk status from node %s" % node_name,
1051                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052
1053   for idx, bdev_status in enumerate(result.payload):
1054     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055       faulty.append(idx)
1056
1057   return faulty
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
1092 class LUClusterPostInit(LogicalUnit):
1093   """Logical unit for running hooks after cluster initialization.
1094
1095   """
1096   HPATH = "cluster-init"
1097   HTYPE = constants.HTYPE_CLUSTER
1098
1099   def BuildHooksEnv(self):
1100     """Build hooks env.
1101
1102     """
1103     env = {"OP_TARGET": self.cfg.GetClusterName()}
1104     mn = self.cfg.GetMasterNode()
1105     return env, [], [mn]
1106
1107   def Exec(self, feedback_fn):
1108     """Nothing to do.
1109
1110     """
1111     return True
1112
1113
1114 class LUClusterDestroy(LogicalUnit):
1115   """Logical unit for destroying the cluster.
1116
1117   """
1118   HPATH = "cluster-destroy"
1119   HTYPE = constants.HTYPE_CLUSTER
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     return env, [], []
1127
1128   def CheckPrereq(self):
1129     """Check prerequisites.
1130
1131     This checks whether the cluster is empty.
1132
1133     Any errors are signaled by raising errors.OpPrereqError.
1134
1135     """
1136     master = self.cfg.GetMasterNode()
1137
1138     nodelist = self.cfg.GetNodeList()
1139     if len(nodelist) != 1 or nodelist[0] != master:
1140       raise errors.OpPrereqError("There are still %d node(s) in"
1141                                  " this cluster." % (len(nodelist) - 1),
1142                                  errors.ECODE_INVAL)
1143     instancelist = self.cfg.GetInstanceList()
1144     if instancelist:
1145       raise errors.OpPrereqError("There are still %d instance(s) in"
1146                                  " this cluster." % len(instancelist),
1147                                  errors.ECODE_INVAL)
1148
1149   def Exec(self, feedback_fn):
1150     """Destroys the cluster.
1151
1152     """
1153     master = self.cfg.GetMasterNode()
1154
1155     # Run post hooks on master node before it's removed
1156     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157     try:
1158       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159     except:
1160       # pylint: disable-msg=W0702
1161       self.LogWarning("Errors occurred running hooks on %s" % master)
1162
1163     result = self.rpc.call_node_stop_master(master, False)
1164     result.Raise("Could not disable the master role")
1165
1166     return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
1202 class LUClusterVerify(LogicalUnit):
1203   """Verifies the cluster status.
1204
1205   """
1206   HPATH = "cluster-verify"
1207   HTYPE = constants.HTYPE_CLUSTER
1208   REQ_BGL = False
1209
1210   TCLUSTER = "cluster"
1211   TNODE = "node"
1212   TINSTANCE = "instance"
1213
1214   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223   ENODEDRBD = (TNODE, "ENODEDRBD")
1224   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227   ENODEHV = (TNODE, "ENODEHV")
1228   ENODELVM = (TNODE, "ENODELVM")
1229   ENODEN1 = (TNODE, "ENODEN1")
1230   ENODENET = (TNODE, "ENODENET")
1231   ENODEOS = (TNODE, "ENODEOS")
1232   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234   ENODERPC = (TNODE, "ENODERPC")
1235   ENODESSH = (TNODE, "ENODESSH")
1236   ENODEVERSION = (TNODE, "ENODEVERSION")
1237   ENODESETUP = (TNODE, "ENODESETUP")
1238   ENODETIME = (TNODE, "ENODETIME")
1239   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240
1241   ETYPE_FIELD = "code"
1242   ETYPE_ERROR = "ERROR"
1243   ETYPE_WARNING = "WARNING"
1244
1245   _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     test = nresult.get(constants.NV_NODESETUP,
1401                            ["Missing NODESETUP results"])
1402     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1403              "; ".join(test))
1404
1405     return True
1406
1407   def _VerifyNodeTime(self, ninfo, nresult,
1408                       nvinfo_starttime, nvinfo_endtime):
1409     """Check the node time.
1410
1411     @type ninfo: L{objects.Node}
1412     @param ninfo: the node to check
1413     @param nresult: the remote results for the node
1414     @param nvinfo_starttime: the start time of the RPC call
1415     @param nvinfo_endtime: the end time of the RPC call
1416
1417     """
1418     node = ninfo.name
1419     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1420
1421     ntime = nresult.get(constants.NV_TIME, None)
1422     try:
1423       ntime_merged = utils.MergeTime(ntime)
1424     except (ValueError, TypeError):
1425       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1426       return
1427
1428     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1429       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1430     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1431       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1432     else:
1433       ntime_diff = None
1434
1435     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1436              "Node time diverges by at least %s from master node time",
1437              ntime_diff)
1438
1439   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1440     """Check the node time.
1441
1442     @type ninfo: L{objects.Node}
1443     @param ninfo: the node to check
1444     @param nresult: the remote results for the node
1445     @param vg_name: the configured VG name
1446
1447     """
1448     if vg_name is None:
1449       return
1450
1451     node = ninfo.name
1452     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1453
1454     # checks vg existence and size > 20G
1455     vglist = nresult.get(constants.NV_VGLIST, None)
1456     test = not vglist
1457     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1458     if not test:
1459       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1460                                             constants.MIN_VG_SIZE)
1461       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1462
1463     # check pv names
1464     pvlist = nresult.get(constants.NV_PVLIST, None)
1465     test = pvlist is None
1466     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1467     if not test:
1468       # check that ':' is not present in PV names, since it's a
1469       # special character for lvcreate (denotes the range of PEs to
1470       # use on the PV)
1471       for _, pvname, owner_vg in pvlist:
1472         test = ":" in pvname
1473         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1474                  " '%s' of VG '%s'", pvname, owner_vg)
1475
1476   def _VerifyNodeNetwork(self, ninfo, nresult):
1477     """Check the node time.
1478
1479     @type ninfo: L{objects.Node}
1480     @param ninfo: the node to check
1481     @param nresult: the remote results for the node
1482
1483     """
1484     node = ninfo.name
1485     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1486
1487     test = constants.NV_NODELIST not in nresult
1488     _ErrorIf(test, self.ENODESSH, node,
1489              "node hasn't returned node ssh connectivity data")
1490     if not test:
1491       if nresult[constants.NV_NODELIST]:
1492         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1493           _ErrorIf(True, self.ENODESSH, node,
1494                    "ssh communication with node '%s': %s", a_node, a_msg)
1495
1496     test = constants.NV_NODENETTEST not in nresult
1497     _ErrorIf(test, self.ENODENET, node,
1498              "node hasn't returned node tcp connectivity data")
1499     if not test:
1500       if nresult[constants.NV_NODENETTEST]:
1501         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1502         for anode in nlist:
1503           _ErrorIf(True, self.ENODENET, node,
1504                    "tcp communication with node '%s': %s",
1505                    anode, nresult[constants.NV_NODENETTEST][anode])
1506
1507     test = constants.NV_MASTERIP not in nresult
1508     _ErrorIf(test, self.ENODENET, node,
1509              "node hasn't returned node master IP reachability data")
1510     if not test:
1511       if not nresult[constants.NV_MASTERIP]:
1512         if node == self.master_node:
1513           msg = "the master node cannot reach the master IP (not configured?)"
1514         else:
1515           msg = "cannot reach the master IP"
1516         _ErrorIf(True, self.ENODENET, node, msg)
1517
1518   def _VerifyInstance(self, instance, instanceconfig, node_image,
1519                       diskstatus):
1520     """Verify an instance.
1521
1522     This function checks to see if the required block devices are
1523     available on the instance's node.
1524
1525     """
1526     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1527     node_current = instanceconfig.primary_node
1528
1529     node_vol_should = {}
1530     instanceconfig.MapLVsByNode(node_vol_should)
1531
1532     for node in node_vol_should:
1533       n_img = node_image[node]
1534       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1535         # ignore missing volumes on offline or broken nodes
1536         continue
1537       for volume in node_vol_should[node]:
1538         test = volume not in n_img.volumes
1539         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1540                  "volume %s missing on node %s", volume, node)
1541
1542     if instanceconfig.admin_up:
1543       pri_img = node_image[node_current]
1544       test = instance not in pri_img.instances and not pri_img.offline
1545       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1546                "instance not running on its primary node %s",
1547                node_current)
1548
1549     for node, n_img in node_image.items():
1550       if (not node == node_current):
1551         test = instance in n_img.instances
1552         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1553                  "instance should not run on node %s", node)
1554
1555     diskdata = [(nname, success, status, idx)
1556                 for (nname, disks) in diskstatus.items()
1557                 for idx, (success, status) in enumerate(disks)]
1558
1559     for nname, success, bdev_status, idx in diskdata:
1560       _ErrorIf(instanceconfig.admin_up and not success,
1561                self.EINSTANCEFAULTYDISK, instance,
1562                "couldn't retrieve status for disk/%s on %s: %s",
1563                idx, nname, bdev_status)
1564       _ErrorIf((instanceconfig.admin_up and success and
1565                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1566                self.EINSTANCEFAULTYDISK, instance,
1567                "disk/%s on %s is faulty", idx, nname)
1568
1569   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1570     """Verify if there are any unknown volumes in the cluster.
1571
1572     The .os, .swap and backup volumes are ignored. All other volumes are
1573     reported as unknown.
1574
1575     @type reserved: L{ganeti.utils.FieldSet}
1576     @param reserved: a FieldSet of reserved volume names
1577
1578     """
1579     for node, n_img in node_image.items():
1580       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1581         # skip non-healthy nodes
1582         continue
1583       for volume in n_img.volumes:
1584         test = ((node not in node_vol_should or
1585                 volume not in node_vol_should[node]) and
1586                 not reserved.Matches(volume))
1587         self._ErrorIf(test, self.ENODEORPHANLV, node,
1588                       "volume %s is unknown", volume)
1589
1590   def _VerifyOrphanInstances(self, instancelist, node_image):
1591     """Verify the list of running instances.
1592
1593     This checks what instances are running but unknown to the cluster.
1594
1595     """
1596     for node, n_img in node_image.items():
1597       for o_inst in n_img.instances:
1598         test = o_inst not in instancelist
1599         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1600                       "instance %s on node %s should not exist", o_inst, node)
1601
1602   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1603     """Verify N+1 Memory Resilience.
1604
1605     Check that if one single node dies we can still start all the
1606     instances it was primary for.
1607
1608     """
1609     for node, n_img in node_image.items():
1610       # This code checks that every node which is now listed as
1611       # secondary has enough memory to host all instances it is
1612       # supposed to should a single other node in the cluster fail.
1613       # FIXME: not ready for failover to an arbitrary node
1614       # FIXME: does not support file-backed instances
1615       # WARNING: we currently take into account down instances as well
1616       # as up ones, considering that even if they're down someone
1617       # might want to start them even in the event of a node failure.
1618       for prinode, instances in n_img.sbp.items():
1619         needed_mem = 0
1620         for instance in instances:
1621           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1622           if bep[constants.BE_AUTO_BALANCE]:
1623             needed_mem += bep[constants.BE_MEMORY]
1624         test = n_img.mfree < needed_mem
1625         self._ErrorIf(test, self.ENODEN1, node,
1626                       "not enough memory to accomodate instance failovers"
1627                       " should node %s fail", prinode)
1628
1629   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1630                        master_files):
1631     """Verifies and computes the node required file checksums.
1632
1633     @type ninfo: L{objects.Node}
1634     @param ninfo: the node to check
1635     @param nresult: the remote results for the node
1636     @param file_list: required list of files
1637     @param local_cksum: dictionary of local files and their checksums
1638     @param master_files: list of files that only masters should have
1639
1640     """
1641     node = ninfo.name
1642     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1643
1644     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1645     test = not isinstance(remote_cksum, dict)
1646     _ErrorIf(test, self.ENODEFILECHECK, node,
1647              "node hasn't returned file checksum data")
1648     if test:
1649       return
1650
1651     for file_name in file_list:
1652       node_is_mc = ninfo.master_candidate
1653       must_have = (file_name not in master_files) or node_is_mc
1654       # missing
1655       test1 = file_name not in remote_cksum
1656       # invalid checksum
1657       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1658       # existing and good
1659       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1660       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1661                "file '%s' missing", file_name)
1662       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1663                "file '%s' has wrong checksum", file_name)
1664       # not candidate and this is not a must-have file
1665       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1666                "file '%s' should not exist on non master"
1667                " candidates (and the file is outdated)", file_name)
1668       # all good, except non-master/non-must have combination
1669       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1670                "file '%s' should not exist"
1671                " on non master candidates", file_name)
1672
1673   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1674                       drbd_map):
1675     """Verifies and the node DRBD status.
1676
1677     @type ninfo: L{objects.Node}
1678     @param ninfo: the node to check
1679     @param nresult: the remote results for the node
1680     @param instanceinfo: the dict of instances
1681     @param drbd_helper: the configured DRBD usermode helper
1682     @param drbd_map: the DRBD map as returned by
1683         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1684
1685     """
1686     node = ninfo.name
1687     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1688
1689     if drbd_helper:
1690       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1691       test = (helper_result == None)
1692       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1693                "no drbd usermode helper returned")
1694       if helper_result:
1695         status, payload = helper_result
1696         test = not status
1697         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1698                  "drbd usermode helper check unsuccessful: %s", payload)
1699         test = status and (payload != drbd_helper)
1700         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1701                  "wrong drbd usermode helper: %s", payload)
1702
1703     # compute the DRBD minors
1704     node_drbd = {}
1705     for minor, instance in drbd_map[node].items():
1706       test = instance not in instanceinfo
1707       _ErrorIf(test, self.ECLUSTERCFG, None,
1708                "ghost instance '%s' in temporary DRBD map", instance)
1709         # ghost instance should not be running, but otherwise we
1710         # don't give double warnings (both ghost instance and
1711         # unallocated minor in use)
1712       if test:
1713         node_drbd[minor] = (instance, False)
1714       else:
1715         instance = instanceinfo[instance]
1716         node_drbd[minor] = (instance.name, instance.admin_up)
1717
1718     # and now check them
1719     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720     test = not isinstance(used_minors, (tuple, list))
1721     _ErrorIf(test, self.ENODEDRBD, node,
1722              "cannot parse drbd status file: %s", str(used_minors))
1723     if test:
1724       # we cannot check drbd status
1725       return
1726
1727     for minor, (iname, must_exist) in node_drbd.items():
1728       test = minor not in used_minors and must_exist
1729       _ErrorIf(test, self.ENODEDRBD, node,
1730                "drbd minor %d of instance %s is not active", minor, iname)
1731     for minor in used_minors:
1732       test = minor not in node_drbd
1733       _ErrorIf(test, self.ENODEDRBD, node,
1734                "unallocated drbd minor %d is in use", minor)
1735
1736   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737     """Builds the node OS structures.
1738
1739     @type ninfo: L{objects.Node}
1740     @param ninfo: the node to check
1741     @param nresult: the remote results for the node
1742     @param nimg: the node image object
1743
1744     """
1745     node = ninfo.name
1746     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1747
1748     remote_os = nresult.get(constants.NV_OSLIST, None)
1749     test = (not isinstance(remote_os, list) or
1750             not compat.all(isinstance(v, list) and len(v) == 7
1751                            for v in remote_os))
1752
1753     _ErrorIf(test, self.ENODEOS, node,
1754              "node hasn't returned valid OS data")
1755
1756     nimg.os_fail = test
1757
1758     if test:
1759       return
1760
1761     os_dict = {}
1762
1763     for (name, os_path, status, diagnose,
1764          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1765
1766       if name not in os_dict:
1767         os_dict[name] = []
1768
1769       # parameters is a list of lists instead of list of tuples due to
1770       # JSON lacking a real tuple type, fix it:
1771       parameters = [tuple(v) for v in parameters]
1772       os_dict[name].append((os_path, status, diagnose,
1773                             set(variants), set(parameters), set(api_ver)))
1774
1775     nimg.oslist = os_dict
1776
1777   def _VerifyNodeOS(self, ninfo, nimg, base):
1778     """Verifies the node OS list.
1779
1780     @type ninfo: L{objects.Node}
1781     @param ninfo: the node to check
1782     @param nimg: the node image object
1783     @param base: the 'template' node we match against (e.g. from the master)
1784
1785     """
1786     node = ninfo.name
1787     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788
1789     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1790
1791     for os_name, os_data in nimg.oslist.items():
1792       assert os_data, "Empty OS status for OS %s?!" % os_name
1793       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794       _ErrorIf(not f_status, self.ENODEOS, node,
1795                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797                "OS '%s' has multiple entries (first one shadows the rest): %s",
1798                os_name, utils.CommaJoin([v[0] for v in os_data]))
1799       # this will catched in backend too
1800       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801                and not f_var, self.ENODEOS, node,
1802                "OS %s with API at least %d does not declare any variant",
1803                os_name, constants.OS_API_V15)
1804       # comparisons with the 'base' image
1805       test = os_name not in base.oslist
1806       _ErrorIf(test, self.ENODEOS, node,
1807                "Extra OS %s not present on reference node (%s)",
1808                os_name, base.name)
1809       if test:
1810         continue
1811       assert base.oslist[os_name], "Base node has empty OS status?"
1812       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1813       if not b_status:
1814         # base OS is invalid, skipping
1815         continue
1816       for kind, a, b in [("API version", f_api, b_api),
1817                          ("variants list", f_var, b_var),
1818                          ("parameters", f_param, b_param)]:
1819         _ErrorIf(a != b, self.ENODEOS, node,
1820                  "OS %s %s differs from reference node %s: %s vs. %s",
1821                  kind, os_name, base.name,
1822                  utils.CommaJoin(a), utils.CommaJoin(b))
1823
1824     # check any missing OSes
1825     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826     _ErrorIf(missing, self.ENODEOS, node,
1827              "OSes present on reference node %s but missing on this node: %s",
1828              base.name, utils.CommaJoin(missing))
1829
1830   def _VerifyOob(self, ninfo, nresult):
1831     """Verifies out of band functionality of a node.
1832
1833     @type ninfo: L{objects.Node}
1834     @param ninfo: the node to check
1835     @param nresult: the remote results for the node
1836
1837     """
1838     node = ninfo.name
1839     # We just have to verify the paths on master and/or master candidates
1840     # as the oob helper is invoked on the master
1841     if ((ninfo.master_candidate or ninfo.master_capable) and
1842         constants.NV_OOB_PATHS in nresult):
1843       for path_result in nresult[constants.NV_OOB_PATHS]:
1844         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1845
1846   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1847     """Verifies and updates the node volume data.
1848
1849     This function will update a L{NodeImage}'s internal structures
1850     with data from the remote call.
1851
1852     @type ninfo: L{objects.Node}
1853     @param ninfo: the node to check
1854     @param nresult: the remote results for the node
1855     @param nimg: the node image object
1856     @param vg_name: the configured VG name
1857
1858     """
1859     node = ninfo.name
1860     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1861
1862     nimg.lvm_fail = True
1863     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1864     if vg_name is None:
1865       pass
1866     elif isinstance(lvdata, basestring):
1867       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1868                utils.SafeEncode(lvdata))
1869     elif not isinstance(lvdata, dict):
1870       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1871     else:
1872       nimg.volumes = lvdata
1873       nimg.lvm_fail = False
1874
1875   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1876     """Verifies and updates the node instance list.
1877
1878     If the listing was successful, then updates this node's instance
1879     list. Otherwise, it marks the RPC call as failed for the instance
1880     list key.
1881
1882     @type ninfo: L{objects.Node}
1883     @param ninfo: the node to check
1884     @param nresult: the remote results for the node
1885     @param nimg: the node image object
1886
1887     """
1888     idata = nresult.get(constants.NV_INSTANCELIST, None)
1889     test = not isinstance(idata, list)
1890     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1891                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1892     if test:
1893       nimg.hyp_fail = True
1894     else:
1895       nimg.instances = idata
1896
1897   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1898     """Verifies and computes a node information map
1899
1900     @type ninfo: L{objects.Node}
1901     @param ninfo: the node to check
1902     @param nresult: the remote results for the node
1903     @param nimg: the node image object
1904     @param vg_name: the configured VG name
1905
1906     """
1907     node = ninfo.name
1908     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1909
1910     # try to read free memory (from the hypervisor)
1911     hv_info = nresult.get(constants.NV_HVINFO, None)
1912     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1913     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1914     if not test:
1915       try:
1916         nimg.mfree = int(hv_info["memory_free"])
1917       except (ValueError, TypeError):
1918         _ErrorIf(True, self.ENODERPC, node,
1919                  "node returned invalid nodeinfo, check hypervisor")
1920
1921     # FIXME: devise a free space model for file based instances as well
1922     if vg_name is not None:
1923       test = (constants.NV_VGLIST not in nresult or
1924               vg_name not in nresult[constants.NV_VGLIST])
1925       _ErrorIf(test, self.ENODELVM, node,
1926                "node didn't return data for the volume group '%s'"
1927                " - it is either missing or broken", vg_name)
1928       if not test:
1929         try:
1930           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1931         except (ValueError, TypeError):
1932           _ErrorIf(True, self.ENODERPC, node,
1933                    "node returned invalid LVM info, check LVM status")
1934
1935   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1936     """Gets per-disk status information for all instances.
1937
1938     @type nodelist: list of strings
1939     @param nodelist: Node names
1940     @type node_image: dict of (name, L{objects.Node})
1941     @param node_image: Node objects
1942     @type instanceinfo: dict of (name, L{objects.Instance})
1943     @param instanceinfo: Instance objects
1944     @rtype: {instance: {node: [(succes, payload)]}}
1945     @return: a dictionary of per-instance dictionaries with nodes as
1946         keys and disk information as values; the disk information is a
1947         list of tuples (success, payload)
1948
1949     """
1950     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1951
1952     node_disks = {}
1953     node_disks_devonly = {}
1954     diskless_instances = set()
1955     diskless = constants.DT_DISKLESS
1956
1957     for nname in nodelist:
1958       node_instances = list(itertools.chain(node_image[nname].pinst,
1959                                             node_image[nname].sinst))
1960       diskless_instances.update(inst for inst in node_instances
1961                                 if instanceinfo[inst].disk_template == diskless)
1962       disks = [(inst, disk)
1963                for inst in node_instances
1964                for disk in instanceinfo[inst].disks]
1965
1966       if not disks:
1967         # No need to collect data
1968         continue
1969
1970       node_disks[nname] = disks
1971
1972       # Creating copies as SetDiskID below will modify the objects and that can
1973       # lead to incorrect data returned from nodes
1974       devonly = [dev.Copy() for (_, dev) in disks]
1975
1976       for dev in devonly:
1977         self.cfg.SetDiskID(dev, nname)
1978
1979       node_disks_devonly[nname] = devonly
1980
1981     assert len(node_disks) == len(node_disks_devonly)
1982
1983     # Collect data from all nodes with disks
1984     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1985                                                           node_disks_devonly)
1986
1987     assert len(result) == len(node_disks)
1988
1989     instdisk = {}
1990
1991     for (nname, nres) in result.items():
1992       disks = node_disks[nname]
1993
1994       if nres.offline:
1995         # No data from this node
1996         data = len(disks) * [(False, "node offline")]
1997       else:
1998         msg = nres.fail_msg
1999         _ErrorIf(msg, self.ENODERPC, nname,
2000                  "while getting disk information: %s", msg)
2001         if msg:
2002           # No data from this node
2003           data = len(disks) * [(False, msg)]
2004         else:
2005           data = []
2006           for idx, i in enumerate(nres.payload):
2007             if isinstance(i, (tuple, list)) and len(i) == 2:
2008               data.append(i)
2009             else:
2010               logging.warning("Invalid result from node %s, entry %d: %s",
2011                               nname, idx, i)
2012               data.append((False, "Invalid result from the remote node"))
2013
2014       for ((inst, _), status) in zip(disks, data):
2015         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2016
2017     # Add empty entries for diskless instances.
2018     for inst in diskless_instances:
2019       assert inst not in instdisk
2020       instdisk[inst] = {}
2021
2022     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2023                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2024                       compat.all(isinstance(s, (tuple, list)) and
2025                                  len(s) == 2 for s in statuses)
2026                       for inst, nnames in instdisk.items()
2027                       for nname, statuses in nnames.items())
2028     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2029
2030     return instdisk
2031
2032   def BuildHooksEnv(self):
2033     """Build hooks env.
2034
2035     Cluster-Verify hooks just ran in the post phase and their failure makes
2036     the output be logged in the verify output and the verification to fail.
2037
2038     """
2039     all_nodes = self.cfg.GetNodeList()
2040     env = {
2041       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2042       }
2043     for node in self.cfg.GetAllNodesInfo().values():
2044       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2045
2046     return env, [], all_nodes
2047
2048   def Exec(self, feedback_fn):
2049     """Verify integrity of cluster, performing various test on nodes.
2050
2051     """
2052     # This method has too many local variables. pylint: disable-msg=R0914
2053     self.bad = False
2054     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2055     verbose = self.op.verbose
2056     self._feedback_fn = feedback_fn
2057     feedback_fn("* Verifying global settings")
2058     for msg in self.cfg.VerifyConfig():
2059       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2060
2061     # Check the cluster certificates
2062     for cert_filename in constants.ALL_CERT_FILES:
2063       (errcode, msg) = _VerifyCertificate(cert_filename)
2064       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2065
2066     vg_name = self.cfg.GetVGName()
2067     drbd_helper = self.cfg.GetDRBDHelper()
2068     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2069     cluster = self.cfg.GetClusterInfo()
2070     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2071     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2072     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2073     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2074     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2075                         for iname in instancelist)
2076     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2077     i_non_redundant = [] # Non redundant instances
2078     i_non_a_balanced = [] # Non auto-balanced instances
2079     n_offline = 0 # Count of offline nodes
2080     n_drained = 0 # Count of nodes being drained
2081     node_vol_should = {}
2082
2083     # FIXME: verify OS list
2084     # do local checksums
2085     master_files = [constants.CLUSTER_CONF_FILE]
2086     master_node = self.master_node = self.cfg.GetMasterNode()
2087     master_ip = self.cfg.GetMasterIP()
2088
2089     file_names = ssconf.SimpleStore().GetFileList()
2090     file_names.extend(constants.ALL_CERT_FILES)
2091     file_names.extend(master_files)
2092     if cluster.modify_etc_hosts:
2093       file_names.append(constants.ETC_HOSTS)
2094
2095     local_checksums = utils.FingerprintFiles(file_names)
2096
2097     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2098     node_verify_param = {
2099       constants.NV_FILELIST: file_names,
2100       constants.NV_NODELIST: [node.name for node in nodeinfo
2101                               if not node.offline],
2102       constants.NV_HYPERVISOR: hypervisors,
2103       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2104                                   node.secondary_ip) for node in nodeinfo
2105                                  if not node.offline],
2106       constants.NV_INSTANCELIST: hypervisors,
2107       constants.NV_VERSION: None,
2108       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2109       constants.NV_NODESETUP: None,
2110       constants.NV_TIME: None,
2111       constants.NV_MASTERIP: (master_node, master_ip),
2112       constants.NV_OSLIST: None,
2113       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2114       }
2115
2116     if vg_name is not None:
2117       node_verify_param[constants.NV_VGLIST] = None
2118       node_verify_param[constants.NV_LVLIST] = vg_name
2119       node_verify_param[constants.NV_PVLIST] = [vg_name]
2120       node_verify_param[constants.NV_DRBDLIST] = None
2121
2122     if drbd_helper:
2123       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2124
2125     # Build our expected cluster state
2126     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2127                                                  name=node.name,
2128                                                  vm_capable=node.vm_capable))
2129                       for node in nodeinfo)
2130
2131     # Gather OOB paths
2132     oob_paths = []
2133     for node in nodeinfo:
2134       path = _SupportsOob(self.cfg, node)
2135       if path and path not in oob_paths:
2136         oob_paths.append(path)
2137
2138     if oob_paths:
2139       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2140
2141     for instance in instancelist:
2142       inst_config = instanceinfo[instance]
2143
2144       for nname in inst_config.all_nodes:
2145         if nname not in node_image:
2146           # ghost node
2147           gnode = self.NodeImage(name=nname)
2148           gnode.ghost = True
2149           node_image[nname] = gnode
2150
2151       inst_config.MapLVsByNode(node_vol_should)
2152
2153       pnode = inst_config.primary_node
2154       node_image[pnode].pinst.append(instance)
2155
2156       for snode in inst_config.secondary_nodes:
2157         nimg = node_image[snode]
2158         nimg.sinst.append(instance)
2159         if pnode not in nimg.sbp:
2160           nimg.sbp[pnode] = []
2161         nimg.sbp[pnode].append(instance)
2162
2163     # At this point, we have the in-memory data structures complete,
2164     # except for the runtime information, which we'll gather next
2165
2166     # Due to the way our RPC system works, exact response times cannot be
2167     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2168     # time before and after executing the request, we can at least have a time
2169     # window.
2170     nvinfo_starttime = time.time()
2171     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2172                                            self.cfg.GetClusterName())
2173     nvinfo_endtime = time.time()
2174
2175     all_drbd_map = self.cfg.ComputeDRBDMap()
2176
2177     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2178     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2179
2180     feedback_fn("* Verifying node status")
2181
2182     refos_img = None
2183
2184     for node_i in nodeinfo:
2185       node = node_i.name
2186       nimg = node_image[node]
2187
2188       if node_i.offline:
2189         if verbose:
2190           feedback_fn("* Skipping offline node %s" % (node,))
2191         n_offline += 1
2192         continue
2193
2194       if node == master_node:
2195         ntype = "master"
2196       elif node_i.master_candidate:
2197         ntype = "master candidate"
2198       elif node_i.drained:
2199         ntype = "drained"
2200         n_drained += 1
2201       else:
2202         ntype = "regular"
2203       if verbose:
2204         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2205
2206       msg = all_nvinfo[node].fail_msg
2207       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2208       if msg:
2209         nimg.rpc_fail = True
2210         continue
2211
2212       nresult = all_nvinfo[node].payload
2213
2214       nimg.call_ok = self._VerifyNode(node_i, nresult)
2215       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2216       self._VerifyNodeNetwork(node_i, nresult)
2217       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2218                             master_files)
2219
2220       self._VerifyOob(node_i, nresult)
2221
2222       if nimg.vm_capable:
2223         self._VerifyNodeLVM(node_i, nresult, vg_name)
2224         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2225                              all_drbd_map)
2226
2227         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2228         self._UpdateNodeInstances(node_i, nresult, nimg)
2229         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2230         self._UpdateNodeOS(node_i, nresult, nimg)
2231         if not nimg.os_fail:
2232           if refos_img is None:
2233             refos_img = nimg
2234           self._VerifyNodeOS(node_i, nimg, refos_img)
2235
2236     feedback_fn("* Verifying instance status")
2237     for instance in instancelist:
2238       if verbose:
2239         feedback_fn("* Verifying instance %s" % instance)
2240       inst_config = instanceinfo[instance]
2241       self._VerifyInstance(instance, inst_config, node_image,
2242                            instdisk[instance])
2243       inst_nodes_offline = []
2244
2245       pnode = inst_config.primary_node
2246       pnode_img = node_image[pnode]
2247       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2248                self.ENODERPC, pnode, "instance %s, connection to"
2249                " primary node failed", instance)
2250
2251       if pnode_img.offline:
2252         inst_nodes_offline.append(pnode)
2253
2254       # If the instance is non-redundant we cannot survive losing its primary
2255       # node, so we are not N+1 compliant. On the other hand we have no disk
2256       # templates with more than one secondary so that situation is not well
2257       # supported either.
2258       # FIXME: does not support file-backed instances
2259       if not inst_config.secondary_nodes:
2260         i_non_redundant.append(instance)
2261
2262       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2263                instance, "instance has multiple secondary nodes: %s",
2264                utils.CommaJoin(inst_config.secondary_nodes),
2265                code=self.ETYPE_WARNING)
2266
2267       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2268         pnode = inst_config.primary_node
2269         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2270         instance_groups = {}
2271
2272         for node in instance_nodes:
2273           instance_groups.setdefault(nodeinfo_byname[node].group,
2274                                      []).append(node)
2275
2276         pretty_list = [
2277           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2278           # Sort so that we always list the primary node first.
2279           for group, nodes in sorted(instance_groups.items(),
2280                                      key=lambda (_, nodes): pnode in nodes,
2281                                      reverse=True)]
2282
2283         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2284                       instance, "instance has primary and secondary nodes in"
2285                       " different groups: %s", utils.CommaJoin(pretty_list),
2286                       code=self.ETYPE_WARNING)
2287
2288       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2289         i_non_a_balanced.append(instance)
2290
2291       for snode in inst_config.secondary_nodes:
2292         s_img = node_image[snode]
2293         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2294                  "instance %s, connection to secondary node failed", instance)
2295
2296         if s_img.offline:
2297           inst_nodes_offline.append(snode)
2298
2299       # warn that the instance lives on offline nodes
2300       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2301                "instance lives on offline node(s) %s",
2302                utils.CommaJoin(inst_nodes_offline))
2303       # ... or ghost/non-vm_capable nodes
2304       for node in inst_config.all_nodes:
2305         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2306                  "instance lives on ghost node %s", node)
2307         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2308                  instance, "instance lives on non-vm_capable node %s", node)
2309
2310     feedback_fn("* Verifying orphan volumes")
2311     reserved = utils.FieldSet(*cluster.reserved_lvs)
2312     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2313
2314     feedback_fn("* Verifying orphan instances")
2315     self._VerifyOrphanInstances(instancelist, node_image)
2316
2317     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2318       feedback_fn("* Verifying N+1 Memory redundancy")
2319       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2320
2321     feedback_fn("* Other Notes")
2322     if i_non_redundant:
2323       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2324                   % len(i_non_redundant))
2325
2326     if i_non_a_balanced:
2327       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2328                   % len(i_non_a_balanced))
2329
2330     if n_offline:
2331       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2332
2333     if n_drained:
2334       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2335
2336     return not self.bad
2337
2338   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2339     """Analyze the post-hooks' result
2340
2341     This method analyses the hook result, handles it, and sends some
2342     nicely-formatted feedback back to the user.
2343
2344     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2345         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2346     @param hooks_results: the results of the multi-node hooks rpc call
2347     @param feedback_fn: function used send feedback back to the caller
2348     @param lu_result: previous Exec result
2349     @return: the new Exec result, based on the previous result
2350         and hook results
2351
2352     """
2353     # We only really run POST phase hooks, and are only interested in
2354     # their results
2355     if phase == constants.HOOKS_PHASE_POST:
2356       # Used to change hooks' output to proper indentation
2357       feedback_fn("* Hooks Results")
2358       assert hooks_results, "invalid result from hooks"
2359
2360       for node_name in hooks_results:
2361         res = hooks_results[node_name]
2362         msg = res.fail_msg
2363         test = msg and not res.offline
2364         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2365                       "Communication failure in hooks execution: %s", msg)
2366         if res.offline or msg:
2367           # No need to investigate payload if node is offline or gave an error.
2368           # override manually lu_result here as _ErrorIf only
2369           # overrides self.bad
2370           lu_result = 1
2371           continue
2372         for script, hkr, output in res.payload:
2373           test = hkr == constants.HKR_FAIL
2374           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2375                         "Script %s failed, output:", script)
2376           if test:
2377             output = self._HOOKS_INDENT_RE.sub('      ', output)
2378             feedback_fn("%s" % output)
2379             lu_result = 0
2380
2381       return lu_result
2382
2383
2384 class LUClusterVerifyDisks(NoHooksLU):
2385   """Verifies the cluster disks status.
2386
2387   """
2388   REQ_BGL = False
2389
2390   def ExpandNames(self):
2391     self.needed_locks = {
2392       locking.LEVEL_NODE: locking.ALL_SET,
2393       locking.LEVEL_INSTANCE: locking.ALL_SET,
2394     }
2395     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2396
2397   def Exec(self, feedback_fn):
2398     """Verify integrity of cluster disks.
2399
2400     @rtype: tuple of three items
2401     @return: a tuple of (dict of node-to-node_error, list of instances
2402         which need activate-disks, dict of instance: (node, volume) for
2403         missing volumes
2404
2405     """
2406     result = res_nodes, res_instances, res_missing = {}, [], {}
2407
2408     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2409     instances = [self.cfg.GetInstanceInfo(name)
2410                  for name in self.cfg.GetInstanceList()]
2411
2412     nv_dict = {}
2413     for inst in instances:
2414       inst_lvs = {}
2415       if (not inst.admin_up or
2416           inst.disk_template not in constants.DTS_NET_MIRROR):
2417         continue
2418       inst.MapLVsByNode(inst_lvs)
2419       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2420       for node, vol_list in inst_lvs.iteritems():
2421         for vol in vol_list:
2422           nv_dict[(node, vol)] = inst
2423
2424     if not nv_dict:
2425       return result
2426
2427     vg_names = self.rpc.call_vg_list(nodes)
2428     for node in nodes:
2429       vg_names[node].Raise("Cannot get list of VGs")
2430
2431     for node in nodes:
2432       # node_volume
2433       node_res = self.rpc.call_lv_list([node],
2434                                        vg_names[node].payload.keys())[node]
2435       if node_res.offline:
2436         continue
2437       msg = node_res.fail_msg
2438       if msg:
2439         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2440         res_nodes[node] = msg
2441         continue
2442
2443       lvs = node_res.payload
2444       for lv_name, (_, _, lv_online) in lvs.items():
2445         inst = nv_dict.pop((node, lv_name), None)
2446         if (not lv_online and inst is not None
2447             and inst.name not in res_instances):
2448           res_instances.append(inst.name)
2449
2450     # any leftover items in nv_dict are missing LVs, let's arrange the
2451     # data better
2452     for key, inst in nv_dict.iteritems():
2453       if inst.name not in res_missing:
2454         res_missing[inst.name] = []
2455       res_missing[inst.name].append(key)
2456
2457     return result
2458
2459
2460 class LUClusterRepairDiskSizes(NoHooksLU):
2461   """Verifies the cluster disks sizes.
2462
2463   """
2464   REQ_BGL = False
2465
2466   def ExpandNames(self):
2467     if self.op.instances:
2468       self.wanted_names = []
2469       for name in self.op.instances:
2470         full_name = _ExpandInstanceName(self.cfg, name)
2471         self.wanted_names.append(full_name)
2472       self.needed_locks = {
2473         locking.LEVEL_NODE: [],
2474         locking.LEVEL_INSTANCE: self.wanted_names,
2475         }
2476       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2477     else:
2478       self.wanted_names = None
2479       self.needed_locks = {
2480         locking.LEVEL_NODE: locking.ALL_SET,
2481         locking.LEVEL_INSTANCE: locking.ALL_SET,
2482         }
2483     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2484
2485   def DeclareLocks(self, level):
2486     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2487       self._LockInstancesNodes(primary_only=True)
2488
2489   def CheckPrereq(self):
2490     """Check prerequisites.
2491
2492     This only checks the optional instance list against the existing names.
2493
2494     """
2495     if self.wanted_names is None:
2496       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2497
2498     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2499                              in self.wanted_names]
2500
2501   def _EnsureChildSizes(self, disk):
2502     """Ensure children of the disk have the needed disk size.
2503
2504     This is valid mainly for DRBD8 and fixes an issue where the
2505     children have smaller disk size.
2506
2507     @param disk: an L{ganeti.objects.Disk} object
2508
2509     """
2510     if disk.dev_type == constants.LD_DRBD8:
2511       assert disk.children, "Empty children for DRBD8?"
2512       fchild = disk.children[0]
2513       mismatch = fchild.size < disk.size
2514       if mismatch:
2515         self.LogInfo("Child disk has size %d, parent %d, fixing",
2516                      fchild.size, disk.size)
2517         fchild.size = disk.size
2518
2519       # and we recurse on this child only, not on the metadev
2520       return self._EnsureChildSizes(fchild) or mismatch
2521     else:
2522       return False
2523
2524   def Exec(self, feedback_fn):
2525     """Verify the size of cluster disks.
2526
2527     """
2528     # TODO: check child disks too
2529     # TODO: check differences in size between primary/secondary nodes
2530     per_node_disks = {}
2531     for instance in self.wanted_instances:
2532       pnode = instance.primary_node
2533       if pnode not in per_node_disks:
2534         per_node_disks[pnode] = []
2535       for idx, disk in enumerate(instance.disks):
2536         per_node_disks[pnode].append((instance, idx, disk))
2537
2538     changed = []
2539     for node, dskl in per_node_disks.items():
2540       newl = [v[2].Copy() for v in dskl]
2541       for dsk in newl:
2542         self.cfg.SetDiskID(dsk, node)
2543       result = self.rpc.call_blockdev_getsizes(node, newl)
2544       if result.fail_msg:
2545         self.LogWarning("Failure in blockdev_getsizes call to node"
2546                         " %s, ignoring", node)
2547         continue
2548       if len(result.data) != len(dskl):
2549         self.LogWarning("Invalid result from node %s, ignoring node results",
2550                         node)
2551         continue
2552       for ((instance, idx, disk), size) in zip(dskl, result.data):
2553         if size is None:
2554           self.LogWarning("Disk %d of instance %s did not return size"
2555                           " information, ignoring", idx, instance.name)
2556           continue
2557         if not isinstance(size, (int, long)):
2558           self.LogWarning("Disk %d of instance %s did not return valid"
2559                           " size information, ignoring", idx, instance.name)
2560           continue
2561         size = size >> 20
2562         if size != disk.size:
2563           self.LogInfo("Disk %d of instance %s has mismatched size,"
2564                        " correcting: recorded %d, actual %d", idx,
2565                        instance.name, disk.size, size)
2566           disk.size = size
2567           self.cfg.Update(instance, feedback_fn)
2568           changed.append((instance.name, idx, size))
2569         if self._EnsureChildSizes(disk):
2570           self.cfg.Update(instance, feedback_fn)
2571           changed.append((instance.name, idx, disk.size))
2572     return changed
2573
2574
2575 class LUClusterRename(LogicalUnit):
2576   """Rename the cluster.
2577
2578   """
2579   HPATH = "cluster-rename"
2580   HTYPE = constants.HTYPE_CLUSTER
2581
2582   def BuildHooksEnv(self):
2583     """Build hooks env.
2584
2585     """
2586     env = {
2587       "OP_TARGET": self.cfg.GetClusterName(),
2588       "NEW_NAME": self.op.name,
2589       }
2590     mn = self.cfg.GetMasterNode()
2591     all_nodes = self.cfg.GetNodeList()
2592     return env, [mn], all_nodes
2593
2594   def CheckPrereq(self):
2595     """Verify that the passed name is a valid one.
2596
2597     """
2598     hostname = netutils.GetHostname(name=self.op.name,
2599                                     family=self.cfg.GetPrimaryIPFamily())
2600
2601     new_name = hostname.name
2602     self.ip = new_ip = hostname.ip
2603     old_name = self.cfg.GetClusterName()
2604     old_ip = self.cfg.GetMasterIP()
2605     if new_name == old_name and new_ip == old_ip:
2606       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2607                                  " cluster has changed",
2608                                  errors.ECODE_INVAL)
2609     if new_ip != old_ip:
2610       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2611         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2612                                    " reachable on the network" %
2613                                    new_ip, errors.ECODE_NOTUNIQUE)
2614
2615     self.op.name = new_name
2616
2617   def Exec(self, feedback_fn):
2618     """Rename the cluster.
2619
2620     """
2621     clustername = self.op.name
2622     ip = self.ip
2623
2624     # shutdown the master IP
2625     master = self.cfg.GetMasterNode()
2626     result = self.rpc.call_node_stop_master(master, False)
2627     result.Raise("Could not disable the master role")
2628
2629     try:
2630       cluster = self.cfg.GetClusterInfo()
2631       cluster.cluster_name = clustername
2632       cluster.master_ip = ip
2633       self.cfg.Update(cluster, feedback_fn)
2634
2635       # update the known hosts file
2636       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2637       node_list = self.cfg.GetOnlineNodeList()
2638       try:
2639         node_list.remove(master)
2640       except ValueError:
2641         pass
2642       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2643     finally:
2644       result = self.rpc.call_node_start_master(master, False, False)
2645       msg = result.fail_msg
2646       if msg:
2647         self.LogWarning("Could not re-enable the master role on"
2648                         " the master, please restart manually: %s", msg)
2649
2650     return clustername
2651
2652
2653 class LUClusterSetParams(LogicalUnit):
2654   """Change the parameters of the cluster.
2655
2656   """
2657   HPATH = "cluster-modify"
2658   HTYPE = constants.HTYPE_CLUSTER
2659   REQ_BGL = False
2660
2661   def CheckArguments(self):
2662     """Check parameters
2663
2664     """
2665     if self.op.uid_pool:
2666       uidpool.CheckUidPool(self.op.uid_pool)
2667
2668     if self.op.add_uids:
2669       uidpool.CheckUidPool(self.op.add_uids)
2670
2671     if self.op.remove_uids:
2672       uidpool.CheckUidPool(self.op.remove_uids)
2673
2674   def ExpandNames(self):
2675     # FIXME: in the future maybe other cluster params won't require checking on
2676     # all nodes to be modified.
2677     self.needed_locks = {
2678       locking.LEVEL_NODE: locking.ALL_SET,
2679     }
2680     self.share_locks[locking.LEVEL_NODE] = 1
2681
2682   def BuildHooksEnv(self):
2683     """Build hooks env.
2684
2685     """
2686     env = {
2687       "OP_TARGET": self.cfg.GetClusterName(),
2688       "NEW_VG_NAME": self.op.vg_name,
2689       }
2690     mn = self.cfg.GetMasterNode()
2691     return env, [mn], [mn]
2692
2693   def CheckPrereq(self):
2694     """Check prerequisites.
2695
2696     This checks whether the given params don't conflict and
2697     if the given volume group is valid.
2698
2699     """
2700     if self.op.vg_name is not None and not self.op.vg_name:
2701       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2702         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2703                                    " instances exist", errors.ECODE_INVAL)
2704
2705     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2706       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2707         raise errors.OpPrereqError("Cannot disable drbd helper while"
2708                                    " drbd-based instances exist",
2709                                    errors.ECODE_INVAL)
2710
2711     node_list = self.acquired_locks[locking.LEVEL_NODE]
2712
2713     # if vg_name not None, checks given volume group on all nodes
2714     if self.op.vg_name:
2715       vglist = self.rpc.call_vg_list(node_list)
2716       for node in node_list:
2717         msg = vglist[node].fail_msg
2718         if msg:
2719           # ignoring down node
2720           self.LogWarning("Error while gathering data on node %s"
2721                           " (ignoring node): %s", node, msg)
2722           continue
2723         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2724                                               self.op.vg_name,
2725                                               constants.MIN_VG_SIZE)
2726         if vgstatus:
2727           raise errors.OpPrereqError("Error on node '%s': %s" %
2728                                      (node, vgstatus), errors.ECODE_ENVIRON)
2729
2730     if self.op.drbd_helper:
2731       # checks given drbd helper on all nodes
2732       helpers = self.rpc.call_drbd_helper(node_list)
2733       for node in node_list:
2734         ninfo = self.cfg.GetNodeInfo(node)
2735         if ninfo.offline:
2736           self.LogInfo("Not checking drbd helper on offline node %s", node)
2737           continue
2738         msg = helpers[node].fail_msg
2739         if msg:
2740           raise errors.OpPrereqError("Error checking drbd helper on node"
2741                                      " '%s': %s" % (node, msg),
2742                                      errors.ECODE_ENVIRON)
2743         node_helper = helpers[node].payload
2744         if node_helper != self.op.drbd_helper:
2745           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2746                                      (node, node_helper), errors.ECODE_ENVIRON)
2747
2748     self.cluster = cluster = self.cfg.GetClusterInfo()
2749     # validate params changes
2750     if self.op.beparams:
2751       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2752       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2753
2754     if self.op.ndparams:
2755       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2756       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2757
2758     if self.op.nicparams:
2759       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2760       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2761       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2762       nic_errors = []
2763
2764       # check all instances for consistency
2765       for instance in self.cfg.GetAllInstancesInfo().values():
2766         for nic_idx, nic in enumerate(instance.nics):
2767           params_copy = copy.deepcopy(nic.nicparams)
2768           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2769
2770           # check parameter syntax
2771           try:
2772             objects.NIC.CheckParameterSyntax(params_filled)
2773           except errors.ConfigurationError, err:
2774             nic_errors.append("Instance %s, nic/%d: %s" %
2775                               (instance.name, nic_idx, err))
2776
2777           # if we're moving instances to routed, check that they have an ip
2778           target_mode = params_filled[constants.NIC_MODE]
2779           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2780             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2781                               (instance.name, nic_idx))
2782       if nic_errors:
2783         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2784                                    "\n".join(nic_errors))
2785
2786     # hypervisor list/parameters
2787     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2788     if self.op.hvparams:
2789       for hv_name, hv_dict in self.op.hvparams.items():
2790         if hv_name not in self.new_hvparams:
2791           self.new_hvparams[hv_name] = hv_dict
2792         else:
2793           self.new_hvparams[hv_name].update(hv_dict)
2794
2795     # os hypervisor parameters
2796     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2797     if self.op.os_hvp:
2798       for os_name, hvs in self.op.os_hvp.items():
2799         if os_name not in self.new_os_hvp:
2800           self.new_os_hvp[os_name] = hvs
2801         else:
2802           for hv_name, hv_dict in hvs.items():
2803             if hv_name not in self.new_os_hvp[os_name]:
2804               self.new_os_hvp[os_name][hv_name] = hv_dict
2805             else:
2806               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2807
2808     # os parameters
2809     self.new_osp = objects.FillDict(cluster.osparams, {})
2810     if self.op.osparams:
2811       for os_name, osp in self.op.osparams.items():
2812         if os_name not in self.new_osp:
2813           self.new_osp[os_name] = {}
2814
2815         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2816                                                   use_none=True)
2817
2818         if not self.new_osp[os_name]:
2819           # we removed all parameters
2820           del self.new_osp[os_name]
2821         else:
2822           # check the parameter validity (remote check)
2823           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2824                          os_name, self.new_osp[os_name])
2825
2826     # changes to the hypervisor list
2827     if self.op.enabled_hypervisors is not None:
2828       self.hv_list = self.op.enabled_hypervisors
2829       for hv in self.hv_list:
2830         # if the hypervisor doesn't already exist in the cluster
2831         # hvparams, we initialize it to empty, and then (in both
2832         # cases) we make sure to fill the defaults, as we might not
2833         # have a complete defaults list if the hypervisor wasn't
2834         # enabled before
2835         if hv not in new_hvp:
2836           new_hvp[hv] = {}
2837         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2838         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2839     else:
2840       self.hv_list = cluster.enabled_hypervisors
2841
2842     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2843       # either the enabled list has changed, or the parameters have, validate
2844       for hv_name, hv_params in self.new_hvparams.items():
2845         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2846             (self.op.enabled_hypervisors and
2847              hv_name in self.op.enabled_hypervisors)):
2848           # either this is a new hypervisor, or its parameters have changed
2849           hv_class = hypervisor.GetHypervisor(hv_name)
2850           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2851           hv_class.CheckParameterSyntax(hv_params)
2852           _CheckHVParams(self, node_list, hv_name, hv_params)
2853
2854     if self.op.os_hvp:
2855       # no need to check any newly-enabled hypervisors, since the
2856       # defaults have already been checked in the above code-block
2857       for os_name, os_hvp in self.new_os_hvp.items():
2858         for hv_name, hv_params in os_hvp.items():
2859           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2860           # we need to fill in the new os_hvp on top of the actual hv_p
2861           cluster_defaults = self.new_hvparams.get(hv_name, {})
2862           new_osp = objects.FillDict(cluster_defaults, hv_params)
2863           hv_class = hypervisor.GetHypervisor(hv_name)
2864           hv_class.CheckParameterSyntax(new_osp)
2865           _CheckHVParams(self, node_list, hv_name, new_osp)
2866
2867     if self.op.default_iallocator:
2868       alloc_script = utils.FindFile(self.op.default_iallocator,
2869                                     constants.IALLOCATOR_SEARCH_PATH,
2870                                     os.path.isfile)
2871       if alloc_script is None:
2872         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2873                                    " specified" % self.op.default_iallocator,
2874                                    errors.ECODE_INVAL)
2875
2876   def Exec(self, feedback_fn):
2877     """Change the parameters of the cluster.
2878
2879     """
2880     if self.op.vg_name is not None:
2881       new_volume = self.op.vg_name
2882       if not new_volume:
2883         new_volume = None
2884       if new_volume != self.cfg.GetVGName():
2885         self.cfg.SetVGName(new_volume)
2886       else:
2887         feedback_fn("Cluster LVM configuration already in desired"
2888                     " state, not changing")
2889     if self.op.drbd_helper is not None:
2890       new_helper = self.op.drbd_helper
2891       if not new_helper:
2892         new_helper = None
2893       if new_helper != self.cfg.GetDRBDHelper():
2894         self.cfg.SetDRBDHelper(new_helper)
2895       else:
2896         feedback_fn("Cluster DRBD helper already in desired state,"
2897                     " not changing")
2898     if self.op.hvparams:
2899       self.cluster.hvparams = self.new_hvparams
2900     if self.op.os_hvp:
2901       self.cluster.os_hvp = self.new_os_hvp
2902     if self.op.enabled_hypervisors is not None:
2903       self.cluster.hvparams = self.new_hvparams
2904       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2905     if self.op.beparams:
2906       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2907     if self.op.nicparams:
2908       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2909     if self.op.osparams:
2910       self.cluster.osparams = self.new_osp
2911     if self.op.ndparams:
2912       self.cluster.ndparams = self.new_ndparams
2913
2914     if self.op.candidate_pool_size is not None:
2915       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2916       # we need to update the pool size here, otherwise the save will fail
2917       _AdjustCandidatePool(self, [])
2918
2919     if self.op.maintain_node_health is not None:
2920       self.cluster.maintain_node_health = self.op.maintain_node_health
2921
2922     if self.op.prealloc_wipe_disks is not None:
2923       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2924
2925     if self.op.add_uids is not None:
2926       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2927
2928     if self.op.remove_uids is not None:
2929       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2930
2931     if self.op.uid_pool is not None:
2932       self.cluster.uid_pool = self.op.uid_pool
2933
2934     if self.op.default_iallocator is not None:
2935       self.cluster.default_iallocator = self.op.default_iallocator
2936
2937     if self.op.reserved_lvs is not None:
2938       self.cluster.reserved_lvs = self.op.reserved_lvs
2939
2940     def helper_os(aname, mods, desc):
2941       desc += " OS list"
2942       lst = getattr(self.cluster, aname)
2943       for key, val in mods:
2944         if key == constants.DDM_ADD:
2945           if val in lst:
2946             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2947           else:
2948             lst.append(val)
2949         elif key == constants.DDM_REMOVE:
2950           if val in lst:
2951             lst.remove(val)
2952           else:
2953             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2954         else:
2955           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2956
2957     if self.op.hidden_os:
2958       helper_os("hidden_os", self.op.hidden_os, "hidden")
2959
2960     if self.op.blacklisted_os:
2961       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2962
2963     if self.op.master_netdev:
2964       master = self.cfg.GetMasterNode()
2965       feedback_fn("Shutting down master ip on the current netdev (%s)" %
2966                   self.cluster.master_netdev)
2967       result = self.rpc.call_node_stop_master(master, False)
2968       result.Raise("Could not disable the master ip")
2969       feedback_fn("Changing master_netdev from %s to %s" %
2970                   (self.cluster.master_netdev, self.op.master_netdev))
2971       self.cluster.master_netdev = self.op.master_netdev
2972
2973     self.cfg.Update(self.cluster, feedback_fn)
2974
2975     if self.op.master_netdev:
2976       feedback_fn("Starting the master ip on the new master netdev (%s)" %
2977                   self.op.master_netdev)
2978       result = self.rpc.call_node_start_master(master, False, False)
2979       if result.fail_msg:
2980         self.LogWarning("Could not re-enable the master ip on"
2981                         " the master, please restart manually: %s",
2982                         result.fail_msg)
2983
2984
2985 def _UploadHelper(lu, nodes, fname):
2986   """Helper for uploading a file and showing warnings.
2987
2988   """
2989   if os.path.exists(fname):
2990     result = lu.rpc.call_upload_file(nodes, fname)
2991     for to_node, to_result in result.items():
2992       msg = to_result.fail_msg
2993       if msg:
2994         msg = ("Copy of file %s to node %s failed: %s" %
2995                (fname, to_node, msg))
2996         lu.proc.LogWarning(msg)
2997
2998
2999 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3000   """Distribute additional files which are part of the cluster configuration.
3001
3002   ConfigWriter takes care of distributing the config and ssconf files, but
3003   there are more files which should be distributed to all nodes. This function
3004   makes sure those are copied.
3005
3006   @param lu: calling logical unit
3007   @param additional_nodes: list of nodes not in the config to distribute to
3008   @type additional_vm: boolean
3009   @param additional_vm: whether the additional nodes are vm-capable or not
3010
3011   """
3012   # 1. Gather target nodes
3013   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3014   dist_nodes = lu.cfg.GetOnlineNodeList()
3015   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3016   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3017   if additional_nodes is not None:
3018     dist_nodes.extend(additional_nodes)
3019     if additional_vm:
3020       vm_nodes.extend(additional_nodes)
3021   if myself.name in dist_nodes:
3022     dist_nodes.remove(myself.name)
3023   if myself.name in vm_nodes:
3024     vm_nodes.remove(myself.name)
3025
3026   # 2. Gather files to distribute
3027   dist_files = set([constants.ETC_HOSTS,
3028                     constants.SSH_KNOWN_HOSTS_FILE,
3029                     constants.RAPI_CERT_FILE,
3030                     constants.RAPI_USERS_FILE,
3031                     constants.CONFD_HMAC_KEY,
3032                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3033                    ])
3034
3035   vm_files = set()
3036   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3037   for hv_name in enabled_hypervisors:
3038     hv_class = hypervisor.GetHypervisor(hv_name)
3039     vm_files.update(hv_class.GetAncillaryFiles())
3040
3041   # 3. Perform the files upload
3042   for fname in dist_files:
3043     _UploadHelper(lu, dist_nodes, fname)
3044   for fname in vm_files:
3045     _UploadHelper(lu, vm_nodes, fname)
3046
3047
3048 class LUClusterRedistConf(NoHooksLU):
3049   """Force the redistribution of cluster configuration.
3050
3051   This is a very simple LU.
3052
3053   """
3054   REQ_BGL = False
3055
3056   def ExpandNames(self):
3057     self.needed_locks = {
3058       locking.LEVEL_NODE: locking.ALL_SET,
3059     }
3060     self.share_locks[locking.LEVEL_NODE] = 1
3061
3062   def Exec(self, feedback_fn):
3063     """Redistribute the configuration.
3064
3065     """
3066     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3067     _RedistributeAncillaryFiles(self)
3068
3069
3070 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3071   """Sleep and poll for an instance's disk to sync.
3072
3073   """
3074   if not instance.disks or disks is not None and not disks:
3075     return True
3076
3077   disks = _ExpandCheckDisks(instance, disks)
3078
3079   if not oneshot:
3080     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3081
3082   node = instance.primary_node
3083
3084   for dev in disks:
3085     lu.cfg.SetDiskID(dev, node)
3086
3087   # TODO: Convert to utils.Retry
3088
3089   retries = 0
3090   degr_retries = 10 # in seconds, as we sleep 1 second each time
3091   while True:
3092     max_time = 0
3093     done = True
3094     cumul_degraded = False
3095     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3096     msg = rstats.fail_msg
3097     if msg:
3098       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3099       retries += 1
3100       if retries >= 10:
3101         raise errors.RemoteError("Can't contact node %s for mirror data,"
3102                                  " aborting." % node)
3103       time.sleep(6)
3104       continue
3105     rstats = rstats.payload
3106     retries = 0
3107     for i, mstat in enumerate(rstats):
3108       if mstat is None:
3109         lu.LogWarning("Can't compute data for node %s/%s",
3110                            node, disks[i].iv_name)
3111         continue
3112
3113       cumul_degraded = (cumul_degraded or
3114                         (mstat.is_degraded and mstat.sync_percent is None))
3115       if mstat.sync_percent is not None:
3116         done = False
3117         if mstat.estimated_time is not None:
3118           rem_time = ("%s remaining (estimated)" %
3119                       utils.FormatSeconds(mstat.estimated_time))
3120           max_time = mstat.estimated_time
3121         else:
3122           rem_time = "no time estimate"
3123         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3124                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3125
3126     # if we're done but degraded, let's do a few small retries, to
3127     # make sure we see a stable and not transient situation; therefore
3128     # we force restart of the loop
3129     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3130       logging.info("Degraded disks found, %d retries left", degr_retries)
3131       degr_retries -= 1
3132       time.sleep(1)
3133       continue
3134
3135     if done or oneshot:
3136       break
3137
3138     time.sleep(min(60, max_time))
3139
3140   if done:
3141     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3142   return not cumul_degraded
3143
3144
3145 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3146   """Check that mirrors are not degraded.
3147
3148   The ldisk parameter, if True, will change the test from the
3149   is_degraded attribute (which represents overall non-ok status for
3150   the device(s)) to the ldisk (representing the local storage status).
3151
3152   """
3153   lu.cfg.SetDiskID(dev, node)
3154
3155   result = True
3156
3157   if on_primary or dev.AssembleOnSecondary():
3158     rstats = lu.rpc.call_blockdev_find(node, dev)
3159     msg = rstats.fail_msg
3160     if msg:
3161       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3162       result = False
3163     elif not rstats.payload:
3164       lu.LogWarning("Can't find disk on node %s", node)
3165       result = False
3166     else:
3167       if ldisk:
3168         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3169       else:
3170         result = result and not rstats.payload.is_degraded
3171
3172   if dev.children:
3173     for child in dev.children:
3174       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3175
3176   return result
3177
3178
3179 class LUOobCommand(NoHooksLU):
3180   """Logical unit for OOB handling.
3181
3182   """
3183   REG_BGL = False
3184
3185   def CheckPrereq(self):
3186     """Check prerequisites.
3187
3188     This checks:
3189      - the node exists in the configuration
3190      - OOB is supported
3191
3192     Any errors are signaled by raising errors.OpPrereqError.
3193
3194     """
3195     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3196     node = self.cfg.GetNodeInfo(self.op.node_name)
3197
3198     if node is None:
3199       raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3200
3201     self.oob_program = _SupportsOob(self.cfg, node)
3202
3203     if not self.oob_program:
3204       raise errors.OpPrereqError("OOB is not supported for node %s" %
3205                                  self.op.node_name)
3206
3207     if self.op.command == constants.OOB_POWER_OFF and not node.offline:
3208       raise errors.OpPrereqError(("Cannot power off node %s because it is"
3209                                   " not marked offline") % self.op.node_name)
3210
3211     self.node = node
3212
3213   def ExpandNames(self):
3214     """Gather locks we need.
3215
3216     """
3217     node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3218     self.needed_locks = {
3219       locking.LEVEL_NODE: [node_name],
3220       }
3221
3222   def Exec(self, feedback_fn):
3223     """Execute OOB and return result if we expect any.
3224
3225     """
3226     master_node = self.cfg.GetMasterNode()
3227     node = self.node
3228
3229     logging.info("Executing out-of-band command '%s' using '%s' on %s",
3230                  self.op.command, self.oob_program, self.op.node_name)
3231     result = self.rpc.call_run_oob(master_node, self.oob_program,
3232                                    self.op.command, self.op.node_name,
3233                                    self.op.timeout)
3234
3235     result.Raise("An error occurred on execution of OOB helper")
3236
3237     self._CheckPayload(result)
3238
3239     if self.op.command == constants.OOB_HEALTH:
3240       # For health we should log important events
3241       for item, status in result.payload:
3242         if status in [constants.OOB_STATUS_WARNING,
3243                       constants.OOB_STATUS_CRITICAL]:
3244           logging.warning("On node '%s' item '%s' has status '%s'",
3245                           self.op.node_name, item, status)
3246
3247     if self.op.command == constants.OOB_POWER_ON:
3248       node.powered = True
3249     elif self.op.command == constants.OOB_POWER_OFF:
3250       node.powered = False
3251     elif self.op.command == constants.OOB_POWER_STATUS:
3252       powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3253       if powered != self.node.powered:
3254         logging.warning(("Recorded power state (%s) of node '%s' does not match"
3255                          " actual power state (%s)"), node.powered,
3256                         self.op.node_name, powered)
3257
3258     self.cfg.Update(node, feedback_fn)
3259
3260     return result.payload
3261
3262   def _CheckPayload(self, result):
3263     """Checks if the payload is valid.
3264
3265     @param result: RPC result
3266     @raises errors.OpExecError: If payload is not valid
3267
3268     """
3269     errs = []
3270     if self.op.command == constants.OOB_HEALTH:
3271       if not isinstance(result.payload, list):
3272         errs.append("command 'health' is expected to return a list but got %s" %
3273                     type(result.payload))
3274       for item, status in result.payload:
3275         if status not in constants.OOB_STATUSES:
3276           errs.append("health item '%s' has invalid status '%s'" %
3277                       (item, status))
3278
3279     if self.op.command == constants.OOB_POWER_STATUS:
3280       if not isinstance(result.payload, dict):
3281         errs.append("power-status is expected to return a dict but got %s" %
3282                     type(result.payload))
3283
3284     if self.op.command in [
3285         constants.OOB_POWER_ON,
3286         constants.OOB_POWER_OFF,
3287         constants.OOB_POWER_CYCLE,
3288         ]:
3289       if result.payload is not None:
3290         errs.append("%s is expected to not return payload but got '%s'" %
3291                     (self.op.command, result.payload))
3292
3293     if errs:
3294       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3295                                utils.CommaJoin(errs))
3296
3297
3298
3299 class LUOsDiagnose(NoHooksLU):
3300   """Logical unit for OS diagnose/query.
3301
3302   """
3303   REQ_BGL = False
3304   _HID = "hidden"
3305   _BLK = "blacklisted"
3306   _VLD = "valid"
3307   _FIELDS_STATIC = utils.FieldSet()
3308   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3309                                    "parameters", "api_versions", _HID, _BLK)
3310
3311   def CheckArguments(self):
3312     if self.op.names:
3313       raise errors.OpPrereqError("Selective OS query not supported",
3314                                  errors.ECODE_INVAL)
3315
3316     _CheckOutputFields(static=self._FIELDS_STATIC,
3317                        dynamic=self._FIELDS_DYNAMIC,
3318                        selected=self.op.output_fields)
3319
3320   def ExpandNames(self):
3321     # Lock all nodes, in shared mode
3322     # Temporary removal of locks, should be reverted later
3323     # TODO: reintroduce locks when they are lighter-weight
3324     self.needed_locks = {}
3325     #self.share_locks[locking.LEVEL_NODE] = 1
3326     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3327
3328   @staticmethod
3329   def _DiagnoseByOS(rlist):
3330     """Remaps a per-node return list into an a per-os per-node dictionary
3331
3332     @param rlist: a map with node names as keys and OS objects as values
3333
3334     @rtype: dict
3335     @return: a dictionary with osnames as keys and as value another
3336         map, with nodes as keys and tuples of (path, status, diagnose,
3337         variants, parameters, api_versions) as values, eg::
3338
3339           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3340                                      (/srv/..., False, "invalid api")],
3341                            "node2": [(/srv/..., True, "", [], [])]}
3342           }
3343
3344     """
3345     all_os = {}
3346     # we build here the list of nodes that didn't fail the RPC (at RPC
3347     # level), so that nodes with a non-responding node daemon don't
3348     # make all OSes invalid
3349     good_nodes = [node_name for node_name in rlist
3350                   if not rlist[node_name].fail_msg]
3351     for node_name, nr in rlist.items():
3352       if nr.fail_msg or not nr.payload:
3353         continue
3354       for (name, path, status, diagnose, variants,
3355            params, api_versions) in nr.payload:
3356         if name not in all_os:
3357           # build a list of nodes for this os containing empty lists
3358           # for each node in node_list
3359           all_os[name] = {}
3360           for nname in good_nodes:
3361             all_os[name][nname] = []
3362         # convert params from [name, help] to (name, help)
3363         params = [tuple(v) for v in params]
3364         all_os[name][node_name].append((path, status, diagnose,
3365                                         variants, params, api_versions))
3366     return all_os
3367
3368   def Exec(self, feedback_fn):
3369     """Compute the list of OSes.
3370
3371     """
3372     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3373     node_data = self.rpc.call_os_diagnose(valid_nodes)
3374     pol = self._DiagnoseByOS(node_data)
3375     output = []
3376     cluster = self.cfg.GetClusterInfo()
3377
3378     for os_name in utils.NiceSort(pol.keys()):
3379       os_data = pol[os_name]
3380       row = []
3381       valid = True
3382       (variants, params, api_versions) = null_state = (set(), set(), set())
3383       for idx, osl in enumerate(os_data.values()):
3384         valid = bool(valid and osl and osl[0][1])
3385         if not valid:
3386           (variants, params, api_versions) = null_state
3387           break
3388         node_variants, node_params, node_api = osl[0][3:6]
3389         if idx == 0: # first entry
3390           variants = set(node_variants)
3391           params = set(node_params)
3392           api_versions = set(node_api)
3393         else: # keep consistency
3394           variants.intersection_update(node_variants)
3395           params.intersection_update(node_params)
3396           api_versions.intersection_update(node_api)
3397
3398       is_hid = os_name in cluster.hidden_os
3399       is_blk = os_name in cluster.blacklisted_os
3400       if ((self._HID not in self.op.output_fields and is_hid) or
3401           (self._BLK not in self.op.output_fields and is_blk) or
3402           (self._VLD not in self.op.output_fields and not valid)):
3403         continue
3404
3405       for field in self.op.output_fields:
3406         if field == "name":
3407           val = os_name
3408         elif field == self._VLD:
3409           val = valid
3410         elif field == "node_status":
3411           # this is just a copy of the dict
3412           val = {}
3413           for node_name, nos_list in os_data.items():
3414             val[node_name] = nos_list
3415         elif field == "variants":
3416           val = utils.NiceSort(list(variants))
3417         elif field == "parameters":
3418           val = list(params)
3419         elif field == "api_versions":
3420           val = list(api_versions)
3421         elif field == self._HID:
3422           val = is_hid
3423         elif field == self._BLK:
3424           val = is_blk
3425         else:
3426           raise errors.ParameterError(field)
3427         row.append(val)
3428       output.append(row)
3429
3430     return output
3431
3432
3433 class LUNodeRemove(LogicalUnit):
3434   """Logical unit for removing a node.
3435
3436   """
3437   HPATH = "node-remove"
3438   HTYPE = constants.HTYPE_NODE
3439
3440   def BuildHooksEnv(self):
3441     """Build hooks env.
3442
3443     This doesn't run on the target node in the pre phase as a failed
3444     node would then be impossible to remove.
3445
3446     """
3447     env = {
3448       "OP_TARGET": self.op.node_name,
3449       "NODE_NAME": self.op.node_name,
3450       }
3451     all_nodes = self.cfg.GetNodeList()
3452     try:
3453       all_nodes.remove(self.op.node_name)
3454     except ValueError:
3455       logging.warning("Node %s which is about to be removed not found"
3456                       " in the all nodes list", self.op.node_name)
3457     return env, all_nodes, all_nodes
3458
3459   def CheckPrereq(self):
3460     """Check prerequisites.
3461
3462     This checks:
3463      - the node exists in the configuration
3464      - it does not have primary or secondary instances
3465      - it's not the master
3466
3467     Any errors are signaled by raising errors.OpPrereqError.
3468
3469     """
3470     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3471     node = self.cfg.GetNodeInfo(self.op.node_name)
3472     assert node is not None
3473
3474     instance_list = self.cfg.GetInstanceList()
3475
3476     masternode = self.cfg.GetMasterNode()
3477     if node.name == masternode:
3478       raise errors.OpPrereqError("Node is the master node,"
3479                                  " you need to failover first.",
3480                                  errors.ECODE_INVAL)
3481
3482     for instance_name in instance_list:
3483       instance = self.cfg.GetInstanceInfo(instance_name)
3484       if node.name in instance.all_nodes:
3485         raise errors.OpPrereqError("Instance %s is still running on the node,"
3486                                    " please remove first." % instance_name,
3487                                    errors.ECODE_INVAL)
3488     self.op.node_name = node.name
3489     self.node = node
3490
3491   def Exec(self, feedback_fn):
3492     """Removes the node from the cluster.
3493
3494     """
3495     node = self.node
3496     logging.info("Stopping the node daemon and removing configs from node %s",
3497                  node.name)
3498
3499     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3500
3501     # Promote nodes to master candidate as needed
3502     _AdjustCandidatePool(self, exceptions=[node.name])
3503     self.context.RemoveNode(node.name)
3504
3505     # Run post hooks on the node before it's removed
3506     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3507     try:
3508       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3509     except:
3510       # pylint: disable-msg=W0702
3511       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3512
3513     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3514     msg = result.fail_msg
3515     if msg:
3516       self.LogWarning("Errors encountered on the remote node while leaving"
3517                       " the cluster: %s", msg)
3518
3519     # Remove node from our /etc/hosts
3520     if self.cfg.GetClusterInfo().modify_etc_hosts:
3521       master_node = self.cfg.GetMasterNode()
3522       result = self.rpc.call_etc_hosts_modify(master_node,
3523                                               constants.ETC_HOSTS_REMOVE,
3524                                               node.name, None)
3525       result.Raise("Can't update hosts file with new host data")
3526       _RedistributeAncillaryFiles(self)
3527
3528
3529 class _NodeQuery(_QueryBase):
3530   FIELDS = query.NODE_FIELDS
3531
3532   def ExpandNames(self, lu):
3533     lu.needed_locks = {}
3534     lu.share_locks[locking.LEVEL_NODE] = 1
3535
3536     if self.names:
3537       self.wanted = _GetWantedNodes(lu, self.names)
3538     else:
3539       self.wanted = locking.ALL_SET
3540
3541     self.do_locking = (self.use_locking and
3542                        query.NQ_LIVE in self.requested_data)
3543
3544     if self.do_locking:
3545       # if we don't request only static fields, we need to lock the nodes
3546       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3547
3548   def DeclareLocks(self, lu, level):
3549     pass
3550
3551   def _GetQueryData(self, lu):
3552     """Computes the list of nodes and their attributes.
3553
3554     """
3555     all_info = lu.cfg.GetAllNodesInfo()
3556
3557     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3558
3559     # Gather data as requested
3560     if query.NQ_LIVE in self.requested_data:
3561       node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3562                                         lu.cfg.GetHypervisorType())
3563       live_data = dict((name, nresult.payload)
3564                        for (name, nresult) in node_data.items()
3565                        if not nresult.fail_msg and nresult.payload)
3566     else:
3567       live_data = None
3568
3569     if query.NQ_INST in self.requested_data:
3570       node_to_primary = dict([(name, set()) for name in nodenames])
3571       node_to_secondary = dict([(name, set()) for name in nodenames])
3572
3573       inst_data = lu.cfg.GetAllInstancesInfo()
3574
3575       for inst in inst_data.values():
3576         if inst.primary_node in node_to_primary:
3577           node_to_primary[inst.primary_node].add(inst.name)
3578         for secnode in inst.secondary_nodes:
3579           if secnode in node_to_secondary:
3580             node_to_secondary[secnode].add(inst.name)
3581     else:
3582       node_to_primary = None
3583       node_to_secondary = None
3584
3585     if query.NQ_OOB in self.requested_data:
3586       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3587                          for name, node in all_info.iteritems())
3588     else:
3589       oob_support = None
3590
3591     if query.NQ_GROUP in self.requested_data:
3592       groups = lu.cfg.GetAllNodeGroupsInfo()
3593     else:
3594       groups = {}
3595
3596     return query.NodeQueryData([all_info[name] for name in nodenames],
3597                                live_data, lu.cfg.GetMasterNode(),
3598                                node_to_primary, node_to_secondary, groups,
3599                                oob_support, lu.cfg.GetClusterInfo())
3600
3601
3602 class LUNodeQuery(NoHooksLU):
3603   """Logical unit for querying nodes.
3604
3605   """
3606   # pylint: disable-msg=W0142
3607   REQ_BGL = False
3608
3609   def CheckArguments(self):
3610     self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3611                          self.op.use_locking)
3612
3613   def ExpandNames(self):
3614     self.nq.ExpandNames(self)
3615
3616   def Exec(self, feedback_fn):
3617     return self.nq.OldStyleQuery(self)
3618
3619
3620 class LUNodeQueryvols(NoHooksLU):
3621   """Logical unit for getting volumes on node(s).
3622
3623   """
3624   REQ_BGL = False
3625   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3626   _FIELDS_STATIC = utils.FieldSet("node")
3627
3628   def CheckArguments(self):
3629     _CheckOutputFields(static=self._FIELDS_STATIC,
3630                        dynamic=self._FIELDS_DYNAMIC,
3631                        selected=self.op.output_fields)
3632
3633   def ExpandNames(self):
3634     self.needed_locks = {}
3635     self.share_locks[locking.LEVEL_NODE] = 1
3636     if not self.op.nodes:
3637       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3638     else:
3639       self.needed_locks[locking.LEVEL_NODE] = \
3640         _GetWantedNodes(self, self.op.nodes)
3641
3642   def Exec(self, feedback_fn):
3643     """Computes the list of nodes and their attributes.
3644
3645     """
3646     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3647     volumes = self.rpc.call_node_volumes(nodenames)
3648
3649     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3650              in self.cfg.GetInstanceList()]
3651
3652     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3653
3654     output = []
3655     for node in nodenames:
3656       nresult = volumes[node]
3657       if nresult.offline:
3658         continue
3659       msg = nresult.fail_msg
3660       if msg:
3661         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3662         continue
3663
3664       node_vols = nresult.payload[:]
3665       node_vols.sort(key=lambda vol: vol['dev'])
3666
3667       for vol in node_vols:
3668         node_output = []
3669         for field in self.op.output_fields:
3670           if field == "node":
3671             val = node
3672           elif field == "phys":
3673             val = vol['dev']
3674           elif field == "vg":
3675             val = vol['vg']
3676           elif field == "name":
3677             val = vol['name']
3678           elif field == "size":
3679             val = int(float(vol['size']))
3680           elif field == "instance":
3681             for inst in ilist:
3682               if node not in lv_by_node[inst]:
3683                 continue
3684               if vol['name'] in lv_by_node[inst][node]:
3685                 val = inst.name
3686                 break
3687             else:
3688               val = '-'
3689           else:
3690             raise errors.ParameterError(field)
3691           node_output.append(str(val))
3692
3693         output.append(node_output)
3694
3695     return output
3696
3697
3698 class LUNodeQueryStorage(NoHooksLU):
3699   """Logical unit for getting information on storage units on node(s).
3700
3701   """
3702   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3703   REQ_BGL = False
3704
3705   def CheckArguments(self):
3706     _CheckOutputFields(static=self._FIELDS_STATIC,
3707                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3708                        selected=self.op.output_fields)
3709
3710   def ExpandNames(self):
3711     self.needed_locks = {}
3712     self.share_locks[locking.LEVEL_NODE] = 1
3713
3714     if self.op.nodes:
3715       self.needed_locks[locking.LEVEL_NODE] = \
3716         _GetWantedNodes(self, self.op.nodes)
3717     else:
3718       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3719
3720   def Exec(self, feedback_fn):
3721     """Computes the list of nodes and their attributes.
3722
3723     """
3724     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3725
3726     # Always get name to sort by
3727     if constants.SF_NAME in self.op.output_fields:
3728       fields = self.op.output_fields[:]
3729     else:
3730       fields = [constants.SF_NAME] + self.op.output_fields
3731
3732     # Never ask for node or type as it's only known to the LU
3733     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3734       while extra in fields:
3735         fields.remove(extra)
3736
3737     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3738     name_idx = field_idx[constants.SF_NAME]
3739
3740     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3741     data = self.rpc.call_storage_list(self.nodes,
3742                                       self.op.storage_type, st_args,
3743                                       self.op.name, fields)
3744
3745     result = []
3746
3747     for node in utils.NiceSort(self.nodes):
3748       nresult = data[node]
3749       if nresult.offline:
3750         continue
3751
3752       msg = nresult.fail_msg
3753       if msg:
3754         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3755         continue
3756
3757       rows = dict([(row[name_idx], row) for row in nresult.payload])
3758
3759       for name in utils.NiceSort(rows.keys()):
3760         row = rows[name]
3761
3762         out = []
3763
3764         for field in self.op.output_fields:
3765           if field == constants.SF_NODE:
3766             val = node
3767           elif field == constants.SF_TYPE:
3768             val = self.op.storage_type
3769           elif field in field_idx:
3770             val = row[field_idx[field]]
3771           else:
3772             raise errors.ParameterError(field)
3773
3774           out.append(val)
3775
3776         result.append(out)
3777
3778     return result
3779
3780
3781 class _InstanceQuery(_QueryBase):
3782   FIELDS = query.INSTANCE_FIELDS
3783
3784   def ExpandNames(self, lu):
3785     lu.needed_locks = {}
3786     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3787     lu.share_locks[locking.LEVEL_NODE] = 1
3788
3789     if self.names:
3790       self.wanted = _GetWantedInstances(lu, self.names)
3791     else:
3792       self.wanted = locking.ALL_SET
3793
3794     self.do_locking = (self.use_locking and
3795                        query.IQ_LIVE in self.requested_data)
3796     if self.do_locking:
3797       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3798       lu.needed_locks[locking.LEVEL_NODE] = []
3799       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3800
3801   def DeclareLocks(self, lu, level):
3802     if level == locking.LEVEL_NODE and self.do_locking:
3803       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3804
3805   def _GetQueryData(self, lu):
3806     """Computes the list of instances and their attributes.
3807
3808     """
3809     all_info = lu.cfg.GetAllInstancesInfo()
3810
3811     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3812
3813     instance_list = [all_info[name] for name in instance_names]
3814     nodes = frozenset([inst.primary_node for inst in instance_list])
3815     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3816     bad_nodes = []
3817     offline_nodes = []
3818
3819     # Gather data as requested
3820     if query.IQ_LIVE in self.requested_data:
3821       live_data = {}
3822       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3823       for name in nodes:
3824         result = node_data[name]
3825         if result.offline:
3826           # offline nodes will be in both lists
3827           assert result.fail_msg
3828           offline_nodes.append(name)
3829         if result.fail_msg:
3830           bad_nodes.append(name)
3831         elif result.payload:
3832           live_data.update(result.payload)
3833         # else no instance is alive
3834     else:
3835       live_data = {}
3836
3837     if query.IQ_DISKUSAGE in self.requested_data:
3838       disk_usage = dict((inst.name,
3839                          _ComputeDiskSize(inst.disk_template,
3840                                           [{"size": disk.size}
3841                                            for disk in inst.disks]))
3842                         for inst in instance_list)
3843     else:
3844       disk_usage = None
3845
3846     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3847                                    disk_usage, offline_nodes, bad_nodes,
3848                                    live_data)
3849
3850
3851 class LUQuery(NoHooksLU):
3852   """Query for resources/items of a certain kind.
3853
3854   """
3855   # pylint: disable-msg=W0142
3856   REQ_BGL = False
3857
3858   def CheckArguments(self):
3859     qcls = _GetQueryImplementation(self.op.what)
3860     names = qlang.ReadSimpleFilter("name", self.op.filter)
3861
3862     self.impl = qcls(names, self.op.fields, False)
3863
3864   def ExpandNames(self):
3865     self.impl.ExpandNames(self)
3866
3867   def DeclareLocks(self, level):
3868     self.impl.DeclareLocks(self, level)
3869
3870   def Exec(self, feedback_fn):
3871     return self.impl.NewStyleQuery(self)
3872
3873
3874 class LUQueryFields(NoHooksLU):
3875   """Query for resources/items of a certain kind.
3876
3877   """
3878   # pylint: disable-msg=W0142
3879   REQ_BGL = False
3880
3881   def CheckArguments(self):
3882     self.qcls = _GetQueryImplementation(self.op.what)
3883
3884   def ExpandNames(self):
3885     self.needed_locks = {}
3886
3887   def Exec(self, feedback_fn):
3888     return self.qcls.FieldsQuery(self.op.fields)
3889
3890
3891 class LUNodeModifyStorage(NoHooksLU):
3892   """Logical unit for modifying a storage volume on a node.
3893
3894   """
3895   REQ_BGL = False
3896
3897   def CheckArguments(self):
3898     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3899
3900     storage_type = self.op.storage_type
3901
3902     try:
3903       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3904     except KeyError:
3905       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3906                                  " modified" % storage_type,
3907                                  errors.ECODE_INVAL)
3908
3909     diff = set(self.op.changes.keys()) - modifiable
3910     if diff:
3911       raise errors.OpPrereqError("The following fields can not be modified for"
3912                                  " storage units of type '%s': %r" %
3913                                  (storage_type, list(diff)),
3914                                  errors.ECODE_INVAL)
3915
3916   def ExpandNames(self):
3917     self.needed_locks = {
3918       locking.LEVEL_NODE: self.op.node_name,
3919       }
3920
3921   def Exec(self, feedback_fn):
3922     """Computes the list of nodes and their attributes.
3923
3924     """
3925     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3926     result = self.rpc.call_storage_modify(self.op.node_name,
3927                                           self.op.storage_type, st_args,
3928                                           self.op.name, self.op.changes)
3929     result.Raise("Failed to modify storage unit '%s' on %s" %
3930                  (self.op.name, self.op.node_name))
3931
3932
3933 class LUNodeAdd(LogicalUnit):
3934   """Logical unit for adding node to the cluster.
3935
3936   """
3937   HPATH = "node-add"
3938   HTYPE = constants.HTYPE_NODE
3939   _NFLAGS = ["master_capable", "vm_capable"]
3940
3941   def CheckArguments(self):
3942     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3943     # validate/normalize the node name
3944     self.hostname = netutils.GetHostname(name=self.op.node_name,
3945                                          family=self.primary_ip_family)
3946     self.op.node_name = self.hostname.name
3947     if self.op.readd and self.op.group:
3948       raise errors.OpPrereqError("Cannot pass a node group when a node is"
3949                                  " being readded", errors.ECODE_INVAL)
3950
3951   def BuildHooksEnv(self):
3952     """Build hooks env.
3953
3954     This will run on all nodes before, and on all nodes + the new node after.
3955
3956     """
3957     env = {
3958       "OP_TARGET": self.op.node_name,
3959       "NODE_NAME": self.op.node_name,
3960       "NODE_PIP": self.op.primary_ip,
3961       "NODE_SIP": self.op.secondary_ip,
3962       "MASTER_CAPABLE": str(self.op.master_capable),
3963       "VM_CAPABLE": str(self.op.vm_capable),
3964       }
3965     nodes_0 = self.cfg.GetNodeList()
3966     nodes_1 = nodes_0 + [self.op.node_name, ]
3967     return env, nodes_0, nodes_1
3968
3969   def CheckPrereq(self):
3970     """Check prerequisites.
3971
3972     This checks:
3973      - the new node is not already in the config
3974      - it is resolvable
3975      - its parameters (single/dual homed) matches the cluster
3976
3977     Any errors are signaled by raising errors.OpPrereqError.
3978
3979     """
3980     cfg = self.cfg
3981     hostname = self.hostname
3982     node = hostname.name
3983     primary_ip = self.op.primary_ip = hostname.ip
3984     if self.op.secondary_ip is None:
3985       if self.primary_ip_family == netutils.IP6Address.family:
3986         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3987                                    " IPv4 address must be given as secondary",
3988                                    errors.ECODE_INVAL)
3989       self.op.secondary_ip = primary_ip
3990
3991     secondary_ip = self.op.secondary_ip
3992     if not netutils.IP4Address.IsValid(secondary_ip):
3993       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3994                                  " address" % secondary_ip, errors.ECODE_INVAL)
3995
3996     node_list = cfg.GetNodeList()
3997     if not self.op.readd and node in node_list:
3998       raise errors.OpPrereqError("Node %s is already in the configuration" %
3999                                  node, errors.ECODE_EXISTS)
4000     elif self.op.readd and node not in node_list:
4001       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4002                                  errors.ECODE_NOENT)
4003
4004     self.changed_primary_ip = False
4005
4006     for existing_node_name in node_list:
4007       existing_node = cfg.GetNodeInfo(existing_node_name)
4008
4009       if self.op.readd and node == existing_node_name:
4010         if existing_node.secondary_ip != secondary_ip:
4011           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4012                                      " address configuration as before",
4013                                      errors.ECODE_INVAL)
4014         if existing_node.primary_ip != primary_ip:
4015           self.changed_primary_ip = True
4016
4017         continue
4018
4019       if (existing_node.primary_ip == primary_ip or
4020           existing_node.secondary_ip == primary_ip or
4021           existing_node.primary_ip == secondary_ip or
4022           existing_node.secondary_ip == secondary_ip):
4023         raise errors.OpPrereqError("New node ip address(es) conflict with"
4024                                    " existing node %s" % existing_node.name,
4025                                    errors.ECODE_NOTUNIQUE)
4026
4027     # After this 'if' block, None is no longer a valid value for the
4028     # _capable op attributes
4029     if self.op.readd:
4030       old_node = self.cfg.GetNodeInfo(node)
4031       assert old_node is not None, "Can't retrieve locked node %s" % node
4032       for attr in self._NFLAGS:
4033         if getattr(self.op, attr) is None:
4034           setattr(self.op, attr, getattr(old_node, attr))
4035     else:
4036       for attr in self._NFLAGS:
4037         if getattr(self.op, attr) is None:
4038           setattr(self.op, attr, True)
4039
4040     if self.op.readd and not self.op.vm_capable:
4041       pri, sec = cfg.GetNodeInstances(node)
4042       if pri or sec:
4043         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4044                                    " flag set to false, but it already holds"
4045                                    " instances" % node,
4046                                    errors.ECODE_STATE)
4047
4048     # check that the type of the node (single versus dual homed) is the
4049     # same as for the master
4050     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4051     master_singlehomed = myself.secondary_ip == myself.primary_ip
4052     newbie_singlehomed = secondary_ip == primary_ip
4053     if master_singlehomed != newbie_singlehomed:
4054       if master_singlehomed:
4055         raise errors.OpPrereqError("The master has no secondary ip but the"
4056                                    " new node has one",
4057                                    errors.ECODE_INVAL)
4058       else:
4059         raise errors.OpPrereqError("The master has a secondary ip but the"
4060                                    " new node doesn't have one",
4061                                    errors.ECODE_INVAL)
4062
4063     # checks reachability
4064     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4065       raise errors.OpPrereqError("Node not reachable by ping",
4066                                  errors.ECODE_ENVIRON)
4067
4068     if not newbie_singlehomed:
4069       # check reachability from my secondary ip to newbie's secondary ip
4070       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4071                            source=myself.secondary_ip):
4072         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4073                                    " based ping to node daemon port",
4074                                    errors.ECODE_ENVIRON)
4075
4076     if self.op.readd:
4077       exceptions = [node]
4078     else:
4079       exceptions = []
4080
4081     if self.op.master_capable:
4082       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4083     else:
4084       self.master_candidate = False
4085
4086     if self.op.readd:
4087       self.new_node = old_node
4088     else:
4089       node_group = cfg.LookupNodeGroup(self.op.group)
4090       self.new_node = objects.Node(name=node,
4091                                    primary_ip=primary_ip,
4092                                    secondary_ip=secondary_ip,
4093                                    master_candidate=self.master_candidate,
4094                                    offline=False, drained=False,
4095                                    group=node_group)
4096
4097     if self.op.ndparams:
4098       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4099
4100   def Exec(self, feedback_fn):
4101     """Adds the new node to the cluster.
4102
4103     """
4104     new_node = self.new_node
4105     node = new_node.name
4106
4107     # We adding a new node so we assume it's powered
4108     new_node.powered = True
4109
4110     # for re-adds, reset the offline/drained/master-candidate flags;
4111     # we need to reset here, otherwise offline would prevent RPC calls
4112     # later in the procedure; this also means that if the re-add
4113     # fails, we are left with a non-offlined, broken node
4114     if self.op.readd:
4115       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4116       self.LogInfo("Readding a node, the offline/drained flags were reset")
4117       # if we demote the node, we do cleanup later in the procedure
4118       new_node.master_candidate = self.master_candidate
4119       if self.changed_primary_ip:
4120         new_node.primary_ip = self.op.primary_ip
4121
4122     # copy the master/vm_capable flags
4123     for attr in self._NFLAGS:
4124       setattr(new_node, attr, getattr(self.op, attr))
4125
4126     # notify the user about any possible mc promotion
4127     if new_node.master_candidate:
4128       self.LogInfo("Node will be a master candidate")
4129
4130     if self.op.ndparams:
4131       new_node.ndparams = self.op.ndparams
4132     else:
4133       new_node.ndparams = {}
4134
4135     # check connectivity
4136     result = self.rpc.call_version([node])[node]
4137     result.Raise("Can't get version information from node %s" % node)
4138     if constants.PROTOCOL_VERSION == result.payload:
4139       logging.info("Communication to node %s fine, sw version %s match",
4140                    node, result.payload)
4141     else:
4142       raise errors.OpExecError("Version mismatch master version %s,"
4143                                " node version %s" %
4144                                (constants.PROTOCOL_VERSION, result.payload))
4145
4146     # Add node to our /etc/hosts, and add key to known_hosts
4147     if self.cfg.GetClusterInfo().modify_etc_hosts:
4148       master_node = self.cfg.GetMasterNode()
4149       result = self.rpc.call_etc_hosts_modify(master_node,
4150                                               constants.ETC_HOSTS_ADD,
4151                                               self.hostname.name,
4152                                               self.hostname.ip)
4153       result.Raise("Can't update hosts file with new host data")
4154
4155     if new_node.secondary_ip != new_node.primary_ip:
4156       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4157                                False)
4158
4159     node_verify_list = [self.cfg.GetMasterNode()]
4160     node_verify_param = {
4161       constants.NV_NODELIST: [node],
4162       # TODO: do a node-net-test as well?
4163     }
4164
4165     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4166                                        self.cfg.GetClusterName())
4167     for verifier in node_verify_list:
4168       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4169       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4170       if nl_payload:
4171         for failed in nl_payload:
4172           feedback_fn("ssh/hostname verification failed"
4173                       " (checking from %s): %s" %
4174                       (verifier, nl_payload[failed]))
4175         raise errors.OpExecError("ssh/hostname verification failed.")
4176
4177     if self.op.readd:
4178       _RedistributeAncillaryFiles(self)
4179       self.context.ReaddNode(new_node)
4180       # make sure we redistribute the config
4181       self.cfg.Update(new_node, feedback_fn)
4182       # and make sure the new node will not have old files around
4183       if not new_node.master_candidate:
4184         result = self.rpc.call_node_demote_from_mc(new_node.name)
4185         msg = result.fail_msg
4186         if msg:
4187           self.LogWarning("Node failed to demote itself from master"
4188                           " candidate status: %s" % msg)
4189     else:
4190       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4191                                   additional_vm=self.op.vm_capable)
4192       self.context.AddNode(new_node, self.proc.GetECId())
4193
4194
4195 class LUNodeSetParams(LogicalUnit):
4196   """Modifies the parameters of a node.
4197
4198   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4199       to the node role (as _ROLE_*)
4200   @cvar _R2F: a dictionary from node role to tuples of flags
4201   @cvar _FLAGS: a list of attribute names corresponding to the flags
4202
4203   """
4204   HPATH = "node-modify"
4205   HTYPE = constants.HTYPE_NODE
4206   REQ_BGL = False
4207   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4208   _F2R = {
4209     (True, False, False): _ROLE_CANDIDATE,
4210     (False, True, False): _ROLE_DRAINED,
4211     (False, False, True): _ROLE_OFFLINE,
4212     (False, False, False): _ROLE_REGULAR,
4213     }
4214   _R2F = dict((v, k) for k, v in _F2R.items())
4215   _FLAGS = ["master_candidate", "drained", "offline"]
4216
4217   def CheckArguments(self):
4218     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4219     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4220                 self.op.master_capable, self.op.vm_capable,
4221                 self.op.secondary_ip, self.op.ndparams]
4222     if all_mods.count(None) == len(all_mods):
4223       raise errors.OpPrereqError("Please pass at least one modification",
4224                                  errors.ECODE_INVAL)
4225     if all_mods.count(True) > 1:
4226       raise errors.OpPrereqError("Can't set the node into more than one"
4227                                  " state at the same time",
4228                                  errors.ECODE_INVAL)
4229
4230     # Boolean value that tells us whether we might be demoting from MC
4231     self.might_demote = (self.op.master_candidate == False or
4232                          self.op.offline == True or
4233                          self.op.drained == True or
4234                          self.op.master_capable == False)
4235
4236     if self.op.secondary_ip:
4237       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4238         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4239                                    " address" % self.op.secondary_ip,
4240                                    errors.ECODE_INVAL)
4241
4242     self.lock_all = self.op.auto_promote and self.might_demote
4243     self.lock_instances = self.op.secondary_ip is not None
4244
4245   def ExpandNames(self):
4246     if self.lock_all:
4247       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4248     else:
4249       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4250
4251     if self.lock_instances:
4252       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4253
4254   def DeclareLocks(self, level):
4255     # If we have locked all instances, before waiting to lock nodes, release
4256     # all the ones living on nodes unrelated to the current operation.
4257     if level == locking.LEVEL_NODE and self.lock_instances:
4258       instances_release = []
4259       instances_keep = []
4260       self.affected_instances = []
4261       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4262         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4263           instance = self.context.cfg.GetInstanceInfo(instance_name)
4264           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4265           if i_mirrored and self.op.node_name in instance.all_nodes:
4266             instances_keep.append(instance_name)
4267             self.affected_instances.append(instance)
4268           else:
4269             instances_release.append(instance_name)
4270         if instances_release:
4271           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4272           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4273
4274   def BuildHooksEnv(self):
4275     """Build hooks env.
4276
4277     This runs on the master node.
4278
4279     """
4280     env = {
4281       "OP_TARGET": self.op.node_name,
4282       "MASTER_CANDIDATE": str(self.op.master_candidate),
4283       "OFFLINE": str(self.op.offline),
4284       "DRAINED": str(self.op.drained),
4285       "MASTER_CAPABLE": str(self.op.master_capable),
4286       "VM_CAPABLE": str(self.op.vm_capable),
4287       }
4288     nl = [self.cfg.GetMasterNode(),
4289           self.op.node_name]
4290     return env, nl, nl
4291
4292   def CheckPrereq(self):
4293     """Check prerequisites.
4294
4295     This only checks the instance list against the existing names.
4296
4297     """
4298     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4299
4300     if (self.op.master_candidate is not None or
4301         self.op.drained is not None or
4302         self.op.offline is not None):
4303       # we can't change the master's node flags
4304       if self.op.node_name == self.cfg.GetMasterNode():
4305         raise errors.OpPrereqError("The master role can be changed"
4306                                    " only via master-failover",
4307                                    errors.ECODE_INVAL)
4308
4309     if self.op.master_candidate and not node.master_capable:
4310       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4311                                  " it a master candidate" % node.name,
4312                                  errors.ECODE_STATE)
4313
4314     if self.op.vm_capable == False:
4315       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4316       if ipri or isec:
4317         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4318                                    " the vm_capable flag" % node.name,
4319                                    errors.ECODE_STATE)
4320
4321     if node.master_candidate and self.might_demote and not self.lock_all:
4322       assert not self.op.auto_promote, "auto-promote set but lock_all not"
4323       # check if after removing the current node, we're missing master
4324       # candidates
4325       (mc_remaining, mc_should, _) = \
4326           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4327       if mc_remaining < mc_should:
4328         raise errors.OpPrereqError("Not enough master candidates, please"
4329                                    " pass auto_promote to allow promotion",
4330                                    errors.ECODE_STATE)
4331
4332     self.old_flags = old_flags = (node.master_candidate,
4333                                   node.drained, node.offline)
4334     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4335     self.old_role = old_role = self._F2R[old_flags]
4336
4337     # Check for ineffective changes
4338     for attr in self._FLAGS:
4339       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4340         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4341         setattr(self.op, attr, None)
4342
4343     # Past this point, any flag change to False means a transition
4344     # away from the respective state, as only real changes are kept
4345
4346     # TODO: We might query the real power state if it supports OOB
4347     if _SupportsOob(self.cfg, node):
4348       if self.op.offline is False and not (node.powered or
4349                                            self.op.powered == True):
4350         raise errors.OpPrereqError(("Please power on node %s first before you"
4351                                     " can reset offline state") %
4352                                    self.op.node_name)
4353     elif self.op.powered is not None:
4354       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4355                                   " which does not support out-of-band"
4356                                   " handling") % self.op.node_name)
4357
4358     # If we're being deofflined/drained, we'll MC ourself if needed
4359     if (self.op.drained == False or self.op.offline == False or
4360         (self.op.master_capable and not node.master_capable)):
4361       if _DecideSelfPromotion(self):
4362         self.op.master_candidate = True
4363         self.LogInfo("Auto-promoting node to master candidate")
4364
4365     # If we're no longer master capable, we'll demote ourselves from MC
4366     if self.op.master_capable == False and node.master_candidate:
4367       self.LogInfo("Demoting from master candidate")
4368       self.op.master_candidate = False
4369
4370     # Compute new role
4371     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4372     if self.op.master_candidate:
4373       new_role = self._ROLE_CANDIDATE
4374     elif self.op.drained:
4375       new_role = self._ROLE_DRAINED
4376     elif self.op.offline:
4377       new_role = self._ROLE_OFFLINE
4378     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4379       # False is still in new flags, which means we're un-setting (the
4380       # only) True flag
4381       new_role = self._ROLE_REGULAR
4382     else: # no new flags, nothing, keep old role
4383       new_role = old_role
4384
4385     self.new_role = new_role
4386
4387     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4388       # Trying to transition out of offline status
4389       result = self.rpc.call_version([node.name])[node.name]
4390       if result.fail_msg:
4391         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4392                                    " to report its version: %s" %
4393                                    (node.name, result.fail_msg),
4394                                    errors.ECODE_STATE)
4395       else:
4396         self.LogWarning("Transitioning node from offline to online state"
4397                         " without using re-add. Please make sure the node"
4398                         " is healthy!")
4399
4400     if self.op.secondary_ip:
4401       # Ok even without locking, because this can't be changed by any LU
4402       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4403       master_singlehomed = master.secondary_ip == master.primary_ip
4404       if master_singlehomed and self.op.secondary_ip:
4405         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4406                                    " homed cluster", errors.ECODE_INVAL)
4407
4408       if node.offline:
4409         if self.affected_instances:
4410           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4411                                      " node has instances (%s) configured"
4412                                      " to use it" % self.affected_instances)
4413       else:
4414         # On online nodes, check that no instances are running, and that
4415         # the node has the new ip and we can reach it.
4416         for instance in self.affected_instances:
4417           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4418
4419         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4420         if master.name != node.name:
4421           # check reachability from master secondary ip to new secondary ip
4422           if not netutils.TcpPing(self.op.secondary_ip,
4423                                   constants.DEFAULT_NODED_PORT,
4424                                   source=master.secondary_ip):
4425             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4426                                        " based ping to node daemon port",
4427                                        errors.ECODE_ENVIRON)
4428
4429     if self.op.ndparams:
4430       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4431       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4432       self.new_ndparams = new_ndparams
4433
4434   def Exec(self, feedback_fn):
4435     """Modifies a node.
4436
4437     """
4438     node = self.node
4439     old_role = self.old_role
4440     new_role = self.new_role
4441
4442     result = []
4443
4444     if self.op.ndparams:
4445       node.ndparams = self.new_ndparams
4446
4447     if self.op.powered is not None:
4448       node.powered = self.op.powered
4449
4450     for attr in ["master_capable", "vm_capable"]:
4451       val = getattr(self.op, attr)
4452       if val is not None:
4453         setattr(node, attr, val)
4454         result.append((attr, str(val)))
4455
4456     if new_role != old_role:
4457       # Tell the node to demote itself, if no longer MC and not offline
4458       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4459         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4460         if msg:
4461           self.LogWarning("Node failed to demote itself: %s", msg)
4462
4463       new_flags = self._R2F[new_role]
4464       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4465         if of != nf:
4466           result.append((desc, str(nf)))
4467       (node.master_candidate, node.drained, node.offline) = new_flags
4468
4469       # we locked all nodes, we adjust the CP before updating this node
4470       if self.lock_all:
4471         _AdjustCandidatePool(self, [node.name])
4472
4473     if self.op.secondary_ip:
4474       node.secondary_ip = self.op.secondary_ip
4475       result.append(("secondary_ip", self.op.secondary_ip))
4476
4477     # this will trigger configuration file update, if needed
4478     self.cfg.Update(node, feedback_fn)
4479
4480     # this will trigger job queue propagation or cleanup if the mc
4481     # flag changed
4482     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4483       self.context.ReaddNode(node)
4484
4485     return result
4486
4487
4488 class LUNodePowercycle(NoHooksLU):
4489   """Powercycles a node.
4490
4491   """
4492   REQ_BGL = False
4493
4494   def CheckArguments(self):
4495     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4496     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4497       raise errors.OpPrereqError("The node is the master and the force"
4498                                  " parameter was not set",
4499                                  errors.ECODE_INVAL)
4500
4501   def ExpandNames(self):
4502     """Locking for PowercycleNode.
4503
4504     This is a last-resort option and shouldn't block on other
4505     jobs. Therefore, we grab no locks.
4506
4507     """
4508     self.needed_locks = {}
4509
4510   def Exec(self, feedback_fn):
4511     """Reboots a node.
4512
4513     """
4514     result = self.rpc.call_node_powercycle(self.op.node_name,
4515                                            self.cfg.GetHypervisorType())
4516     result.Raise("Failed to schedule the reboot")
4517     return result.payload
4518
4519
4520 class LUClusterQuery(NoHooksLU):
4521   """Query cluster configuration.
4522
4523   """
4524   REQ_BGL = False
4525
4526   def ExpandNames(self):
4527     self.needed_locks = {}
4528
4529   def Exec(self, feedback_fn):
4530     """Return cluster config.
4531
4532     """
4533     cluster = self.cfg.GetClusterInfo()
4534     os_hvp = {}
4535
4536     # Filter just for enabled hypervisors
4537     for os_name, hv_dict in cluster.os_hvp.items():
4538       os_hvp[os_name] = {}
4539       for hv_name, hv_params in hv_dict.items():
4540         if hv_name in cluster.enabled_hypervisors:
4541           os_hvp[os_name][hv_name] = hv_params
4542
4543     # Convert ip_family to ip_version
4544     primary_ip_version = constants.IP4_VERSION
4545     if cluster.primary_ip_family == netutils.IP6Address.family:
4546       primary_ip_version = constants.IP6_VERSION
4547
4548     result = {
4549       "software_version": constants.RELEASE_VERSION,
4550       "protocol_version": constants.PROTOCOL_VERSION,
4551       "config_version": constants.CONFIG_VERSION,
4552       "os_api_version": max(constants.OS_API_VERSIONS),
4553       "export_version": constants.EXPORT_VERSION,
4554       "architecture": (platform.architecture()[0], platform.machine()),
4555       "name": cluster.cluster_name,
4556       "master": cluster.master_node,
4557       "default_hypervisor": cluster.enabled_hypervisors[0],
4558       "enabled_hypervisors": cluster.enabled_hypervisors,
4559       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4560                         for hypervisor_name in cluster.enabled_hypervisors]),
4561       "os_hvp": os_hvp,
4562       "beparams": cluster.beparams,
4563       "osparams": cluster.osparams,
4564       "nicparams": cluster.nicparams,
4565       "ndparams": cluster.ndparams,
4566       "candidate_pool_size": cluster.candidate_pool_size,
4567       "master_netdev": cluster.master_netdev,
4568       "volume_group_name": cluster.volume_group_name,
4569       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4570       "file_storage_dir": cluster.file_storage_dir,
4571       "maintain_node_health": cluster.maintain_node_health,
4572       "ctime": cluster.ctime,
4573       "mtime": cluster.mtime,
4574       "uuid": cluster.uuid,
4575       "tags": list(cluster.GetTags()),
4576       "uid_pool": cluster.uid_pool,
4577       "default_iallocator": cluster.default_iallocator,
4578       "reserved_lvs": cluster.reserved_lvs,
4579       "primary_ip_version": primary_ip_version,
4580       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4581       }
4582
4583     return result
4584
4585
4586 class LUClusterConfigQuery(NoHooksLU):
4587   """Return configuration values.
4588
4589   """
4590   REQ_BGL = False
4591   _FIELDS_DYNAMIC = utils.FieldSet()
4592   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4593                                   "watcher_pause", "volume_group_name")
4594
4595   def CheckArguments(self):
4596     _CheckOutputFields(static=self._FIELDS_STATIC,
4597                        dynamic=self._FIELDS_DYNAMIC,
4598                        selected=self.op.output_fields)
4599
4600   def ExpandNames(self):
4601     self.needed_locks = {}
4602
4603   def Exec(self, feedback_fn):
4604     """Dump a representation of the cluster config to the standard output.
4605
4606     """
4607     values = []
4608     for field in self.op.output_fields:
4609       if field == "cluster_name":
4610         entry = self.cfg.GetClusterName()
4611       elif field == "master_node":
4612         entry = self.cfg.GetMasterNode()
4613       elif field == "drain_flag":
4614         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4615       elif field == "watcher_pause":
4616         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4617       elif field == "volume_group_name":
4618         entry = self.cfg.GetVGName()
4619       else:
4620         raise errors.ParameterError(field)
4621       values.append(entry)
4622     return values
4623
4624
4625 class LUInstanceActivateDisks(NoHooksLU):
4626   """Bring up an instance's disks.
4627
4628   """
4629   REQ_BGL = False
4630
4631   def ExpandNames(self):
4632     self._ExpandAndLockInstance()
4633     self.needed_locks[locking.LEVEL_NODE] = []
4634     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4635
4636   def DeclareLocks(self, level):
4637     if level == locking.LEVEL_NODE:
4638       self._LockInstancesNodes()
4639
4640   def CheckPrereq(self):
4641     """Check prerequisites.
4642
4643     This checks that the instance is in the cluster.
4644
4645     """
4646     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4647     assert self.instance is not None, \
4648       "Cannot retrieve locked instance %s" % self.op.instance_name
4649     _CheckNodeOnline(self, self.instance.primary_node)
4650
4651   def Exec(self, feedback_fn):
4652     """Activate the disks.
4653
4654     """
4655     disks_ok, disks_info = \
4656               _AssembleInstanceDisks(self, self.instance,
4657                                      ignore_size=self.op.ignore_size)
4658     if not disks_ok:
4659       raise errors.OpExecError("Cannot activate block devices")
4660
4661     return disks_info
4662
4663
4664 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4665                            ignore_size=False):
4666   """Prepare the block devices for an instance.
4667
4668   This sets up the block devices on all nodes.
4669
4670   @type lu: L{LogicalUnit}
4671   @param lu: the logical unit on whose behalf we execute
4672   @type instance: L{objects.Instance}
4673   @param instance: the instance for whose disks we assemble
4674   @type disks: list of L{objects.Disk} or None
4675   @param disks: which disks to assemble (or all, if None)
4676   @type ignore_secondaries: boolean
4677   @param ignore_secondaries: if true, errors on secondary nodes
4678       won't result in an error return from the function
4679   @type ignore_size: boolean
4680   @param ignore_size: if true, the current known size of the disk
4681       will not be used during the disk activation, useful for cases
4682       when the size is wrong
4683   @return: False if the operation failed, otherwise a list of
4684       (host, instance_visible_name, node_visible_name)
4685       with the mapping from node devices to instance devices
4686
4687   """
4688   device_info = []
4689   disks_ok = True
4690   iname = instance.name
4691   disks = _ExpandCheckDisks(instance, disks)
4692
4693   # With the two passes mechanism we try to reduce the window of
4694   # opportunity for the race condition of switching DRBD to primary
4695   # before handshaking occured, but we do not eliminate it
4696
4697   # The proper fix would be to wait (with some limits) until the
4698   # connection has been made and drbd transitions from WFConnection
4699   # into any other network-connected state (Connected, SyncTarget,
4700   # SyncSource, etc.)
4701
4702   # 1st pass, assemble on all nodes in secondary mode
4703   for inst_disk in disks:
4704     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4705       if ignore_size:
4706         node_disk = node_disk.Copy()
4707         node_disk.UnsetSize()
4708       lu.cfg.SetDiskID(node_disk, node)
4709       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4710       msg = result.fail_msg
4711       if msg:
4712         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4713                            " (is_primary=False, pass=1): %s",
4714                            inst_disk.iv_name, node, msg)
4715         if not ignore_secondaries:
4716           disks_ok = False
4717
4718   # FIXME: race condition on drbd migration to primary
4719
4720   # 2nd pass, do only the primary node
4721   for inst_disk in disks:
4722     dev_path = None
4723
4724     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4725       if node != instance.primary_node:
4726         continue
4727       if ignore_size:
4728         node_disk = node_disk.Copy()
4729         node_disk.UnsetSize()
4730       lu.cfg.SetDiskID(node_disk, node)
4731       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4732       msg = result.fail_msg
4733       if msg:
4734         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4735                            " (is_primary=True, pass=2): %s",
4736                            inst_disk.iv_name, node, msg)
4737         disks_ok = False
4738       else:
4739         dev_path = result.payload
4740
4741     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4742
4743   # leave the disks configured for the primary node
4744   # this is a workaround that would be fixed better by
4745   # improving the logical/physical id handling
4746   for disk in disks:
4747     lu.cfg.SetDiskID(disk, instance.primary_node)
4748
4749   return disks_ok, device_info
4750
4751
4752 def _StartInstanceDisks(lu, instance, force):
4753   """Start the disks of an instance.
4754
4755   """
4756   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4757                                            ignore_secondaries=force)
4758   if not disks_ok:
4759     _ShutdownInstanceDisks(lu, instance)
4760     if force is not None and not force:
4761       lu.proc.LogWarning("", hint="If the message above refers to a"
4762                          " secondary node,"
4763                          " you can retry the operation using '--force'.")
4764     raise errors.OpExecError("Disk consistency error")
4765
4766
4767 class LUInstanceDeactivateDisks(NoHooksLU):
4768   """Shutdown an instance's disks.
4769
4770   """
4771   REQ_BGL = False
4772
4773   def ExpandNames(self):
4774     self._ExpandAndLockInstance()
4775     self.needed_locks[locking.LEVEL_NODE] = []
4776     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4777
4778   def DeclareLocks(self, level):
4779     if level == locking.LEVEL_NODE:
4780       self._LockInstancesNodes()
4781
4782   def CheckPrereq(self):
4783     """Check prerequisites.
4784
4785     This checks that the instance is in the cluster.
4786
4787     """
4788     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4789     assert self.instance is not None, \
4790       "Cannot retrieve locked instance %s" % self.op.instance_name
4791
4792   def Exec(self, feedback_fn):
4793     """Deactivate the disks
4794
4795     """
4796     instance = self.instance
4797     _SafeShutdownInstanceDisks(self, instance)
4798
4799
4800 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4801   """Shutdown block devices of an instance.
4802
4803   This function checks if an instance is running, before calling
4804   _ShutdownInstanceDisks.
4805
4806   """
4807   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4808   _ShutdownInstanceDisks(lu, instance, disks=disks)
4809
4810
4811 def _ExpandCheckDisks(instance, disks):
4812   """Return the instance disks selected by the disks list
4813
4814   @type disks: list of L{objects.Disk} or None
4815   @param disks: selected disks
4816   @rtype: list of L{objects.Disk}
4817   @return: selected instance disks to act on
4818
4819   """
4820   if disks is None:
4821     return instance.disks
4822   else:
4823     if not set(disks).issubset(instance.disks):
4824       raise errors.ProgrammerError("Can only act on disks belonging to the"
4825                                    " target instance")
4826     return disks
4827
4828
4829 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4830   """Shutdown block devices of an instance.
4831
4832   This does the shutdown on all nodes of the instance.
4833
4834   If the ignore_primary is false, errors on the primary node are
4835   ignored.
4836
4837   """
4838   all_result = True
4839   disks = _ExpandCheckDisks(instance, disks)
4840
4841   for disk in disks:
4842     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4843       lu.cfg.SetDiskID(top_disk, node)
4844       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4845       msg = result.fail_msg
4846       if msg:
4847         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4848                       disk.iv_name, node, msg)
4849         if ((node == instance.primary_node and not ignore_primary) or
4850             (node != instance.primary_node and not result.offline)):
4851           all_result = False
4852   return all_result
4853
4854
4855 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4856   """Checks if a node has enough free memory.
4857
4858   This function check if a given node has the needed amount of free
4859   memory. In case the node has less memory or we cannot get the
4860   information from the node, this function raise an OpPrereqError
4861   exception.
4862
4863   @type lu: C{LogicalUnit}
4864   @param lu: a logical unit from which we get configuration data
4865   @type node: C{str}
4866   @param node: the node to check
4867   @type reason: C{str}
4868   @param reason: string to use in the error message
4869   @type requested: C{int}
4870   @param requested: the amount of memory in MiB to check for
4871   @type hypervisor_name: C{str}
4872   @param hypervisor_name: the hypervisor to ask for memory stats
4873   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4874       we cannot check the node
4875
4876   """
4877   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4878   nodeinfo[node].Raise("Can't get data from node %s" % node,
4879                        prereq=True, ecode=errors.ECODE_ENVIRON)
4880   free_mem = nodeinfo[node].payload.get('memory_free', None)
4881   if not isinstance(free_mem, int):
4882     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4883                                " was '%s'" % (node, free_mem),
4884                                errors.ECODE_ENVIRON)
4885   if requested > free_mem:
4886     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4887                                " needed %s MiB, available %s MiB" %
4888                                (node, reason, requested, free_mem),
4889                                errors.ECODE_NORES)
4890
4891
4892 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4893   """Checks if nodes have enough free disk space in the all VGs.
4894
4895   This function check if all given nodes have the needed amount of
4896   free disk. In case any node has less disk or we cannot get the
4897   information from the node, this function raise an OpPrereqError
4898   exception.
4899
4900   @type lu: C{LogicalUnit}
4901   @param lu: a logical unit from which we get configuration data
4902   @type nodenames: C{list}
4903   @param nodenames: the list of node names to check
4904   @type req_sizes: C{dict}
4905   @param req_sizes: the hash of vg and corresponding amount of disk in
4906       MiB to check for
4907   @raise errors.OpPrereqError: if the node doesn't have enough disk,
4908       or we cannot check the node
4909
4910   """
4911   for vg, req_size in req_sizes.items():
4912     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4913
4914
4915 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4916   """Checks if nodes have enough free disk space in the specified VG.
4917
4918   This function check if all given nodes have the needed amount of
4919   free disk. In case any node has less disk or we cannot get the
4920   information from the node, this function raise an OpPrereqError
4921   exception.
4922
4923   @type lu: C{LogicalUnit}
4924   @param lu: a logical unit from which we get configuration data
4925   @type nodenames: C{list}
4926   @param nodenames: the list of node names to check
4927   @type vg: C{str}
4928   @param vg: the volume group to check
4929   @type requested: C{int}
4930   @param requested: the amount of disk in MiB to check for
4931   @raise errors.OpPrereqError: if the node doesn't have enough disk,
4932       or we cannot check the node
4933
4934   """
4935   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4936   for node in nodenames:
4937     info = nodeinfo[node]
4938     info.Raise("Cannot get current information from node %s" % node,
4939                prereq=True, ecode=errors.ECODE_ENVIRON)
4940     vg_free = info.payload.get("vg_free", None)
4941     if not isinstance(vg_free, int):
4942       raise errors.OpPrereqError("Can't compute free disk space on node"
4943                                  " %s for vg %s, result was '%s'" %
4944                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
4945     if requested > vg_free:
4946       raise errors.OpPrereqError("Not enough disk space on target node %s"
4947                                  " vg %s: required %d MiB, available %d MiB" %
4948                                  (node, vg, requested, vg_free),
4949                                  errors.ECODE_NORES)
4950
4951
4952 class LUInstanceStartup(LogicalUnit):
4953   """Starts an instance.
4954
4955   """
4956   HPATH = "instance-start"
4957   HTYPE = constants.HTYPE_INSTANCE
4958   REQ_BGL = False
4959
4960   def CheckArguments(self):
4961     # extra beparams
4962     if self.op.beparams:
4963       # fill the beparams dict
4964       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4965
4966   def ExpandNames(self):
4967     self._ExpandAndLockInstance()
4968
4969   def BuildHooksEnv(self):
4970     """Build hooks env.
4971
4972     This runs on master, primary and secondary nodes of the instance.
4973
4974     """
4975     env = {
4976       "FORCE": self.op.force,
4977       }
4978     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4979     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4980     return env, nl, nl
4981
4982   def CheckPrereq(self):
4983     """Check prerequisites.
4984
4985     This checks that the instance is in the cluster.
4986
4987     """
4988     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4989     assert self.instance is not None, \
4990       "Cannot retrieve locked instance %s" % self.op.instance_name
4991
4992     # extra hvparams
4993     if self.op.hvparams:
4994       # check hypervisor parameter syntax (locally)
4995       cluster = self.cfg.GetClusterInfo()
4996       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4997       filled_hvp = cluster.FillHV(instance)
4998       filled_hvp.update(self.op.hvparams)
4999       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5000       hv_type.CheckParameterSyntax(filled_hvp)
5001       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5002
5003     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5004
5005     if self.primary_offline and self.op.ignore_offline_nodes:
5006       self.proc.LogWarning("Ignoring offline primary node")
5007
5008       if self.op.hvparams or self.op.beparams:
5009         self.proc.LogWarning("Overridden parameters are ignored")
5010     else:
5011       _CheckNodeOnline(self, instance.primary_node)
5012
5013       bep = self.cfg.GetClusterInfo().FillBE(instance)
5014
5015       # check bridges existence
5016       _CheckInstanceBridgesExist(self, instance)
5017
5018       remote_info = self.rpc.call_instance_info(instance.primary_node,
5019                                                 instance.name,
5020                                                 instance.hypervisor)
5021       remote_info.Raise("Error checking node %s" % instance.primary_node,
5022                         prereq=True, ecode=errors.ECODE_ENVIRON)
5023       if not remote_info.payload: # not running already
5024         _CheckNodeFreeMemory(self, instance.primary_node,
5025                              "starting instance %s" % instance.name,
5026                              bep[constants.BE_MEMORY], instance.hypervisor)
5027
5028   def Exec(self, feedback_fn):
5029     """Start the instance.
5030
5031     """
5032     instance = self.instance
5033     force = self.op.force
5034
5035     self.cfg.MarkInstanceUp(instance.name)
5036
5037     if self.primary_offline:
5038       assert self.op.ignore_offline_nodes
5039       self.proc.LogInfo("Primary node offline, marked instance as started")
5040     else:
5041       node_current = instance.primary_node
5042
5043       _StartInstanceDisks(self, instance, force)
5044
5045       result = self.rpc.call_instance_start(node_current, instance,
5046                                             self.op.hvparams, self.op.beparams)
5047       msg = result.fail_msg
5048       if msg:
5049         _ShutdownInstanceDisks(self, instance)
5050         raise errors.OpExecError("Could not start instance: %s" % msg)
5051
5052
5053 class LUInstanceReboot(LogicalUnit):
5054   """Reboot an instance.
5055
5056   """
5057   HPATH = "instance-reboot"
5058   HTYPE = constants.HTYPE_INSTANCE
5059   REQ_BGL = False
5060
5061   def ExpandNames(self):
5062     self._ExpandAndLockInstance()
5063
5064   def BuildHooksEnv(self):
5065     """Build hooks env.
5066
5067     This runs on master, primary and secondary nodes of the instance.
5068
5069     """
5070     env = {
5071       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5072       "REBOOT_TYPE": self.op.reboot_type,
5073       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5074       }
5075     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5076     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5077     return env, nl, nl
5078
5079   def CheckPrereq(self):
5080     """Check prerequisites.
5081
5082     This checks that the instance is in the cluster.
5083
5084     """
5085     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5086     assert self.instance is not None, \
5087       "Cannot retrieve locked instance %s" % self.op.instance_name
5088
5089     _CheckNodeOnline(self, instance.primary_node)
5090
5091     # check bridges existence
5092     _CheckInstanceBridgesExist(self, instance)
5093
5094   def Exec(self, feedback_fn):
5095     """Reboot the instance.
5096
5097     """
5098     instance = self.instance
5099     ignore_secondaries = self.op.ignore_secondaries
5100     reboot_type = self.op.reboot_type
5101
5102     node_current = instance.primary_node
5103
5104     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5105                        constants.INSTANCE_REBOOT_HARD]:
5106       for disk in instance.disks:
5107         self.cfg.SetDiskID(disk, node_current)
5108       result = self.rpc.call_instance_reboot(node_current, instance,
5109                                              reboot_type,
5110                                              self.op.shutdown_timeout)
5111       result.Raise("Could not reboot instance")
5112     else:
5113       result = self.rpc.call_instance_shutdown(node_current, instance,
5114                                                self.op.shutdown_timeout)
5115       result.Raise("Could not shutdown instance for full reboot")
5116       _ShutdownInstanceDisks(self, instance)
5117       _StartInstanceDisks(self, instance, ignore_secondaries)
5118       result = self.rpc.call_instance_start(node_current, instance, None, None)
5119       msg = result.fail_msg
5120       if msg:
5121         _ShutdownInstanceDisks(self, instance)
5122         raise errors.OpExecError("Could not start instance for"
5123                                  " full reboot: %s" % msg)
5124
5125     self.cfg.MarkInstanceUp(instance.name)
5126
5127
5128 class LUInstanceShutdown(LogicalUnit):
5129   """Shutdown an instance.
5130
5131   """
5132   HPATH = "instance-stop"
5133   HTYPE = constants.HTYPE_INSTANCE
5134   REQ_BGL = False
5135
5136   def ExpandNames(self):
5137     self._ExpandAndLockInstance()
5138
5139   def BuildHooksEnv(self):
5140     """Build hooks env.
5141
5142     This runs on master, primary and secondary nodes of the instance.
5143
5144     """
5145     env = _BuildInstanceHookEnvByObject(self, self.instance)
5146     env["TIMEOUT"] = self.op.timeout
5147     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5148     return env, nl, nl
5149
5150   def CheckPrereq(self):
5151     """Check prerequisites.
5152
5153     This checks that the instance is in the cluster.
5154
5155     """
5156     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5157     assert self.instance is not None, \
5158       "Cannot retrieve locked instance %s" % self.op.instance_name
5159
5160     self.primary_offline = \
5161       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5162
5163     if self.primary_offline and self.op.ignore_offline_nodes:
5164       self.proc.LogWarning("Ignoring offline primary node")
5165     else:
5166       _CheckNodeOnline(self, self.instance.primary_node)
5167
5168   def Exec(self, feedback_fn):
5169     """Shutdown the instance.
5170
5171     """
5172     instance = self.instance
5173     node_current = instance.primary_node
5174     timeout = self.op.timeout
5175
5176     self.cfg.MarkInstanceDown(instance.name)
5177
5178     if self.primary_offline:
5179       assert self.op.ignore_offline_nodes
5180       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5181     else:
5182       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5183       msg = result.fail_msg
5184       if msg:
5185         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5186
5187       _ShutdownInstanceDisks(self, instance)
5188
5189
5190 class LUInstanceReinstall(LogicalUnit):
5191   """Reinstall an instance.
5192
5193   """
5194   HPATH = "instance-reinstall"
5195   HTYPE = constants.HTYPE_INSTANCE
5196   REQ_BGL = False
5197
5198   def ExpandNames(self):
5199     self._ExpandAndLockInstance()
5200
5201   def BuildHooksEnv(self):
5202     """Build hooks env.
5203
5204     This runs on master, primary and secondary nodes of the instance.
5205
5206     """
5207     env = _BuildInstanceHookEnvByObject(self, self.instance)
5208     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5209     return env, nl, nl
5210
5211   def CheckPrereq(self):
5212     """Check prerequisites.
5213
5214     This checks that the instance is in the cluster and is not running.
5215
5216     """
5217     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5218     assert instance is not None, \
5219       "Cannot retrieve locked instance %s" % self.op.instance_name
5220     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5221                      " offline, cannot reinstall")
5222     for node in instance.secondary_nodes:
5223       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5224                        " cannot reinstall")
5225
5226     if instance.disk_template == constants.DT_DISKLESS:
5227       raise errors.OpPrereqError("Instance '%s' has no disks" %
5228                                  self.op.instance_name,
5229                                  errors.ECODE_INVAL)
5230     _CheckInstanceDown(self, instance, "cannot reinstall")
5231
5232     if self.op.os_type is not None:
5233       # OS verification
5234       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5235       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5236       instance_os = self.op.os_type
5237     else:
5238       instance_os = instance.os
5239
5240     nodelist = list(instance.all_nodes)
5241
5242     if self.op.osparams:
5243       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5244       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5245       self.os_inst = i_osdict # the new dict (without defaults)
5246     else:
5247       self.os_inst = None
5248
5249     self.instance = instance
5250
5251   def Exec(self, feedback_fn):
5252     """Reinstall the instance.
5253
5254     """
5255     inst = self.instance
5256
5257     if self.op.os_type is not None:
5258       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5259       inst.os = self.op.os_type
5260       # Write to configuration
5261       self.cfg.Update(inst, feedback_fn)
5262
5263     _StartInstanceDisks(self, inst, None)
5264     try:
5265       feedback_fn("Running the instance OS create scripts...")
5266       # FIXME: pass debug option from opcode to backend
5267       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5268                                              self.op.debug_level,
5269                                              osparams=self.os_inst)
5270       result.Raise("Could not install OS for instance %s on node %s" %
5271                    (inst.name, inst.primary_node))
5272     finally:
5273       _ShutdownInstanceDisks(self, inst)
5274
5275
5276 class LUInstanceRecreateDisks(LogicalUnit):
5277   """Recreate an instance's missing disks.
5278
5279   """
5280   HPATH = "instance-recreate-disks"
5281   HTYPE = constants.HTYPE_INSTANCE
5282   REQ_BGL = False
5283
5284   def ExpandNames(self):
5285     self._ExpandAndLockInstance()
5286
5287   def BuildHooksEnv(self):
5288     """Build hooks env.
5289
5290     This runs on master, primary and secondary nodes of the instance.
5291
5292     """
5293     env = _BuildInstanceHookEnvByObject(self, self.instance)
5294     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5295     return env, nl, nl
5296
5297   def CheckPrereq(self):
5298     """Check prerequisites.
5299
5300     This checks that the instance is in the cluster and is not running.
5301
5302     """
5303     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5304     assert instance is not None, \
5305       "Cannot retrieve locked instance %s" % self.op.instance_name
5306     _CheckNodeOnline(self, instance.primary_node)
5307
5308     if instance.disk_template == constants.DT_DISKLESS:
5309       raise errors.OpPrereqError("Instance '%s' has no disks" %
5310                                  self.op.instance_name, errors.ECODE_INVAL)
5311     _CheckInstanceDown(self, instance, "cannot recreate disks")
5312
5313     if not self.op.disks:
5314       self.op.disks = range(len(instance.disks))
5315     else:
5316       for idx in self.op.disks:
5317         if idx >= len(instance.disks):
5318           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5319                                      errors.ECODE_INVAL)
5320
5321     self.instance = instance
5322
5323   def Exec(self, feedback_fn):
5324     """Recreate the disks.
5325
5326     """
5327     to_skip = []
5328     for idx, _ in enumerate(self.instance.disks):
5329       if idx not in self.op.disks: # disk idx has not been passed in
5330         to_skip.append(idx)
5331         continue
5332
5333     _CreateDisks(self, self.instance, to_skip=to_skip)
5334
5335
5336 class LUInstanceRename(LogicalUnit):
5337   """Rename an instance.
5338
5339   """
5340   HPATH = "instance-rename"
5341   HTYPE = constants.HTYPE_INSTANCE
5342
5343   def CheckArguments(self):
5344     """Check arguments.
5345
5346     """
5347     if self.op.ip_check and not self.op.name_check:
5348       # TODO: make the ip check more flexible and not depend on the name check
5349       raise errors.OpPrereqError("Cannot do ip check without a name check",
5350                                  errors.ECODE_INVAL)
5351
5352   def BuildHooksEnv(self):
5353     """Build hooks env.
5354
5355     This runs on master, primary and secondary nodes of the instance.
5356
5357     """
5358     env = _BuildInstanceHookEnvByObject(self, self.instance)
5359     env["INSTANCE_NEW_NAME"] = self.op.new_name
5360     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5361     return env, nl, nl
5362
5363   def CheckPrereq(self):
5364     """Check prerequisites.
5365
5366     This checks that the instance is in the cluster and is not running.
5367
5368     """
5369     self.op.instance_name = _ExpandInstanceName(self.cfg,
5370                                                 self.op.instance_name)
5371     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5372     assert instance is not None
5373     _CheckNodeOnline(self, instance.primary_node)
5374     _CheckInstanceDown(self, instance, "cannot rename")
5375     self.instance = instance
5376
5377     new_name = self.op.new_name
5378     if self.op.name_check:
5379       hostname = netutils.GetHostname(name=new_name)
5380       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5381                    hostname.name)
5382       new_name = self.op.new_name = hostname.name
5383       if (self.op.ip_check and
5384           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5385         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5386                                    (hostname.ip, new_name),
5387                                    errors.ECODE_NOTUNIQUE)
5388
5389     instance_list = self.cfg.GetInstanceList()
5390     if new_name in instance_list and new_name != instance.name:
5391       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5392                                  new_name, errors.ECODE_EXISTS)
5393
5394   def Exec(self, feedback_fn):
5395     """Rename the instance.
5396
5397     """
5398     inst = self.instance
5399     old_name = inst.name
5400
5401     rename_file_storage = False
5402     if (inst.disk_template == constants.DT_FILE and
5403         self.op.new_name != inst.name):
5404       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5405       rename_file_storage = True
5406
5407     self.cfg.RenameInstance(inst.name, self.op.new_name)
5408     # Change the instance lock. This is definitely safe while we hold the BGL
5409     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5410     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5411
5412     # re-read the instance from the configuration after rename
5413     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5414
5415     if rename_file_storage:
5416       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5417       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5418                                                      old_file_storage_dir,
5419                                                      new_file_storage_dir)
5420       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5421                    " (but the instance has been renamed in Ganeti)" %
5422                    (inst.primary_node, old_file_storage_dir,
5423                     new_file_storage_dir))
5424
5425     _StartInstanceDisks(self, inst, None)
5426     try:
5427       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5428                                                  old_name, self.op.debug_level)
5429       msg = result.fail_msg
5430       if msg:
5431         msg = ("Could not run OS rename script for instance %s on node %s"
5432                " (but the instance has been renamed in Ganeti): %s" %
5433                (inst.name, inst.primary_node, msg))
5434         self.proc.LogWarning(msg)
5435     finally:
5436       _ShutdownInstanceDisks(self, inst)
5437
5438     return inst.name
5439
5440
5441 class LUInstanceRemove(LogicalUnit):
5442   """Remove an instance.
5443
5444   """
5445   HPATH = "instance-remove"
5446   HTYPE = constants.HTYPE_INSTANCE
5447   REQ_BGL = False
5448
5449   def ExpandNames(self):
5450     self._ExpandAndLockInstance()
5451     self.needed_locks[locking.LEVEL_NODE] = []
5452     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5453
5454   def DeclareLocks(self, level):
5455     if level == locking.LEVEL_NODE:
5456       self._LockInstancesNodes()
5457
5458   def BuildHooksEnv(self):
5459     """Build hooks env.
5460
5461     This runs on master, primary and secondary nodes of the instance.
5462
5463     """
5464     env = _BuildInstanceHookEnvByObject(self, self.instance)
5465     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5466     nl = [self.cfg.GetMasterNode()]
5467     nl_post = list(self.instance.all_nodes) + nl
5468     return env, nl, nl_post
5469
5470   def CheckPrereq(self):
5471     """Check prerequisites.
5472
5473     This checks that the instance is in the cluster.
5474
5475     """
5476     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5477     assert self.instance is not None, \
5478       "Cannot retrieve locked instance %s" % self.op.instance_name
5479
5480   def Exec(self, feedback_fn):
5481     """Remove the instance.
5482
5483     """
5484     instance = self.instance
5485     logging.info("Shutting down instance %s on node %s",
5486                  instance.name, instance.primary_node)
5487
5488     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5489                                              self.op.shutdown_timeout)
5490     msg = result.fail_msg
5491     if msg:
5492       if self.op.ignore_failures:
5493         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5494       else:
5495         raise errors.OpExecError("Could not shutdown instance %s on"
5496                                  " node %s: %s" %
5497                                  (instance.name, instance.primary_node, msg))
5498
5499     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5500
5501
5502 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5503   """Utility function to remove an instance.
5504
5505   """
5506   logging.info("Removing block devices for instance %s", instance.name)
5507
5508   if not _RemoveDisks(lu, instance):
5509     if not ignore_failures:
5510       raise errors.OpExecError("Can't remove instance's disks")
5511     feedback_fn("Warning: can't remove instance's disks")
5512
5513   logging.info("Removing instance %s out of cluster config", instance.name)
5514
5515   lu.cfg.RemoveInstance(instance.name)
5516
5517   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5518     "Instance lock removal conflict"
5519
5520   # Remove lock for the instance
5521   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5522
5523
5524 class LUInstanceQuery(NoHooksLU):
5525   """Logical unit for querying instances.
5526
5527   """
5528   # pylint: disable-msg=W0142
5529   REQ_BGL = False
5530
5531   def CheckArguments(self):
5532     self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5533                              self.op.use_locking)
5534
5535   def ExpandNames(self):
5536     self.iq.ExpandNames(self)
5537
5538   def DeclareLocks(self, level):
5539     self.iq.DeclareLocks(self, level)
5540
5541   def Exec(self, feedback_fn):
5542     return self.iq.OldStyleQuery(self)
5543
5544
5545 class LUInstanceFailover(LogicalUnit):
5546   """Failover an instance.
5547
5548   """
5549   HPATH = "instance-failover"
5550   HTYPE = constants.HTYPE_INSTANCE
5551   REQ_BGL = False
5552
5553   def ExpandNames(self):
5554     self._ExpandAndLockInstance()
5555     self.needed_locks[locking.LEVEL_NODE] = []
5556     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5557
5558   def DeclareLocks(self, level):
5559     if level == locking.LEVEL_NODE:
5560       self._LockInstancesNodes()
5561
5562   def BuildHooksEnv(self):
5563     """Build hooks env.
5564
5565     This runs on master, primary and secondary nodes of the instance.
5566
5567     """
5568     instance = self.instance
5569     source_node = instance.primary_node
5570     target_node = instance.secondary_nodes[0]
5571     env = {
5572       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5573       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5574       "OLD_PRIMARY": source_node,
5575       "OLD_SECONDARY": target_node,
5576       "NEW_PRIMARY": target_node,
5577       "NEW_SECONDARY": source_node,
5578       }
5579     env.update(_BuildInstanceHookEnvByObject(self, instance))
5580     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5581     nl_post = list(nl)
5582     nl_post.append(source_node)
5583     return env, nl, nl_post
5584
5585   def CheckPrereq(self):
5586     """Check prerequisites.
5587
5588     This checks that the instance is in the cluster.
5589
5590     """
5591     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5592     assert self.instance is not None, \
5593       "Cannot retrieve locked instance %s" % self.op.instance_name
5594
5595     bep = self.cfg.GetClusterInfo().FillBE(instance)
5596     if instance.disk_template not in constants.DTS_NET_MIRROR:
5597       raise errors.OpPrereqError("Instance's disk layout is not"
5598                                  " network mirrored, cannot failover.",
5599                                  errors.ECODE_STATE)
5600
5601     secondary_nodes = instance.secondary_nodes
5602     if not secondary_nodes:
5603       raise errors.ProgrammerError("no secondary node but using "
5604                                    "a mirrored disk template")
5605
5606     target_node = secondary_nodes[0]
5607     _CheckNodeOnline(self, target_node)
5608     _CheckNodeNotDrained(self, target_node)
5609     if instance.admin_up:
5610       # check memory requirements on the secondary node
5611       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5612                            instance.name, bep[constants.BE_MEMORY],
5613                            instance.hypervisor)
5614     else:
5615       self.LogInfo("Not checking memory on the secondary node as"
5616                    " instance will not be started")
5617
5618     # check bridge existance
5619     _CheckInstanceBridgesExist(self, instance, node=target_node)
5620
5621   def Exec(self, feedback_fn):
5622     """Failover an instance.
5623
5624     The failover is done by shutting it down on its present node and
5625     starting it on the secondary.
5626
5627     """
5628     instance = self.instance
5629     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5630
5631     source_node = instance.primary_node
5632     target_node = instance.secondary_nodes[0]
5633
5634     if instance.admin_up:
5635       feedback_fn("* checking disk consistency between source and target")
5636       for dev in instance.disks:
5637         # for drbd, these are drbd over lvm
5638         if not _CheckDiskConsistency(self, dev, target_node, False):
5639           if not self.op.ignore_consistency:
5640             raise errors.OpExecError("Disk %s is degraded on target node,"
5641                                      " aborting failover." % dev.iv_name)
5642     else:
5643       feedback_fn("* not checking disk consistency as instance is not running")
5644
5645     feedback_fn("* shutting down instance on source node")
5646     logging.info("Shutting down instance %s on node %s",
5647                  instance.name, source_node)
5648
5649     result = self.rpc.call_instance_shutdown(source_node, instance,
5650                                              self.op.shutdown_timeout)
5651     msg = result.fail_msg
5652     if msg:
5653       if self.op.ignore_consistency or primary_node.offline:
5654         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5655                              " Proceeding anyway. Please make sure node"
5656                              " %s is down. Error details: %s",
5657                              instance.name, source_node, source_node, msg)
5658       else:
5659         raise errors.OpExecError("Could not shutdown instance %s on"
5660                                  " node %s: %s" %
5661                                  (instance.name, source_node, msg))
5662
5663     feedback_fn("* deactivating the instance's disks on source node")
5664     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5665       raise errors.OpExecError("Can't shut down the instance's disks.")
5666
5667     instance.primary_node = target_node
5668     # distribute new instance config to the other nodes
5669     self.cfg.Update(instance, feedback_fn)
5670
5671     # Only start the instance if it's marked as up
5672     if instance.admin_up:
5673       feedback_fn("* activating the instance's disks on target node")
5674       logging.info("Starting instance %s on node %s",
5675                    instance.name, target_node)
5676
5677       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5678                                            ignore_secondaries=True)
5679       if not disks_ok:
5680         _ShutdownInstanceDisks(self, instance)
5681         raise errors.OpExecError("Can't activate the instance's disks")
5682
5683       feedback_fn("* starting the instance on the target node")
5684       result = self.rpc.call_instance_start(target_node, instance, None, None)
5685       msg = result.fail_msg
5686       if msg:
5687         _ShutdownInstanceDisks(self, instance)
5688         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5689                                  (instance.name, target_node, msg))
5690
5691
5692 class LUInstanceMigrate(LogicalUnit):
5693   """Migrate an instance.
5694
5695   This is migration without shutting down, compared to the failover,
5696   which is done with shutdown.
5697
5698   """
5699   HPATH = "instance-migrate"
5700   HTYPE = constants.HTYPE_INSTANCE
5701   REQ_BGL = False
5702
5703   def ExpandNames(self):
5704     self._ExpandAndLockInstance()
5705
5706     self.needed_locks[locking.LEVEL_NODE] = []
5707     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5708
5709     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5710                                        self.op.cleanup)
5711     self.tasklets = [self._migrater]
5712
5713   def DeclareLocks(self, level):
5714     if level == locking.LEVEL_NODE:
5715       self._LockInstancesNodes()
5716
5717   def BuildHooksEnv(self):
5718     """Build hooks env.
5719
5720     This runs on master, primary and secondary nodes of the instance.
5721
5722     """
5723     instance = self._migrater.instance
5724     source_node = instance.primary_node
5725     target_node = instance.secondary_nodes[0]
5726     env = _BuildInstanceHookEnvByObject(self, instance)
5727     env["MIGRATE_LIVE"] = self._migrater.live
5728     env["MIGRATE_CLEANUP"] = self.op.cleanup
5729     env.update({
5730         "OLD_PRIMARY": source_node,
5731         "OLD_SECONDARY": target_node,
5732         "NEW_PRIMARY": target_node,
5733         "NEW_SECONDARY": source_node,
5734         })
5735     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5736     nl_post = list(nl)
5737     nl_post.append(source_node)
5738     return env, nl, nl_post
5739
5740
5741 class LUInstanceMove(LogicalUnit):
5742   """Move an instance by data-copying.
5743
5744   """
5745   HPATH = "instance-move"
5746   HTYPE = constants.HTYPE_INSTANCE
5747   REQ_BGL = False
5748
5749   def ExpandNames(self):
5750     self._ExpandAndLockInstance()
5751     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5752     self.op.target_node = target_node
5753     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5754     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5755
5756   def DeclareLocks(self, level):
5757     if level == locking.LEVEL_NODE:
5758       self._LockInstancesNodes(primary_only=True)
5759
5760   def BuildHooksEnv(self):
5761     """Build hooks env.
5762
5763     This runs on master, primary and secondary nodes of the instance.
5764
5765     """
5766     env = {
5767       "TARGET_NODE": self.op.target_node,
5768       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5769       }
5770     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5771     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5772                                        self.op.target_node]
5773     return env, nl, nl
5774
5775   def CheckPrereq(self):
5776     """Check prerequisites.
5777
5778     This checks that the instance is in the cluster.
5779
5780     """
5781     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5782     assert self.instance is not None, \
5783       "Cannot retrieve locked instance %s" % self.op.instance_name
5784
5785     node = self.cfg.GetNodeInfo(self.op.target_node)
5786     assert node is not None, \
5787       "Cannot retrieve locked node %s" % self.op.target_node
5788
5789     self.target_node = target_node = node.name
5790
5791     if target_node == instance.primary_node:
5792       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5793                                  (instance.name, target_node),
5794                                  errors.ECODE_STATE)
5795
5796     bep = self.cfg.GetClusterInfo().FillBE(instance)
5797
5798     for idx, dsk in enumerate(instance.disks):
5799       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5800         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5801                                    " cannot copy" % idx, errors.ECODE_STATE)
5802
5803     _CheckNodeOnline(self, target_node)
5804     _CheckNodeNotDrained(self, target_node)
5805     _CheckNodeVmCapable(self, target_node)
5806
5807     if instance.admin_up:
5808       # check memory requirements on the secondary node
5809       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5810                            instance.name, bep[constants.BE_MEMORY],
5811                            instance.hypervisor)
5812     else:
5813       self.LogInfo("Not checking memory on the secondary node as"
5814                    " instance will not be started")
5815
5816     # check bridge existance
5817     _CheckInstanceBridgesExist(self, instance, node=target_node)
5818
5819   def Exec(self, feedback_fn):
5820     """Move an instance.
5821
5822     The move is done by shutting it down on its present node, copying
5823     the data over (slow) and starting it on the new node.
5824
5825     """
5826     instance = self.instance
5827
5828     source_node = instance.primary_node
5829     target_node = self.target_node
5830
5831     self.LogInfo("Shutting down instance %s on source node %s",
5832                  instance.name, source_node)
5833
5834     result = self.rpc.call_instance_shutdown(source_node, instance,
5835                                              self.op.shutdown_timeout)
5836     msg = result.fail_msg
5837     if msg:
5838       if self.op.ignore_consistency:
5839         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5840                              " Proceeding anyway. Please make sure node"
5841                              " %s is down. Error details: %s",
5842                              instance.name, source_node, source_node, msg)
5843       else:
5844         raise errors.OpExecError("Could not shutdown instance %s on"
5845                                  " node %s: %s" %
5846                                  (instance.name, source_node, msg))
5847
5848     # create the target disks
5849     try:
5850       _CreateDisks(self, instance, target_node=target_node)
5851     except errors.OpExecError:
5852       self.LogWarning("Device creation failed, reverting...")
5853       try:
5854         _RemoveDisks(self, instance, target_node=target_node)
5855       finally:
5856         self.cfg.ReleaseDRBDMinors(instance.name)
5857         raise
5858
5859     cluster_name = self.cfg.GetClusterInfo().cluster_name
5860
5861     errs = []
5862     # activate, get path, copy the data over
5863     for idx, disk in enumerate(instance.disks):
5864       self.LogInfo("Copying data for disk %d", idx)
5865       result = self.rpc.call_blockdev_assemble(target_node, disk,
5866                                                instance.name, True)
5867       if result.fail_msg:
5868         self.LogWarning("Can't assemble newly created disk %d: %s",
5869                         idx, result.fail_msg)
5870         errs.append(result.fail_msg)
5871         break
5872       dev_path = result.payload
5873       result = self.rpc.call_blockdev_export(source_node, disk,
5874                                              target_node, dev_path,
5875                                              cluster_name)
5876       if result.fail_msg:
5877         self.LogWarning("Can't copy data over for disk %d: %s",
5878                         idx, result.fail_msg)
5879         errs.append(result.fail_msg)
5880         break
5881
5882     if errs:
5883       self.LogWarning("Some disks failed to copy, aborting")
5884       try:
5885         _RemoveDisks(self, instance, target_node=target_node)
5886       finally:
5887         self.cfg.ReleaseDRBDMinors(instance.name)
5888         raise errors.OpExecError("Errors during disk copy: %s" %
5889                                  (",".join(errs),))
5890
5891     instance.primary_node = target_node
5892     self.cfg.Update(instance, feedback_fn)
5893
5894     self.LogInfo("Removing the disks on the original node")
5895     _RemoveDisks(self, instance, target_node=source_node)
5896
5897     # Only start the instance if it's marked as up
5898     if instance.admin_up:
5899       self.LogInfo("Starting instance %s on node %s",
5900                    instance.name, target_node)
5901
5902       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5903                                            ignore_secondaries=True)
5904       if not disks_ok:
5905         _ShutdownInstanceDisks(self, instance)
5906         raise errors.OpExecError("Can't activate the instance's disks")
5907
5908       result = self.rpc.call_instance_start(target_node, instance, None, None)
5909       msg = result.fail_msg
5910       if msg:
5911         _ShutdownInstanceDisks(self, instance)
5912         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5913                                  (instance.name, target_node, msg))
5914
5915
5916 class LUNodeMigrate(LogicalUnit):
5917   """Migrate all instances from a node.
5918
5919   """
5920   HPATH = "node-migrate"
5921   HTYPE = constants.HTYPE_NODE
5922   REQ_BGL = False
5923
5924   def ExpandNames(self):
5925     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5926
5927     self.needed_locks = {
5928       locking.LEVEL_NODE: [self.op.node_name],
5929       }
5930
5931     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5932
5933     # Create tasklets for migrating instances for all instances on this node
5934     names = []
5935     tasklets = []
5936
5937     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5938       logging.debug("Migrating instance %s", inst.name)
5939       names.append(inst.name)
5940
5941       tasklets.append(TLMigrateInstance(self, inst.name, False))
5942
5943     self.tasklets = tasklets
5944
5945     # Declare instance locks
5946     self.needed_locks[locking.LEVEL_INSTANCE] = names
5947
5948   def DeclareLocks(self, level):
5949     if level == locking.LEVEL_NODE:
5950       self._LockInstancesNodes()
5951
5952   def BuildHooksEnv(self):
5953     """Build hooks env.
5954
5955     This runs on the master, the primary and all the secondaries.
5956
5957     """
5958     env = {
5959       "NODE_NAME": self.op.node_name,
5960       }
5961
5962     nl = [self.cfg.GetMasterNode()]
5963
5964     return (env, nl, nl)
5965
5966
5967 class TLMigrateInstance(Tasklet):
5968   """Tasklet class for instance migration.
5969
5970   @type live: boolean
5971   @ivar live: whether the migration will be done live or non-live;
5972       this variable is initalized only after CheckPrereq has run
5973
5974   """
5975   def __init__(self, lu, instance_name, cleanup):
5976     """Initializes this class.
5977
5978     """
5979     Tasklet.__init__(self, lu)
5980
5981     # Parameters
5982     self.instance_name = instance_name
5983     self.cleanup = cleanup
5984     self.live = False # will be overridden later
5985
5986   def CheckPrereq(self):
5987     """Check prerequisites.
5988
5989     This checks that the instance is in the cluster.
5990
5991     """
5992     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5993     instance = self.cfg.GetInstanceInfo(instance_name)
5994     assert instance is not None
5995
5996     if instance.disk_template != constants.DT_DRBD8:
5997       raise errors.OpPrereqError("Instance's disk layout is not"
5998                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5999
6000     secondary_nodes = instance.secondary_nodes
6001     if not secondary_nodes:
6002       raise errors.ConfigurationError("No secondary node but using"
6003                                       " drbd8 disk template")
6004
6005     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6006
6007     target_node = secondary_nodes[0]
6008     # check memory requirements on the secondary node
6009     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6010                          instance.name, i_be[constants.BE_MEMORY],
6011                          instance.hypervisor)
6012
6013     # check bridge existance
6014     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6015
6016     if not self.cleanup:
6017       _CheckNodeNotDrained(self.lu, target_node)
6018       result = self.rpc.call_instance_migratable(instance.primary_node,
6019                                                  instance)
6020       result.Raise("Can't migrate, please use failover",
6021                    prereq=True, ecode=errors.ECODE_STATE)
6022
6023     self.instance = instance
6024
6025     if self.lu.op.live is not None and self.lu.op.mode is not None:
6026       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6027                                  " parameters are accepted",
6028                                  errors.ECODE_INVAL)
6029     if self.lu.op.live is not None:
6030       if self.lu.op.live:
6031         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6032       else:
6033         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6034       # reset the 'live' parameter to None so that repeated
6035       # invocations of CheckPrereq do not raise an exception
6036       self.lu.op.live = None
6037     elif self.lu.op.mode is None:
6038       # read the default value from the hypervisor
6039       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6040       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6041
6042     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6043
6044   def _WaitUntilSync(self):
6045     """Poll with custom rpc for disk sync.
6046
6047     This uses our own step-based rpc call.
6048
6049     """
6050     self.feedback_fn("* wait until resync is done")
6051     all_done = False
6052     while not all_done:
6053       all_done = True
6054       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6055                                             self.nodes_ip,
6056                                             self.instance.disks)
6057       min_percent = 100
6058       for node, nres in result.items():
6059         nres.Raise("Cannot resync disks on node %s" % node)
6060         node_done, node_percent = nres.payload
6061         all_done = all_done and node_done
6062         if node_percent is not None:
6063           min_percent = min(min_percent, node_percent)
6064       if not all_done:
6065         if min_percent < 100:
6066           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6067         time.sleep(2)
6068
6069   def _EnsureSecondary(self, node):
6070     """Demote a node to secondary.
6071
6072     """
6073     self.feedback_fn("* switching node %s to secondary mode" % node)
6074
6075     for dev in self.instance.disks:
6076       self.cfg.SetDiskID(dev, node)
6077
6078     result = self.rpc.call_blockdev_close(node, self.instance.name,
6079                                           self.instance.disks)
6080     result.Raise("Cannot change disk to secondary on node %s" % node)
6081
6082   def _GoStandalone(self):
6083     """Disconnect from the network.
6084
6085     """
6086     self.feedback_fn("* changing into standalone mode")
6087     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6088                                                self.instance.disks)
6089     for node, nres in result.items():
6090       nres.Raise("Cannot disconnect disks node %s" % node)
6091
6092   def _GoReconnect(self, multimaster):
6093     """Reconnect to the network.
6094
6095     """
6096     if multimaster:
6097       msg = "dual-master"
6098     else:
6099       msg = "single-master"
6100     self.feedback_fn("* changing disks into %s mode" % msg)
6101     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6102                                            self.instance.disks,
6103                                            self.instance.name, multimaster)
6104     for node, nres in result.items():
6105       nres.Raise("Cannot change disks config on node %s" % node)
6106
6107   def _ExecCleanup(self):
6108     """Try to cleanup after a failed migration.
6109
6110     The cleanup is done by:
6111       - check that the instance is running only on one node
6112         (and update the config if needed)
6113       - change disks on its secondary node to secondary
6114       - wait until disks are fully synchronized
6115       - disconnect from the network
6116       - change disks into single-master mode
6117       - wait again until disks are fully synchronized
6118
6119     """
6120     instance = self.instance
6121     target_node = self.target_node
6122     source_node = self.source_node
6123
6124     # check running on only one node
6125     self.feedback_fn("* checking where the instance actually runs"
6126                      " (if this hangs, the hypervisor might be in"
6127                      " a bad state)")
6128     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6129     for node, result in ins_l.items():
6130       result.Raise("Can't contact node %s" % node)
6131
6132     runningon_source = instance.name in ins_l[source_node].payload
6133     runningon_target = instance.name in ins_l[target_node].payload
6134
6135     if runningon_source and runningon_target:
6136       raise errors.OpExecError("Instance seems to be running on two nodes,"
6137                                " or the hypervisor is confused. You will have"
6138                                " to ensure manually that it runs only on one"
6139                                " and restart this operation.")
6140
6141     if not (runningon_source or runningon_target):
6142       raise errors.OpExecError("Instance does not seem to be running at all."
6143                                " In this case, it's safer to repair by"
6144                                " running 'gnt-instance stop' to ensure disk"
6145                                " shutdown, and then restarting it.")
6146
6147     if runningon_target:
6148       # the migration has actually succeeded, we need to update the config
6149       self.feedback_fn("* instance running on secondary node (%s),"
6150                        " updating config" % target_node)
6151       instance.primary_node = target_node
6152       self.cfg.Update(instance, self.feedback_fn)
6153       demoted_node = source_node
6154     else:
6155       self.feedback_fn("* instance confirmed to be running on its"
6156                        " primary node (%s)" % source_node)
6157       demoted_node = target_node
6158
6159     self._EnsureSecondary(demoted_node)
6160     try:
6161       self._WaitUntilSync()
6162     except errors.OpExecError:
6163       # we ignore here errors, since if the device is standalone, it
6164       # won't be able to sync
6165       pass
6166     self._GoStandalone()
6167     self._GoReconnect(False)
6168     self._WaitUntilSync()
6169
6170     self.feedback_fn("* done")
6171
6172   def _RevertDiskStatus(self):
6173     """Try to revert the disk status after a failed migration.
6174
6175     """
6176     target_node = self.target_node
6177     try:
6178       self._EnsureSecondary(target_node)
6179       self._GoStandalone()
6180       self._GoReconnect(False)
6181       self._WaitUntilSync()
6182     except errors.OpExecError, err:
6183       self.lu.LogWarning("Migration failed and I can't reconnect the"
6184                          " drives: error '%s'\n"
6185                          "Please look and recover the instance status" %
6186                          str(err))
6187
6188   def _AbortMigration(self):
6189     """Call the hypervisor code to abort a started migration.
6190
6191     """
6192     instance = self.instance
6193     target_node = self.target_node
6194     migration_info = self.migration_info
6195
6196     abort_result = self.rpc.call_finalize_migration(target_node,
6197                                                     instance,
6198                                                     migration_info,
6199                                                     False)
6200     abort_msg = abort_result.fail_msg
6201     if abort_msg:
6202       logging.error("Aborting migration failed on target node %s: %s",
6203                     target_node, abort_msg)
6204       # Don't raise an exception here, as we stil have to try to revert the
6205       # disk status, even if this step failed.
6206
6207   def _ExecMigration(self):
6208     """Migrate an instance.
6209
6210     The migrate is done by:
6211       - change the disks into dual-master mode
6212       - wait until disks are fully synchronized again
6213       - migrate the instance
6214       - change disks on the new secondary node (the old primary) to secondary
6215       - wait until disks are fully synchronized
6216       - change disks into single-master mode
6217
6218     """
6219     instance = self.instance
6220     target_node = self.target_node
6221     source_node = self.source_node
6222
6223     self.feedback_fn("* checking disk consistency between source and target")
6224     for dev in instance.disks:
6225       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6226         raise errors.OpExecError("Disk %s is degraded or not fully"
6227                                  " synchronized on target node,"
6228                                  " aborting migrate." % dev.iv_name)
6229
6230     # First get the migration information from the remote node
6231     result = self.rpc.call_migration_info(source_node, instance)
6232     msg = result.fail_msg
6233     if msg:
6234       log_err = ("Failed fetching source migration information from %s: %s" %
6235                  (source_node, msg))
6236       logging.error(log_err)
6237       raise errors.OpExecError(log_err)
6238
6239     self.migration_info = migration_info = result.payload
6240
6241     # Then switch the disks to master/master mode
6242     self._EnsureSecondary(target_node)
6243     self._GoStandalone()
6244     self._GoReconnect(True)
6245     self._WaitUntilSync()
6246
6247     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6248     result = self.rpc.call_accept_instance(target_node,
6249                                            instance,
6250                                            migration_info,
6251                                            self.nodes_ip[target_node])
6252
6253     msg = result.fail_msg
6254     if msg:
6255       logging.error("Instance pre-migration failed, trying to revert"
6256                     " disk status: %s", msg)
6257       self.feedback_fn("Pre-migration failed, aborting")
6258       self._AbortMigration()
6259       self._RevertDiskStatus()
6260       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6261                                (instance.name, msg))
6262
6263     self.feedback_fn("* migrating instance to %s" % target_node)
6264     time.sleep(10)
6265     result = self.rpc.call_instance_migrate(source_node, instance,
6266                                             self.nodes_ip[target_node],
6267                                             self.live)
6268     msg = result.fail_msg
6269     if msg:
6270       logging.error("Instance migration failed, trying to revert"
6271                     " disk status: %s", msg)
6272       self.feedback_fn("Migration failed, aborting")
6273       self._AbortMigration()
6274       self._RevertDiskStatus()
6275       raise errors.OpExecError("Could not migrate instance %s: %s" %
6276                                (instance.name, msg))
6277     time.sleep(10)
6278
6279     instance.primary_node = target_node
6280     # distribute new instance config to the other nodes
6281     self.cfg.Update(instance, self.feedback_fn)
6282
6283     result = self.rpc.call_finalize_migration(target_node,
6284                                               instance,
6285                                               migration_info,
6286                                               True)
6287     msg = result.fail_msg
6288     if msg:
6289       logging.error("Instance migration succeeded, but finalization failed:"
6290                     " %s", msg)
6291       raise errors.OpExecError("Could not finalize instance migration: %s" %
6292                                msg)
6293
6294     self._EnsureSecondary(source_node)
6295     self._WaitUntilSync()
6296     self._GoStandalone()
6297     self._GoReconnect(False)
6298     self._WaitUntilSync()
6299
6300     self.feedback_fn("* done")
6301
6302   def Exec(self, feedback_fn):
6303     """Perform the migration.
6304
6305     """
6306     feedback_fn("Migrating instance %s" % self.instance.name)
6307
6308     self.feedback_fn = feedback_fn
6309
6310     self.source_node = self.instance.primary_node
6311     self.target_node = self.instance.secondary_nodes[0]
6312     self.all_nodes = [self.source_node, self.target_node]
6313     self.nodes_ip = {
6314       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6315       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6316       }
6317
6318     if self.cleanup:
6319       return self._ExecCleanup()
6320     else:
6321       return self._ExecMigration()
6322
6323
6324 def _CreateBlockDev(lu, node, instance, device, force_create,
6325                     info, force_open):
6326   """Create a tree of block devices on a given node.
6327
6328   If this device type has to be created on secondaries, create it and
6329   all its children.
6330
6331   If not, just recurse to children keeping the same 'force' value.
6332
6333   @param lu: the lu on whose behalf we execute
6334   @param node: the node on which to create the device
6335   @type instance: L{objects.Instance}
6336   @param instance: the instance which owns the device
6337   @type device: L{objects.Disk}
6338   @param device: the device to create
6339   @type force_create: boolean
6340   @param force_create: whether to force creation of this device; this
6341       will be change to True whenever we find a device which has
6342       CreateOnSecondary() attribute
6343   @param info: the extra 'metadata' we should attach to the device
6344       (this will be represented as a LVM tag)
6345   @type force_open: boolean
6346   @param force_open: this parameter will be passes to the
6347       L{backend.BlockdevCreate} function where it specifies
6348       whether we run on primary or not, and it affects both
6349       the child assembly and the device own Open() execution
6350
6351   """
6352   if device.CreateOnSecondary():
6353     force_create = True
6354
6355   if device.children:
6356     for child in device.children:
6357       _CreateBlockDev(lu, node, instance, child, force_create,
6358                       info, force_open)
6359
6360   if not force_create:
6361     return
6362
6363   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6364
6365
6366 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6367   """Create a single block device on a given node.
6368
6369   This will not recurse over children of the device, so they must be
6370   created in advance.
6371
6372   @param lu: the lu on whose behalf we execute
6373   @param node: the node on which to create the device
6374   @type instance: L{objects.Instance}
6375   @param instance: the instance which owns the device
6376   @type device: L{objects.Disk}
6377   @param device: the device to create
6378   @param info: the extra 'metadata' we should attach to the device
6379       (this will be represented as a LVM tag)
6380   @type force_open: boolean
6381   @param force_open: this parameter will be passes to the
6382       L{backend.BlockdevCreate} function where it specifies
6383       whether we run on primary or not, and it affects both
6384       the child assembly and the device own Open() execution
6385
6386   """
6387   lu.cfg.SetDiskID(device, node)
6388   result = lu.rpc.call_blockdev_create(node, device, device.size,
6389                                        instance.name, force_open, info)
6390   result.Raise("Can't create block device %s on"
6391                " node %s for instance %s" % (device, node, instance.name))
6392   if device.physical_id is None:
6393     device.physical_id = result.payload
6394
6395
6396 def _GenerateUniqueNames(lu, exts):
6397   """Generate a suitable LV name.
6398
6399   This will generate a logical volume name for the given instance.
6400
6401   """
6402   results = []
6403   for val in exts:
6404     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6405     results.append("%s%s" % (new_id, val))
6406   return results
6407
6408
6409 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6410                          p_minor, s_minor):
6411   """Generate a drbd8 device complete with its children.
6412
6413   """
6414   port = lu.cfg.AllocatePort()
6415   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6416   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6417                           logical_id=(vgname, names[0]))
6418   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6419                           logical_id=(vgname, names[1]))
6420   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6421                           logical_id=(primary, secondary, port,
6422                                       p_minor, s_minor,
6423                                       shared_secret),
6424                           children=[dev_data, dev_meta],
6425                           iv_name=iv_name)
6426   return drbd_dev
6427
6428
6429 def _GenerateDiskTemplate(lu, template_name,
6430                           instance_name, primary_node,
6431                           secondary_nodes, disk_info,
6432                           file_storage_dir, file_driver,
6433                           base_index, feedback_fn):
6434   """Generate the entire disk layout for a given template type.
6435
6436   """
6437   #TODO: compute space requirements
6438
6439   vgname = lu.cfg.GetVGName()
6440   disk_count = len(disk_info)
6441   disks = []
6442   if template_name == constants.DT_DISKLESS:
6443     pass
6444   elif template_name == constants.DT_PLAIN:
6445     if len(secondary_nodes) != 0:
6446       raise errors.ProgrammerError("Wrong template configuration")
6447
6448     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6449                                       for i in range(disk_count)])
6450     for idx, disk in enumerate(disk_info):
6451       disk_index = idx + base_index
6452       vg = disk.get("vg", vgname)
6453       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6454       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6455                               logical_id=(vg, names[idx]),
6456                               iv_name="disk/%d" % disk_index,
6457                               mode=disk["mode"])
6458       disks.append(disk_dev)
6459   elif template_name == constants.DT_DRBD8:
6460     if len(secondary_nodes) != 1:
6461       raise errors.ProgrammerError("Wrong template configuration")
6462     remote_node = secondary_nodes[0]
6463     minors = lu.cfg.AllocateDRBDMinor(
6464       [primary_node, remote_node] * len(disk_info), instance_name)
6465
6466     names = []
6467     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6468                                                for i in range(disk_count)]):
6469       names.append(lv_prefix + "_data")
6470       names.append(lv_prefix + "_meta")
6471     for idx, disk in enumerate(disk_info):
6472       disk_index = idx + base_index
6473       vg = disk.get("vg", vgname)
6474       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6475                                       disk["size"], vg, names[idx*2:idx*2+2],
6476                                       "disk/%d" % disk_index,
6477                                       minors[idx*2], minors[idx*2+1])
6478       disk_dev.mode = disk["mode"]
6479       disks.append(disk_dev)
6480   elif template_name == constants.DT_FILE:
6481     if len(secondary_nodes) != 0:
6482       raise errors.ProgrammerError("Wrong template configuration")
6483
6484     opcodes.RequireFileStorage()
6485
6486     for idx, disk in enumerate(disk_info):
6487       disk_index = idx + base_index
6488       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6489                               iv_name="disk/%d" % disk_index,
6490                               logical_id=(file_driver,
6491                                           "%s/disk%d" % (file_storage_dir,
6492                                                          disk_index)),
6493                               mode=disk["mode"])
6494       disks.append(disk_dev)
6495   else:
6496     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6497   return disks
6498
6499
6500 def _GetInstanceInfoText(instance):
6501   """Compute that text that should be added to the disk's metadata.
6502
6503   """
6504   return "originstname+%s" % instance.name
6505
6506
6507 def _CalcEta(time_taken, written, total_size):
6508   """Calculates the ETA based on size written and total size.
6509
6510   @param time_taken: The time taken so far
6511   @param written: amount written so far
6512   @param total_size: The total size of data to be written
6513   @return: The remaining time in seconds
6514
6515   """
6516   avg_time = time_taken / float(written)
6517   return (total_size - written) * avg_time
6518
6519
6520 def _WipeDisks(lu, instance):
6521   """Wipes instance disks.
6522
6523   @type lu: L{LogicalUnit}
6524   @param lu: the logical unit on whose behalf we execute
6525   @type instance: L{objects.Instance}
6526   @param instance: the instance whose disks we should create
6527   @return: the success of the wipe
6528
6529   """
6530   node = instance.primary_node
6531   logging.info("Pause sync of instance %s disks", instance.name)
6532   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6533
6534   for idx, success in enumerate(result.payload):
6535     if not success:
6536       logging.warn("pause-sync of instance %s for disks %d failed",
6537                    instance.name, idx)
6538
6539   try:
6540     for idx, device in enumerate(instance.disks):
6541       lu.LogInfo("* Wiping disk %d", idx)
6542       logging.info("Wiping disk %d for instance %s", idx, instance.name)
6543
6544       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6545       # MAX_WIPE_CHUNK at max
6546       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6547                             constants.MIN_WIPE_CHUNK_PERCENT)
6548
6549       offset = 0
6550       size = device.size
6551       last_output = 0
6552       start_time = time.time()
6553
6554       while offset < size:
6555         wipe_size = min(wipe_chunk_size, size - offset)
6556         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6557         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6558                      (idx, offset, wipe_size))
6559         now = time.time()
6560         offset += wipe_size
6561         if now - last_output >= 60:
6562           eta = _CalcEta(now - start_time, offset, size)
6563           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6564                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6565           last_output = now
6566   finally:
6567     logging.info("Resume sync of instance %s disks", instance.name)
6568
6569     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6570
6571     for idx, success in enumerate(result.payload):
6572       if not success:
6573         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6574                       " look at the status and troubleshoot the issue.", idx)
6575         logging.warn("resume-sync of instance %s for disks %d failed",
6576                      instance.name, idx)
6577
6578
6579 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6580   """Create all disks for an instance.
6581
6582   This abstracts away some work from AddInstance.
6583
6584   @type lu: L{LogicalUnit}
6585   @param lu: the logical unit on whose behalf we execute
6586   @type instance: L{objects.Instance}
6587   @param instance: the instance whose disks we should create
6588   @type to_skip: list
6589   @param to_skip: list of indices to skip
6590   @type target_node: string
6591   @param target_node: if passed, overrides the target node for creation
6592   @rtype: boolean
6593   @return: the success of the creation
6594
6595   """
6596   info = _GetInstanceInfoText(instance)
6597   if target_node is None:
6598     pnode = instance.primary_node
6599     all_nodes = instance.all_nodes
6600   else:
6601     pnode = target_node
6602     all_nodes = [pnode]
6603
6604   if instance.disk_template == constants.DT_FILE:
6605     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6606     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6607
6608     result.Raise("Failed to create directory '%s' on"
6609                  " node %s" % (file_storage_dir, pnode))
6610
6611   # Note: this needs to be kept in sync with adding of disks in
6612   # LUInstanceSetParams
6613   for idx, device in enumerate(instance.disks):
6614     if to_skip and idx in to_skip:
6615       continue
6616     logging.info("Creating volume %s for instance %s",
6617                  device.iv_name, instance.name)
6618     #HARDCODE
6619     for node in all_nodes:
6620       f_create = node == pnode
6621       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6622
6623
6624 def _RemoveDisks(lu, instance, target_node=None):
6625   """Remove all disks for an instance.
6626
6627   This abstracts away some work from `AddInstance()` and
6628   `RemoveInstance()`. Note that in case some of the devices couldn't
6629   be removed, the removal will continue with the other ones (compare
6630   with `_CreateDisks()`).
6631
6632   @type lu: L{LogicalUnit}
6633   @param lu: the logical unit on whose behalf we execute
6634   @type instance: L{objects.Instance}
6635   @param instance: the instance whose disks we should remove
6636   @type target_node: string
6637   @param target_node: used to override the node on which to remove the disks
6638   @rtype: boolean
6639   @return: the success of the removal
6640
6641   """
6642   logging.info("Removing block devices for instance %s", instance.name)
6643
6644   all_result = True
6645   for device in instance.disks:
6646     if target_node:
6647       edata = [(target_node, device)]
6648     else:
6649       edata = device.ComputeNodeTree(instance.primary_node)
6650     for node, disk in edata:
6651       lu.cfg.SetDiskID(disk, node)
6652       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6653       if msg:
6654         lu.LogWarning("Could not remove block device %s on node %s,"
6655                       " continuing anyway: %s", device.iv_name, node, msg)
6656         all_result = False
6657
6658   if instance.disk_template == constants.DT_FILE:
6659     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6660     if target_node:
6661       tgt = target_node
6662     else:
6663       tgt = instance.primary_node
6664     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6665     if result.fail_msg:
6666       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6667                     file_storage_dir, instance.primary_node, result.fail_msg)
6668       all_result = False
6669
6670   return all_result
6671
6672
6673 def _ComputeDiskSizePerVG(disk_template, disks):
6674   """Compute disk size requirements in the volume group
6675
6676   """
6677   def _compute(disks, payload):
6678     """Universal algorithm
6679
6680     """
6681     vgs = {}
6682     for disk in disks:
6683       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6684
6685     return vgs
6686
6687   # Required free disk space as a function of disk and swap space
6688   req_size_dict = {
6689     constants.DT_DISKLESS: {},
6690     constants.DT_PLAIN: _compute(disks, 0),
6691     # 128 MB are added for drbd metadata for each disk
6692     constants.DT_DRBD8: _compute(disks, 128),
6693     constants.DT_FILE: {},
6694   }
6695
6696   if disk_template not in req_size_dict:
6697     raise errors.ProgrammerError("Disk template '%s' size requirement"
6698                                  " is unknown" %  disk_template)
6699
6700   return req_size_dict[disk_template]
6701
6702
6703 def _ComputeDiskSize(disk_template, disks):
6704   """Compute disk size requirements in the volume group
6705
6706   """
6707   # Required free disk space as a function of disk and swap space
6708   req_size_dict = {
6709     constants.DT_DISKLESS: None,
6710     constants.DT_PLAIN: sum(d["size"] for d in disks),
6711     # 128 MB are added for drbd metadata for each disk
6712     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6713     constants.DT_FILE: None,
6714   }
6715
6716   if disk_template not in req_size_dict:
6717     raise errors.ProgrammerError("Disk template '%s' size requirement"
6718                                  " is unknown" %  disk_template)
6719
6720   return req_size_dict[disk_template]
6721
6722
6723 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6724   """Hypervisor parameter validation.
6725
6726   This function abstract the hypervisor parameter validation to be
6727   used in both instance create and instance modify.
6728
6729   @type lu: L{LogicalUnit}
6730   @param lu: the logical unit for which we check
6731   @type nodenames: list
6732   @param nodenames: the list of nodes on which we should check
6733   @type hvname: string
6734   @param hvname: the name of the hypervisor we should use
6735   @type hvparams: dict
6736   @param hvparams: the parameters which we need to check
6737   @raise errors.OpPrereqError: if the parameters are not valid
6738
6739   """
6740   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6741                                                   hvname,
6742                                                   hvparams)
6743   for node in nodenames:
6744     info = hvinfo[node]
6745     if info.offline:
6746       continue
6747     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6748
6749
6750 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6751   """OS parameters validation.
6752
6753   @type lu: L{LogicalUnit}
6754   @param lu: the logical unit for which we check
6755   @type required: boolean
6756   @param required: whether the validation should fail if the OS is not
6757       found
6758   @type nodenames: list
6759   @param nodenames: the list of nodes on which we should check
6760   @type osname: string
6761   @param osname: the name of the hypervisor we should use
6762   @type osparams: dict
6763   @param osparams: the parameters which we need to check
6764   @raise errors.OpPrereqError: if the parameters are not valid
6765
6766   """
6767   result = lu.rpc.call_os_validate(required, nodenames, osname,
6768                                    [constants.OS_VALIDATE_PARAMETERS],
6769                                    osparams)
6770   for node, nres in result.items():
6771     # we don't check for offline cases since this should be run only
6772     # against the master node and/or an instance's nodes
6773     nres.Raise("OS Parameters validation failed on node %s" % node)
6774     if not nres.payload:
6775       lu.LogInfo("OS %s not found on node %s, validation skipped",
6776                  osname, node)
6777
6778
6779 class LUInstanceCreate(LogicalUnit):
6780   """Create an instance.
6781
6782   """
6783   HPATH = "instance-add"
6784   HTYPE = constants.HTYPE_INSTANCE
6785   REQ_BGL = False
6786
6787   def CheckArguments(self):
6788     """Check arguments.
6789
6790     """
6791     # do not require name_check to ease forward/backward compatibility
6792     # for tools
6793     if self.op.no_install and self.op.start:
6794       self.LogInfo("No-installation mode selected, disabling startup")
6795       self.op.start = False
6796     # validate/normalize the instance name
6797     self.op.instance_name = \
6798       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6799
6800     if self.op.ip_check and not self.op.name_check:
6801       # TODO: make the ip check more flexible and not depend on the name check
6802       raise errors.OpPrereqError("Cannot do ip check without a name check",
6803                                  errors.ECODE_INVAL)
6804
6805     # check nics' parameter names
6806     for nic in self.op.nics:
6807       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6808
6809     # check disks. parameter names and consistent adopt/no-adopt strategy
6810     has_adopt = has_no_adopt = False
6811     for disk in self.op.disks:
6812       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6813       if "adopt" in disk:
6814         has_adopt = True
6815       else:
6816         has_no_adopt = True
6817     if has_adopt and has_no_adopt:
6818       raise errors.OpPrereqError("Either all disks are adopted or none is",
6819                                  errors.ECODE_INVAL)
6820     if has_adopt:
6821       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6822         raise errors.OpPrereqError("Disk adoption is not supported for the"
6823                                    " '%s' disk template" %
6824                                    self.op.disk_template,
6825                                    errors.ECODE_INVAL)
6826       if self.op.iallocator is not None:
6827         raise errors.OpPrereqError("Disk adoption not allowed with an"
6828                                    " iallocator script", errors.ECODE_INVAL)
6829       if self.op.mode == constants.INSTANCE_IMPORT:
6830         raise errors.OpPrereqError("Disk adoption not allowed for"
6831                                    " instance import", errors.ECODE_INVAL)
6832
6833     self.adopt_disks = has_adopt
6834
6835     # instance name verification
6836     if self.op.name_check:
6837       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6838       self.op.instance_name = self.hostname1.name
6839       # used in CheckPrereq for ip ping check
6840       self.check_ip = self.hostname1.ip
6841     else:
6842       self.check_ip = None
6843
6844     # file storage checks
6845     if (self.op.file_driver and
6846         not self.op.file_driver in constants.FILE_DRIVER):
6847       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6848                                  self.op.file_driver, errors.ECODE_INVAL)
6849
6850     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6851       raise errors.OpPrereqError("File storage directory path not absolute",
6852                                  errors.ECODE_INVAL)
6853
6854     ### Node/iallocator related checks
6855     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6856
6857     if self.op.pnode is not None:
6858       if self.op.disk_template in constants.DTS_NET_MIRROR:
6859         if self.op.snode is None:
6860           raise errors.OpPrereqError("The networked disk templates need"
6861                                      " a mirror node", errors.ECODE_INVAL)
6862       elif self.op.snode:
6863         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6864                         " template")
6865         self.op.snode = None
6866
6867     self._cds = _GetClusterDomainSecret()
6868
6869     if self.op.mode == constants.INSTANCE_IMPORT:
6870       # On import force_variant must be True, because if we forced it at
6871       # initial install, our only chance when importing it back is that it
6872       # works again!
6873       self.op.force_variant = True
6874
6875       if self.op.no_install:
6876         self.LogInfo("No-installation mode has no effect during import")
6877
6878     elif self.op.mode == constants.INSTANCE_CREATE:
6879       if self.op.os_type is None:
6880         raise errors.OpPrereqError("No guest OS specified",
6881                                    errors.ECODE_INVAL)
6882       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6883         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6884                                    " installation" % self.op.os_type,
6885                                    errors.ECODE_STATE)
6886       if self.op.disk_template is None:
6887         raise errors.OpPrereqError("No disk template specified",
6888                                    errors.ECODE_INVAL)
6889
6890     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6891       # Check handshake to ensure both clusters have the same domain secret
6892       src_handshake = self.op.source_handshake
6893       if not src_handshake:
6894         raise errors.OpPrereqError("Missing source handshake",
6895                                    errors.ECODE_INVAL)
6896
6897       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6898                                                            src_handshake)
6899       if errmsg:
6900         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6901                                    errors.ECODE_INVAL)
6902
6903       # Load and check source CA
6904       self.source_x509_ca_pem = self.op.source_x509_ca
6905       if not self.source_x509_ca_pem:
6906         raise errors.OpPrereqError("Missing source X509 CA",
6907                                    errors.ECODE_INVAL)
6908
6909       try:
6910         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6911                                                     self._cds)
6912       except OpenSSL.crypto.Error, err:
6913         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6914                                    (err, ), errors.ECODE_INVAL)
6915
6916       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6917       if errcode is not None:
6918         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6919                                    errors.ECODE_INVAL)
6920
6921       self.source_x509_ca = cert
6922
6923       src_instance_name = self.op.source_instance_name
6924       if not src_instance_name:
6925         raise errors.OpPrereqError("Missing source instance name",
6926                                    errors.ECODE_INVAL)
6927
6928       self.source_instance_name = \
6929           netutils.GetHostname(name=src_instance_name).name
6930
6931     else:
6932       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6933                                  self.op.mode, errors.ECODE_INVAL)
6934
6935   def ExpandNames(self):
6936     """ExpandNames for CreateInstance.
6937
6938     Figure out the right locks for instance creation.
6939
6940     """
6941     self.needed_locks = {}
6942
6943     instance_name = self.op.instance_name
6944     # this is just a preventive check, but someone might still add this
6945     # instance in the meantime, and creation will fail at lock-add time
6946     if instance_name in self.cfg.GetInstanceList():
6947       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6948                                  instance_name, errors.ECODE_EXISTS)
6949
6950     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6951
6952     if self.op.iallocator:
6953       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6954     else:
6955       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6956       nodelist = [self.op.pnode]
6957       if self.op.snode is not None:
6958         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6959         nodelist.append(self.op.snode)
6960       self.needed_locks[locking.LEVEL_NODE] = nodelist
6961
6962     # in case of import lock the source node too
6963     if self.op.mode == constants.INSTANCE_IMPORT:
6964       src_node = self.op.src_node
6965       src_path = self.op.src_path
6966
6967       if src_path is None:
6968         self.op.src_path = src_path = self.op.instance_name
6969
6970       if src_node is None:
6971         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6972         self.op.src_node = None
6973         if os.path.isabs(src_path):
6974           raise errors.OpPrereqError("Importing an instance from an absolute"
6975                                      " path requires a source node option.",
6976                                      errors.ECODE_INVAL)
6977       else:
6978         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6979         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6980           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6981         if not os.path.isabs(src_path):
6982           self.op.src_path = src_path = \
6983             utils.PathJoin(constants.EXPORT_DIR, src_path)
6984
6985   def _RunAllocator(self):
6986     """Run the allocator based on input opcode.
6987
6988     """
6989     nics = [n.ToDict() for n in self.nics]
6990     ial = IAllocator(self.cfg, self.rpc,
6991                      mode=constants.IALLOCATOR_MODE_ALLOC,
6992                      name=self.op.instance_name,
6993                      disk_template=self.op.disk_template,
6994                      tags=[],
6995                      os=self.op.os_type,
6996                      vcpus=self.be_full[constants.BE_VCPUS],
6997                      mem_size=self.be_full[constants.BE_MEMORY],
6998                      disks=self.disks,
6999                      nics=nics,
7000                      hypervisor=self.op.hypervisor,
7001                      )
7002
7003     ial.Run(self.op.iallocator)
7004
7005     if not ial.success:
7006       raise errors.OpPrereqError("Can't compute nodes using"
7007                                  " iallocator '%s': %s" %
7008                                  (self.op.iallocator, ial.info),
7009                                  errors.ECODE_NORES)
7010     if len(ial.result) != ial.required_nodes:
7011       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7012                                  " of nodes (%s), required %s" %
7013                                  (self.op.iallocator, len(ial.result),
7014                                   ial.required_nodes), errors.ECODE_FAULT)
7015     self.op.pnode = ial.result[0]
7016     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7017                  self.op.instance_name, self.op.iallocator,
7018                  utils.CommaJoin(ial.result))
7019     if ial.required_nodes == 2:
7020       self.op.snode = ial.result[1]
7021
7022   def BuildHooksEnv(self):
7023     """Build hooks env.
7024
7025     This runs on master, primary and secondary nodes of the instance.
7026
7027     """
7028     env = {
7029       "ADD_MODE": self.op.mode,
7030       }
7031     if self.op.mode == constants.INSTANCE_IMPORT:
7032       env["SRC_NODE"] = self.op.src_node
7033       env["SRC_PATH"] = self.op.src_path
7034       env["SRC_IMAGES"] = self.src_images
7035
7036     env.update(_BuildInstanceHookEnv(
7037       name=self.op.instance_name,
7038       primary_node=self.op.pnode,
7039       secondary_nodes=self.secondaries,
7040       status=self.op.start,
7041       os_type=self.op.os_type,
7042       memory=self.be_full[constants.BE_MEMORY],
7043       vcpus=self.be_full[constants.BE_VCPUS],
7044       nics=_NICListToTuple(self, self.nics),
7045       disk_template=self.op.disk_template,
7046       disks=[(d["size"], d["mode"]) for d in self.disks],
7047       bep=self.be_full,
7048       hvp=self.hv_full,
7049       hypervisor_name=self.op.hypervisor,
7050     ))
7051
7052     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7053           self.secondaries)
7054     return env, nl, nl
7055
7056   def _ReadExportInfo(self):
7057     """Reads the export information from disk.
7058
7059     It will override the opcode source node and path with the actual
7060     information, if these two were not specified before.
7061
7062     @return: the export information
7063
7064     """
7065     assert self.op.mode == constants.INSTANCE_IMPORT
7066
7067     src_node = self.op.src_node
7068     src_path = self.op.src_path
7069
7070     if src_node is None:
7071       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7072       exp_list = self.rpc.call_export_list(locked_nodes)
7073       found = False
7074       for node in exp_list:
7075         if exp_list[node].fail_msg:
7076           continue
7077         if src_path in exp_list[node].payload:
7078           found = True
7079           self.op.src_node = src_node = node
7080           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7081                                                        src_path)
7082           break
7083       if not found:
7084         raise errors.OpPrereqError("No export found for relative path %s" %
7085                                     src_path, errors.ECODE_INVAL)
7086
7087     _CheckNodeOnline(self, src_node)
7088     result = self.rpc.call_export_info(src_node, src_path)
7089     result.Raise("No export or invalid export found in dir %s" % src_path)
7090
7091     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7092     if not export_info.has_section(constants.INISECT_EXP):
7093       raise errors.ProgrammerError("Corrupted export config",
7094                                    errors.ECODE_ENVIRON)
7095
7096     ei_version = export_info.get(constants.INISECT_EXP, "version")
7097     if (int(ei_version) != constants.EXPORT_VERSION):
7098       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7099                                  (ei_version, constants.EXPORT_VERSION),
7100                                  errors.ECODE_ENVIRON)
7101     return export_info
7102
7103   def _ReadExportParams(self, einfo):
7104     """Use export parameters as defaults.
7105
7106     In case the opcode doesn't specify (as in override) some instance
7107     parameters, then try to use them from the export information, if
7108     that declares them.
7109
7110     """
7111     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7112
7113     if self.op.disk_template is None:
7114       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7115         self.op.disk_template = einfo.get(constants.INISECT_INS,
7116                                           "disk_template")
7117       else:
7118         raise errors.OpPrereqError("No disk template specified and the export"
7119                                    " is missing the disk_template information",
7120                                    errors.ECODE_INVAL)
7121
7122     if not self.op.disks:
7123       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7124         disks = []
7125         # TODO: import the disk iv_name too
7126         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7127           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7128           disks.append({"size": disk_sz})
7129         self.op.disks = disks
7130       else:
7131         raise errors.OpPrereqError("No disk info specified and the export"
7132                                    " is missing the disk information",
7133                                    errors.ECODE_INVAL)
7134
7135     if (not self.op.nics and
7136         einfo.has_option(constants.INISECT_INS, "nic_count")):
7137       nics = []
7138       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7139         ndict = {}
7140         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7141           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7142           ndict[name] = v
7143         nics.append(ndict)
7144       self.op.nics = nics
7145
7146     if (self.op.hypervisor is None and
7147         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7148       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7149     if einfo.has_section(constants.INISECT_HYP):
7150       # use the export parameters but do not override the ones
7151       # specified by the user
7152       for name, value in einfo.items(constants.INISECT_HYP):
7153         if name not in self.op.hvparams:
7154           self.op.hvparams[name] = value
7155
7156     if einfo.has_section(constants.INISECT_BEP):
7157       # use the parameters, without overriding
7158       for name, value in einfo.items(constants.INISECT_BEP):
7159         if name not in self.op.beparams:
7160           self.op.beparams[name] = value
7161     else:
7162       # try to read the parameters old style, from the main section
7163       for name in constants.BES_PARAMETERS:
7164         if (name not in self.op.beparams and
7165             einfo.has_option(constants.INISECT_INS, name)):
7166           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7167
7168     if einfo.has_section(constants.INISECT_OSP):
7169       # use the parameters, without overriding
7170       for name, value in einfo.items(constants.INISECT_OSP):
7171         if name not in self.op.osparams:
7172           self.op.osparams[name] = value
7173
7174   def _RevertToDefaults(self, cluster):
7175     """Revert the instance parameters to the default values.
7176
7177     """
7178     # hvparams
7179     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7180     for name in self.op.hvparams.keys():
7181       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7182         del self.op.hvparams[name]
7183     # beparams
7184     be_defs = cluster.SimpleFillBE({})
7185     for name in self.op.beparams.keys():
7186       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7187         del self.op.beparams[name]
7188     # nic params
7189     nic_defs = cluster.SimpleFillNIC({})
7190     for nic in self.op.nics:
7191       for name in constants.NICS_PARAMETERS:
7192         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7193           del nic[name]
7194     # osparams
7195     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7196     for name in self.op.osparams.keys():
7197       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7198         del self.op.osparams[name]
7199
7200   def CheckPrereq(self):
7201     """Check prerequisites.
7202
7203     """
7204     if self.op.mode == constants.INSTANCE_IMPORT:
7205       export_info = self._ReadExportInfo()
7206       self._ReadExportParams(export_info)
7207
7208     if (not self.cfg.GetVGName() and
7209         self.op.disk_template not in constants.DTS_NOT_LVM):
7210       raise errors.OpPrereqError("Cluster does not support lvm-based"
7211                                  " instances", errors.ECODE_STATE)
7212
7213     if self.op.hypervisor is None:
7214       self.op.hypervisor = self.cfg.GetHypervisorType()
7215
7216     cluster = self.cfg.GetClusterInfo()
7217     enabled_hvs = cluster.enabled_hypervisors
7218     if self.op.hypervisor not in enabled_hvs:
7219       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7220                                  " cluster (%s)" % (self.op.hypervisor,
7221                                   ",".join(enabled_hvs)),
7222                                  errors.ECODE_STATE)
7223
7224     # check hypervisor parameter syntax (locally)
7225     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7226     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7227                                       self.op.hvparams)
7228     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7229     hv_type.CheckParameterSyntax(filled_hvp)
7230     self.hv_full = filled_hvp
7231     # check that we don't specify global parameters on an instance
7232     _CheckGlobalHvParams(self.op.hvparams)
7233
7234     # fill and remember the beparams dict
7235     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7236     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7237
7238     # build os parameters
7239     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7240
7241     # now that hvp/bep are in final format, let's reset to defaults,
7242     # if told to do so
7243     if self.op.identify_defaults:
7244       self._RevertToDefaults(cluster)
7245
7246     # NIC buildup
7247     self.nics = []
7248     for idx, nic in enumerate(self.op.nics):
7249       nic_mode_req = nic.get("mode", None)
7250       nic_mode = nic_mode_req
7251       if nic_mode is None:
7252         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7253
7254       # in routed mode, for the first nic, the default ip is 'auto'
7255       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7256         default_ip_mode = constants.VALUE_AUTO
7257       else:
7258         default_ip_mode = constants.VALUE_NONE
7259
7260       # ip validity checks
7261       ip = nic.get("ip", default_ip_mode)
7262       if ip is None or ip.lower() == constants.VALUE_NONE:
7263         nic_ip = None
7264       elif ip.lower() == constants.VALUE_AUTO:
7265         if not self.op.name_check:
7266           raise errors.OpPrereqError("IP address set to auto but name checks"
7267                                      " have been skipped",
7268                                      errors.ECODE_INVAL)
7269         nic_ip = self.hostname1.ip
7270       else:
7271         if not netutils.IPAddress.IsValid(ip):
7272           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7273                                      errors.ECODE_INVAL)
7274         nic_ip = ip
7275
7276       # TODO: check the ip address for uniqueness
7277       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7278         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7279                                    errors.ECODE_INVAL)
7280
7281       # MAC address verification
7282       mac = nic.get("mac", constants.VALUE_AUTO)
7283       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7284         mac = utils.NormalizeAndValidateMac(mac)
7285
7286         try:
7287           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7288         except errors.ReservationError:
7289           raise errors.OpPrereqError("MAC address %s already in use"
7290                                      " in cluster" % mac,
7291                                      errors.ECODE_NOTUNIQUE)
7292
7293       # bridge verification
7294       bridge = nic.get("bridge", None)
7295       link = nic.get("link", None)
7296       if bridge and link:
7297         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7298                                    " at the same time", errors.ECODE_INVAL)
7299       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7300         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7301                                    errors.ECODE_INVAL)
7302       elif bridge:
7303         link = bridge
7304
7305       nicparams = {}
7306       if nic_mode_req:
7307         nicparams[constants.NIC_MODE] = nic_mode_req
7308       if link:
7309         nicparams[constants.NIC_LINK] = link
7310
7311       check_params = cluster.SimpleFillNIC(nicparams)
7312       objects.NIC.CheckParameterSyntax(check_params)
7313       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7314
7315     # disk checks/pre-build
7316     self.disks = []
7317     for disk in self.op.disks:
7318       mode = disk.get("mode", constants.DISK_RDWR)
7319       if mode not in constants.DISK_ACCESS_SET:
7320         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7321                                    mode, errors.ECODE_INVAL)
7322       size = disk.get("size", None)
7323       if size is None:
7324         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7325       try:
7326         size = int(size)
7327       except (TypeError, ValueError):
7328         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7329                                    errors.ECODE_INVAL)
7330       vg = disk.get("vg", self.cfg.GetVGName())
7331       new_disk = {"size": size, "mode": mode, "vg": vg}
7332       if "adopt" in disk:
7333         new_disk["adopt"] = disk["adopt"]
7334       self.disks.append(new_disk)
7335
7336     if self.op.mode == constants.INSTANCE_IMPORT:
7337
7338       # Check that the new instance doesn't have less disks than the export
7339       instance_disks = len(self.disks)
7340       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7341       if instance_disks < export_disks:
7342         raise errors.OpPrereqError("Not enough disks to import."
7343                                    " (instance: %d, export: %d)" %
7344                                    (instance_disks, export_disks),
7345                                    errors.ECODE_INVAL)
7346
7347       disk_images = []
7348       for idx in range(export_disks):
7349         option = 'disk%d_dump' % idx
7350         if export_info.has_option(constants.INISECT_INS, option):
7351           # FIXME: are the old os-es, disk sizes, etc. useful?
7352           export_name = export_info.get(constants.INISECT_INS, option)
7353           image = utils.PathJoin(self.op.src_path, export_name)
7354           disk_images.append(image)
7355         else:
7356           disk_images.append(False)
7357
7358       self.src_images = disk_images
7359
7360       old_name = export_info.get(constants.INISECT_INS, 'name')
7361       try:
7362         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7363       except (TypeError, ValueError), err:
7364         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7365                                    " an integer: %s" % str(err),
7366                                    errors.ECODE_STATE)
7367       if self.op.instance_name == old_name:
7368         for idx, nic in enumerate(self.nics):
7369           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7370             nic_mac_ini = 'nic%d_mac' % idx
7371             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7372
7373     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7374
7375     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7376     if self.op.ip_check:
7377       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7378         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7379                                    (self.check_ip, self.op.instance_name),
7380                                    errors.ECODE_NOTUNIQUE)
7381
7382     #### mac address generation
7383     # By generating here the mac address both the allocator and the hooks get
7384     # the real final mac address rather than the 'auto' or 'generate' value.
7385     # There is a race condition between the generation and the instance object
7386     # creation, which means that we know the mac is valid now, but we're not
7387     # sure it will be when we actually add the instance. If things go bad
7388     # adding the instance will abort because of a duplicate mac, and the
7389     # creation job will fail.
7390     for nic in self.nics:
7391       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7392         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7393
7394     #### allocator run
7395
7396     if self.op.iallocator is not None:
7397       self._RunAllocator()
7398
7399     #### node related checks
7400
7401     # check primary node
7402     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7403     assert self.pnode is not None, \
7404       "Cannot retrieve locked node %s" % self.op.pnode
7405     if pnode.offline:
7406       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7407                                  pnode.name, errors.ECODE_STATE)
7408     if pnode.drained:
7409       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7410                                  pnode.name, errors.ECODE_STATE)
7411     if not pnode.vm_capable:
7412       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7413                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7414
7415     self.secondaries = []
7416
7417     # mirror node verification
7418     if self.op.disk_template in constants.DTS_NET_MIRROR:
7419       if self.op.snode == pnode.name:
7420         raise errors.OpPrereqError("The secondary node cannot be the"
7421                                    " primary node.", errors.ECODE_INVAL)
7422       _CheckNodeOnline(self, self.op.snode)
7423       _CheckNodeNotDrained(self, self.op.snode)
7424       _CheckNodeVmCapable(self, self.op.snode)
7425       self.secondaries.append(self.op.snode)
7426
7427     nodenames = [pnode.name] + self.secondaries
7428
7429     if not self.adopt_disks:
7430       # Check lv size requirements, if not adopting
7431       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7432       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7433
7434     else: # instead, we must check the adoption data
7435       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7436       if len(all_lvs) != len(self.disks):
7437         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7438                                    errors.ECODE_INVAL)
7439       for lv_name in all_lvs:
7440         try:
7441           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7442           # to ReserveLV uses the same syntax
7443           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7444         except errors.ReservationError:
7445           raise errors.OpPrereqError("LV named %s used by another instance" %
7446                                      lv_name, errors.ECODE_NOTUNIQUE)
7447
7448       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7449       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7450
7451       node_lvs = self.rpc.call_lv_list([pnode.name],
7452                                        vg_names.payload.keys())[pnode.name]
7453       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7454       node_lvs = node_lvs.payload
7455
7456       delta = all_lvs.difference(node_lvs.keys())
7457       if delta:
7458         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7459                                    utils.CommaJoin(delta),
7460                                    errors.ECODE_INVAL)
7461       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7462       if online_lvs:
7463         raise errors.OpPrereqError("Online logical volumes found, cannot"
7464                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7465                                    errors.ECODE_STATE)
7466       # update the size of disk based on what is found
7467       for dsk in self.disks:
7468         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7469
7470     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7471
7472     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7473     # check OS parameters (remotely)
7474     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7475
7476     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7477
7478     # memory check on primary node
7479     if self.op.start:
7480       _CheckNodeFreeMemory(self, self.pnode.name,
7481                            "creating instance %s" % self.op.instance_name,
7482                            self.be_full[constants.BE_MEMORY],
7483                            self.op.hypervisor)
7484
7485     self.dry_run_result = list(nodenames)
7486
7487   def Exec(self, feedback_fn):
7488     """Create and add the instance to the cluster.
7489
7490     """
7491     instance = self.op.instance_name
7492     pnode_name = self.pnode.name
7493
7494     ht_kind = self.op.hypervisor
7495     if ht_kind in constants.HTS_REQ_PORT:
7496       network_port = self.cfg.AllocatePort()
7497     else:
7498       network_port = None
7499
7500     if constants.ENABLE_FILE_STORAGE:
7501       # this is needed because os.path.join does not accept None arguments
7502       if self.op.file_storage_dir is None:
7503         string_file_storage_dir = ""
7504       else:
7505         string_file_storage_dir = self.op.file_storage_dir
7506
7507       # build the full file storage dir path
7508       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7509                                         string_file_storage_dir, instance)
7510     else:
7511       file_storage_dir = ""
7512
7513     disks = _GenerateDiskTemplate(self,
7514                                   self.op.disk_template,
7515                                   instance, pnode_name,
7516                                   self.secondaries,
7517                                   self.disks,
7518                                   file_storage_dir,
7519                                   self.op.file_driver,
7520                                   0,
7521                                   feedback_fn)
7522
7523     iobj = objects.Instance(name=instance, os=self.op.os_type,
7524                             primary_node=pnode_name,
7525                             nics=self.nics, disks=disks,
7526                             disk_template=self.op.disk_template,
7527                             admin_up=False,
7528                             network_port=network_port,
7529                             beparams=self.op.beparams,
7530                             hvparams=self.op.hvparams,
7531                             hypervisor=self.op.hypervisor,
7532                             osparams=self.op.osparams,
7533                             )
7534
7535     if self.adopt_disks:
7536       # rename LVs to the newly-generated names; we need to construct
7537       # 'fake' LV disks with the old data, plus the new unique_id
7538       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7539       rename_to = []
7540       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7541         rename_to.append(t_dsk.logical_id)
7542         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7543         self.cfg.SetDiskID(t_dsk, pnode_name)
7544       result = self.rpc.call_blockdev_rename(pnode_name,
7545                                              zip(tmp_disks, rename_to))
7546       result.Raise("Failed to rename adoped LVs")
7547     else:
7548       feedback_fn("* creating instance disks...")
7549       try:
7550         _CreateDisks(self, iobj)
7551       except errors.OpExecError:
7552         self.LogWarning("Device creation failed, reverting...")
7553         try:
7554           _RemoveDisks(self, iobj)
7555         finally:
7556           self.cfg.ReleaseDRBDMinors(instance)
7557           raise
7558
7559       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7560         feedback_fn("* wiping instance disks...")
7561         try:
7562           _WipeDisks(self, iobj)
7563         except errors.OpExecError:
7564           self.LogWarning("Device wiping failed, reverting...")
7565           try:
7566             _RemoveDisks(self, iobj)
7567           finally:
7568             self.cfg.ReleaseDRBDMinors(instance)
7569             raise
7570
7571     feedback_fn("adding instance %s to cluster config" % instance)
7572
7573     self.cfg.AddInstance(iobj, self.proc.GetECId())
7574
7575     # Declare that we don't want to remove the instance lock anymore, as we've
7576     # added the instance to the config
7577     del self.remove_locks[locking.LEVEL_INSTANCE]
7578     # Unlock all the nodes
7579     if self.op.mode == constants.INSTANCE_IMPORT:
7580       nodes_keep = [self.op.src_node]
7581       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7582                        if node != self.op.src_node]
7583       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7584       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7585     else:
7586       self.context.glm.release(locking.LEVEL_NODE)
7587       del self.acquired_locks[locking.LEVEL_NODE]
7588
7589     if self.op.wait_for_sync:
7590       disk_abort = not _WaitForSync(self, iobj)
7591     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7592       # make sure the disks are not degraded (still sync-ing is ok)
7593       time.sleep(15)
7594       feedback_fn("* checking mirrors status")
7595       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7596     else:
7597       disk_abort = False
7598
7599     if disk_abort:
7600       _RemoveDisks(self, iobj)
7601       self.cfg.RemoveInstance(iobj.name)
7602       # Make sure the instance lock gets removed
7603       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7604       raise errors.OpExecError("There are some degraded disks for"
7605                                " this instance")
7606
7607     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7608       if self.op.mode == constants.INSTANCE_CREATE:
7609         if not self.op.no_install:
7610           feedback_fn("* running the instance OS create scripts...")
7611           # FIXME: pass debug option from opcode to backend
7612           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7613                                                  self.op.debug_level)
7614           result.Raise("Could not add os for instance %s"
7615                        " on node %s" % (instance, pnode_name))
7616
7617       elif self.op.mode == constants.INSTANCE_IMPORT:
7618         feedback_fn("* running the instance OS import scripts...")
7619
7620         transfers = []
7621
7622         for idx, image in enumerate(self.src_images):
7623           if not image:
7624             continue
7625
7626           # FIXME: pass debug option from opcode to backend
7627           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7628                                              constants.IEIO_FILE, (image, ),
7629                                              constants.IEIO_SCRIPT,
7630                                              (iobj.disks[idx], idx),
7631                                              None)
7632           transfers.append(dt)
7633
7634         import_result = \
7635           masterd.instance.TransferInstanceData(self, feedback_fn,
7636                                                 self.op.src_node, pnode_name,
7637                                                 self.pnode.secondary_ip,
7638                                                 iobj, transfers)
7639         if not compat.all(import_result):
7640           self.LogWarning("Some disks for instance %s on node %s were not"
7641                           " imported successfully" % (instance, pnode_name))
7642
7643       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7644         feedback_fn("* preparing remote import...")
7645         # The source cluster will stop the instance before attempting to make a
7646         # connection. In some cases stopping an instance can take a long time,
7647         # hence the shutdown timeout is added to the connection timeout.
7648         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7649                            self.op.source_shutdown_timeout)
7650         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7651
7652         assert iobj.primary_node == self.pnode.name
7653         disk_results = \
7654           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7655                                         self.source_x509_ca,
7656                                         self._cds, timeouts)
7657         if not compat.all(disk_results):
7658           # TODO: Should the instance still be started, even if some disks
7659           # failed to import (valid for local imports, too)?
7660           self.LogWarning("Some disks for instance %s on node %s were not"
7661                           " imported successfully" % (instance, pnode_name))
7662
7663         # Run rename script on newly imported instance
7664         assert iobj.name == instance
7665         feedback_fn("Running rename script for %s" % instance)
7666         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7667                                                    self.source_instance_name,
7668                                                    self.op.debug_level)
7669         if result.fail_msg:
7670           self.LogWarning("Failed to run rename script for %s on node"
7671                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7672
7673       else:
7674         # also checked in the prereq part
7675         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7676                                      % self.op.mode)
7677
7678     if self.op.start:
7679       iobj.admin_up = True
7680       self.cfg.Update(iobj, feedback_fn)
7681       logging.info("Starting instance %s on node %s", instance, pnode_name)
7682       feedback_fn("* starting instance...")
7683       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7684       result.Raise("Could not start instance")
7685
7686     return list(iobj.all_nodes)
7687
7688
7689 class LUInstanceConsole(NoHooksLU):
7690   """Connect to an instance's console.
7691
7692   This is somewhat special in that it returns the command line that
7693   you need to run on the master node in order to connect to the
7694   console.
7695
7696   """
7697   REQ_BGL = False
7698
7699   def ExpandNames(self):
7700     self._ExpandAndLockInstance()
7701
7702   def CheckPrereq(self):
7703     """Check prerequisites.
7704
7705     This checks that the instance is in the cluster.
7706
7707     """
7708     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7709     assert self.instance is not None, \
7710       "Cannot retrieve locked instance %s" % self.op.instance_name
7711     _CheckNodeOnline(self, self.instance.primary_node)
7712
7713   def Exec(self, feedback_fn):
7714     """Connect to the console of an instance
7715
7716     """
7717     instance = self.instance
7718     node = instance.primary_node
7719
7720     node_insts = self.rpc.call_instance_list([node],
7721                                              [instance.hypervisor])[node]
7722     node_insts.Raise("Can't get node information from %s" % node)
7723
7724     if instance.name not in node_insts.payload:
7725       if instance.admin_up:
7726         state = "ERROR_down"
7727       else:
7728         state = "ADMIN_down"
7729       raise errors.OpExecError("Instance %s is not running (state %s)" %
7730                                (instance.name, state))
7731
7732     logging.debug("Connecting to console of %s on %s", instance.name, node)
7733
7734     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7735     cluster = self.cfg.GetClusterInfo()
7736     # beparams and hvparams are passed separately, to avoid editing the
7737     # instance and then saving the defaults in the instance itself.
7738     hvparams = cluster.FillHV(instance)
7739     beparams = cluster.FillBE(instance)
7740     console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7741
7742     assert console.instance == instance.name
7743     assert console.Validate()
7744
7745     return console.ToDict()
7746
7747
7748 class LUInstanceReplaceDisks(LogicalUnit):
7749   """Replace the disks of an instance.
7750
7751   """
7752   HPATH = "mirrors-replace"
7753   HTYPE = constants.HTYPE_INSTANCE
7754   REQ_BGL = False
7755
7756   def CheckArguments(self):
7757     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7758                                   self.op.iallocator)
7759
7760   def ExpandNames(self):
7761     self._ExpandAndLockInstance()
7762
7763     if self.op.iallocator is not None:
7764       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7765
7766     elif self.op.remote_node is not None:
7767       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7768       self.op.remote_node = remote_node
7769
7770       # Warning: do not remove the locking of the new secondary here
7771       # unless DRBD8.AddChildren is changed to work in parallel;
7772       # currently it doesn't since parallel invocations of
7773       # FindUnusedMinor will conflict
7774       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7775       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7776
7777     else:
7778       self.needed_locks[locking.LEVEL_NODE] = []
7779       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7780
7781     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7782                                    self.op.iallocator, self.op.remote_node,
7783                                    self.op.disks, False, self.op.early_release)
7784
7785     self.tasklets = [self.replacer]
7786
7787   def DeclareLocks(self, level):
7788     # If we're not already locking all nodes in the set we have to declare the
7789     # instance's primary/secondary nodes.
7790     if (level == locking.LEVEL_NODE and
7791         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7792       self._LockInstancesNodes()
7793
7794   def BuildHooksEnv(self):
7795     """Build hooks env.
7796
7797     This runs on the master, the primary and all the secondaries.
7798
7799     """
7800     instance = self.replacer.instance
7801     env = {
7802       "MODE": self.op.mode,
7803       "NEW_SECONDARY": self.op.remote_node,
7804       "OLD_SECONDARY": instance.secondary_nodes[0],
7805       }
7806     env.update(_BuildInstanceHookEnvByObject(self, instance))
7807     nl = [
7808       self.cfg.GetMasterNode(),
7809       instance.primary_node,
7810       ]
7811     if self.op.remote_node is not None:
7812       nl.append(self.op.remote_node)
7813     return env, nl, nl
7814
7815
7816 class TLReplaceDisks(Tasklet):
7817   """Replaces disks for an instance.
7818
7819   Note: Locking is not within the scope of this class.
7820
7821   """
7822   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7823                disks, delay_iallocator, early_release):
7824     """Initializes this class.
7825
7826     """
7827     Tasklet.__init__(self, lu)
7828
7829     # Parameters
7830     self.instance_name = instance_name
7831     self.mode = mode
7832     self.iallocator_name = iallocator_name
7833     self.remote_node = remote_node
7834     self.disks = disks
7835     self.delay_iallocator = delay_iallocator
7836     self.early_release = early_release
7837
7838     # Runtime data
7839     self.instance = None
7840     self.new_node = None
7841     self.target_node = None
7842     self.other_node = None
7843     self.remote_node_info = None
7844     self.node_secondary_ip = None
7845
7846   @staticmethod
7847   def CheckArguments(mode, remote_node, iallocator):
7848     """Helper function for users of this class.
7849
7850     """
7851     # check for valid parameter combination
7852     if mode == constants.REPLACE_DISK_CHG:
7853       if remote_node is None and iallocator is None:
7854         raise errors.OpPrereqError("When changing the secondary either an"
7855                                    " iallocator script must be used or the"
7856                                    " new node given", errors.ECODE_INVAL)
7857
7858       if remote_node is not None and iallocator is not None:
7859         raise errors.OpPrereqError("Give either the iallocator or the new"
7860                                    " secondary, not both", errors.ECODE_INVAL)
7861
7862     elif remote_node is not None or iallocator is not None:
7863       # Not replacing the secondary
7864       raise errors.OpPrereqError("The iallocator and new node options can"
7865                                  " only be used when changing the"
7866                                  " secondary node", errors.ECODE_INVAL)
7867
7868   @staticmethod
7869   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7870     """Compute a new secondary node using an IAllocator.
7871
7872     """
7873     ial = IAllocator(lu.cfg, lu.rpc,
7874                      mode=constants.IALLOCATOR_MODE_RELOC,
7875                      name=instance_name,
7876                      relocate_from=relocate_from)
7877
7878     ial.Run(iallocator_name)
7879
7880     if not ial.success:
7881       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7882                                  " %s" % (iallocator_name, ial.info),
7883                                  errors.ECODE_NORES)
7884
7885     if len(ial.result) != ial.required_nodes:
7886       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7887                                  " of nodes (%s), required %s" %
7888                                  (iallocator_name,
7889                                   len(ial.result), ial.required_nodes),
7890                                  errors.ECODE_FAULT)
7891
7892     remote_node_name = ial.result[0]
7893
7894     lu.LogInfo("Selected new secondary for instance '%s': %s",
7895                instance_name, remote_node_name)
7896
7897     return remote_node_name
7898
7899   def _FindFaultyDisks(self, node_name):
7900     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7901                                     node_name, True)
7902
7903   def CheckPrereq(self):
7904     """Check prerequisites.
7905
7906     This checks that the instance is in the cluster.
7907
7908     """
7909     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7910     assert instance is not None, \
7911       "Cannot retrieve locked instance %s" % self.instance_name
7912
7913     if instance.disk_template != constants.DT_DRBD8:
7914       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7915                                  " instances", errors.ECODE_INVAL)
7916
7917     if len(instance.secondary_nodes) != 1:
7918       raise errors.OpPrereqError("The instance has a strange layout,"
7919                                  " expected one secondary but found %d" %
7920                                  len(instance.secondary_nodes),
7921                                  errors.ECODE_FAULT)
7922
7923     if not self.delay_iallocator:
7924       self._CheckPrereq2()
7925
7926   def _CheckPrereq2(self):
7927     """Check prerequisites, second part.
7928
7929     This function should always be part of CheckPrereq. It was separated and is
7930     now called from Exec because during node evacuation iallocator was only
7931     called with an unmodified cluster model, not taking planned changes into
7932     account.
7933
7934     """
7935     instance = self.instance
7936     secondary_node = instance.secondary_nodes[0]
7937
7938     if self.iallocator_name is None:
7939       remote_node = self.remote_node
7940     else:
7941       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7942                                        instance.name, instance.secondary_nodes)
7943
7944     if remote_node is not None:
7945       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7946       assert self.remote_node_info is not None, \
7947         "Cannot retrieve locked node %s" % remote_node
7948     else:
7949       self.remote_node_info = None
7950
7951     if remote_node == self.instance.primary_node:
7952       raise errors.OpPrereqError("The specified node is the primary node of"
7953                                  " the instance.", errors.ECODE_INVAL)
7954
7955     if remote_node == secondary_node:
7956       raise errors.OpPrereqError("The specified node is already the"
7957                                  " secondary node of the instance.",
7958                                  errors.ECODE_INVAL)
7959
7960     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7961                                     constants.REPLACE_DISK_CHG):
7962       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7963                                  errors.ECODE_INVAL)
7964
7965     if self.mode == constants.REPLACE_DISK_AUTO:
7966       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7967       faulty_secondary = self._FindFaultyDisks(secondary_node)
7968
7969       if faulty_primary and faulty_secondary:
7970         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7971                                    " one node and can not be repaired"
7972                                    " automatically" % self.instance_name,
7973                                    errors.ECODE_STATE)
7974
7975       if faulty_primary:
7976         self.disks = faulty_primary
7977         self.target_node = instance.primary_node
7978         self.other_node = secondary_node
7979         check_nodes = [self.target_node, self.other_node]
7980       elif faulty_secondary:
7981         self.disks = faulty_secondary
7982         self.target_node = secondary_node
7983         self.other_node = instance.primary_node
7984         check_nodes = [self.target_node, self.other_node]
7985       else:
7986         self.disks = []
7987         check_nodes = []
7988
7989     else:
7990       # Non-automatic modes
7991       if self.mode == constants.REPLACE_DISK_PRI:
7992         self.target_node = instance.primary_node
7993         self.other_node = secondary_node
7994         check_nodes = [self.target_node, self.other_node]
7995
7996       elif self.mode == constants.REPLACE_DISK_SEC:
7997         self.target_node = secondary_node
7998         self.other_node = instance.primary_node
7999         check_nodes = [self.target_node, self.other_node]
8000
8001       elif self.mode == constants.REPLACE_DISK_CHG:
8002         self.new_node = remote_node
8003         self.other_node = instance.primary_node
8004         self.target_node = secondary_node
8005         check_nodes = [self.new_node, self.other_node]
8006
8007         _CheckNodeNotDrained(self.lu, remote_node)
8008         _CheckNodeVmCapable(self.lu, remote_node)
8009
8010         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8011         assert old_node_info is not None
8012         if old_node_info.offline and not self.early_release:
8013           # doesn't make sense to delay the release
8014           self.early_release = True
8015           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8016                           " early-release mode", secondary_node)
8017
8018       else:
8019         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8020                                      self.mode)
8021
8022       # If not specified all disks should be replaced
8023       if not self.disks:
8024         self.disks = range(len(self.instance.disks))
8025
8026     for node in check_nodes:
8027       _CheckNodeOnline(self.lu, node)
8028
8029     # Check whether disks are valid
8030     for disk_idx in self.disks:
8031       instance.FindDisk(disk_idx)
8032
8033     # Get secondary node IP addresses
8034     node_2nd_ip = {}
8035
8036     for node_name in [self.target_node, self.other_node, self.new_node]:
8037       if node_name is not None:
8038         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8039
8040     self.node_secondary_ip = node_2nd_ip
8041
8042   def Exec(self, feedback_fn):
8043     """Execute disk replacement.
8044
8045     This dispatches the disk replacement to the appropriate handler.
8046
8047     """
8048     if self.delay_iallocator:
8049       self._CheckPrereq2()
8050
8051     if not self.disks:
8052       feedback_fn("No disks need replacement")
8053       return
8054
8055     feedback_fn("Replacing disk(s) %s for %s" %
8056                 (utils.CommaJoin(self.disks), self.instance.name))
8057
8058     activate_disks = (not self.instance.admin_up)
8059
8060     # Activate the instance disks if we're replacing them on a down instance
8061     if activate_disks:
8062       _StartInstanceDisks(self.lu, self.instance, True)
8063
8064     try:
8065       # Should we replace the secondary node?
8066       if self.new_node is not None:
8067         fn = self._ExecDrbd8Secondary
8068       else:
8069         fn = self._ExecDrbd8DiskOnly
8070
8071       return fn(feedback_fn)
8072
8073     finally:
8074       # Deactivate the instance disks if we're replacing them on a
8075       # down instance
8076       if activate_disks:
8077         _SafeShutdownInstanceDisks(self.lu, self.instance)
8078
8079   def _CheckVolumeGroup(self, nodes):
8080     self.lu.LogInfo("Checking volume groups")
8081
8082     vgname = self.cfg.GetVGName()
8083
8084     # Make sure volume group exists on all involved nodes
8085     results = self.rpc.call_vg_list(nodes)
8086     if not results:
8087       raise errors.OpExecError("Can't list volume groups on the nodes")
8088
8089     for node in nodes:
8090       res = results[node]
8091       res.Raise("Error checking node %s" % node)
8092       if vgname not in res.payload:
8093         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8094                                  (vgname, node))
8095
8096   def _CheckDisksExistence(self, nodes):
8097     # Check disk existence
8098     for idx, dev in enumerate(self.instance.disks):
8099       if idx not in self.disks:
8100         continue
8101
8102       for node in nodes:
8103         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8104         self.cfg.SetDiskID(dev, node)
8105
8106         result = self.rpc.call_blockdev_find(node, dev)
8107
8108         msg = result.fail_msg
8109         if msg or not result.payload:
8110           if not msg:
8111             msg = "disk not found"
8112           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8113                                    (idx, node, msg))
8114
8115   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8116     for idx, dev in enumerate(self.instance.disks):
8117       if idx not in self.disks:
8118         continue
8119
8120       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8121                       (idx, node_name))
8122
8123       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8124                                    ldisk=ldisk):
8125         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8126                                  " replace disks for instance %s" %
8127                                  (node_name, self.instance.name))
8128
8129   def _CreateNewStorage(self, node_name):
8130     vgname = self.cfg.GetVGName()
8131     iv_names = {}
8132
8133     for idx, dev in enumerate(self.instance.disks):
8134       if idx not in self.disks:
8135         continue
8136
8137       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8138
8139       self.cfg.SetDiskID(dev, node_name)
8140
8141       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8142       names = _GenerateUniqueNames(self.lu, lv_names)
8143
8144       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8145                              logical_id=(vgname, names[0]))
8146       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8147                              logical_id=(vgname, names[1]))
8148
8149       new_lvs = [lv_data, lv_meta]
8150       old_lvs = dev.children
8151       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8152
8153       # we pass force_create=True to force the LVM creation
8154       for new_lv in new_lvs:
8155         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8156                         _GetInstanceInfoText(self.instance), False)
8157
8158     return iv_names
8159
8160   def _CheckDevices(self, node_name, iv_names):
8161     for name, (dev, _, _) in iv_names.iteritems():
8162       self.cfg.SetDiskID(dev, node_name)
8163
8164       result = self.rpc.call_blockdev_find(node_name, dev)
8165
8166       msg = result.fail_msg
8167       if msg or not result.payload:
8168         if not msg:
8169           msg = "disk not found"
8170         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8171                                  (name, msg))
8172
8173       if result.payload.is_degraded:
8174         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8175
8176   def _RemoveOldStorage(self, node_name, iv_names):
8177     for name, (_, old_lvs, _) in iv_names.iteritems():
8178       self.lu.LogInfo("Remove logical volumes for %s" % name)
8179
8180       for lv in old_lvs:
8181         self.cfg.SetDiskID(lv, node_name)
8182
8183         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8184         if msg:
8185           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8186                              hint="remove unused LVs manually")
8187
8188   def _ReleaseNodeLock(self, node_name):
8189     """Releases the lock for a given node."""
8190     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8191
8192   def _ExecDrbd8DiskOnly(self, feedback_fn):
8193     """Replace a disk on the primary or secondary for DRBD 8.
8194
8195     The algorithm for replace is quite complicated:
8196
8197       1. for each disk to be replaced:
8198
8199         1. create new LVs on the target node with unique names
8200         1. detach old LVs from the drbd device
8201         1. rename old LVs to name_replaced.<time_t>
8202         1. rename new LVs to old LVs
8203         1. attach the new LVs (with the old names now) to the drbd device
8204
8205       1. wait for sync across all devices
8206
8207       1. for each modified disk:
8208
8209         1. remove old LVs (which have the name name_replaces.<time_t>)
8210
8211     Failures are not very well handled.
8212
8213     """
8214     steps_total = 6
8215
8216     # Step: check device activation
8217     self.lu.LogStep(1, steps_total, "Check device existence")
8218     self._CheckDisksExistence([self.other_node, self.target_node])
8219     self._CheckVolumeGroup([self.target_node, self.other_node])
8220
8221     # Step: check other node consistency
8222     self.lu.LogStep(2, steps_total, "Check peer consistency")
8223     self._CheckDisksConsistency(self.other_node,
8224                                 self.other_node == self.instance.primary_node,
8225                                 False)
8226
8227     # Step: create new storage
8228     self.lu.LogStep(3, steps_total, "Allocate new storage")
8229     iv_names = self._CreateNewStorage(self.target_node)
8230
8231     # Step: for each lv, detach+rename*2+attach
8232     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8233     for dev, old_lvs, new_lvs in iv_names.itervalues():
8234       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8235
8236       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8237                                                      old_lvs)
8238       result.Raise("Can't detach drbd from local storage on node"
8239                    " %s for device %s" % (self.target_node, dev.iv_name))
8240       #dev.children = []
8241       #cfg.Update(instance)
8242
8243       # ok, we created the new LVs, so now we know we have the needed
8244       # storage; as such, we proceed on the target node to rename
8245       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8246       # using the assumption that logical_id == physical_id (which in
8247       # turn is the unique_id on that node)
8248
8249       # FIXME(iustin): use a better name for the replaced LVs
8250       temp_suffix = int(time.time())
8251       ren_fn = lambda d, suff: (d.physical_id[0],
8252                                 d.physical_id[1] + "_replaced-%s" % suff)
8253
8254       # Build the rename list based on what LVs exist on the node
8255       rename_old_to_new = []
8256       for to_ren in old_lvs:
8257         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8258         if not result.fail_msg and result.payload:
8259           # device exists
8260           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8261
8262       self.lu.LogInfo("Renaming the old LVs on the target node")
8263       result = self.rpc.call_blockdev_rename(self.target_node,
8264                                              rename_old_to_new)
8265       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8266
8267       # Now we rename the new LVs to the old LVs
8268       self.lu.LogInfo("Renaming the new LVs on the target node")
8269       rename_new_to_old = [(new, old.physical_id)
8270                            for old, new in zip(old_lvs, new_lvs)]
8271       result = self.rpc.call_blockdev_rename(self.target_node,
8272                                              rename_new_to_old)
8273       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8274
8275       for old, new in zip(old_lvs, new_lvs):
8276         new.logical_id = old.logical_id
8277         self.cfg.SetDiskID(new, self.target_node)
8278
8279       for disk in old_lvs:
8280         disk.logical_id = ren_fn(disk, temp_suffix)
8281         self.cfg.SetDiskID(disk, self.target_node)
8282
8283       # Now that the new lvs have the old name, we can add them to the device
8284       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8285       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8286                                                   new_lvs)
8287       msg = result.fail_msg
8288       if msg:
8289         for new_lv in new_lvs:
8290           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8291                                                new_lv).fail_msg
8292           if msg2:
8293             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8294                                hint=("cleanup manually the unused logical"
8295                                      "volumes"))
8296         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8297
8298       dev.children = new_lvs
8299
8300       self.cfg.Update(self.instance, feedback_fn)
8301
8302     cstep = 5
8303     if self.early_release:
8304       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8305       cstep += 1
8306       self._RemoveOldStorage(self.target_node, iv_names)
8307       # WARNING: we release both node locks here, do not do other RPCs
8308       # than WaitForSync to the primary node
8309       self._ReleaseNodeLock([self.target_node, self.other_node])
8310
8311     # Wait for sync
8312     # This can fail as the old devices are degraded and _WaitForSync
8313     # does a combined result over all disks, so we don't check its return value
8314     self.lu.LogStep(cstep, steps_total, "Sync devices")
8315     cstep += 1
8316     _WaitForSync(self.lu, self.instance)
8317
8318     # Check all devices manually
8319     self._CheckDevices(self.instance.primary_node, iv_names)
8320
8321     # Step: remove old storage
8322     if not self.early_release:
8323       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8324       cstep += 1
8325       self._RemoveOldStorage(self.target_node, iv_names)
8326
8327   def _ExecDrbd8Secondary(self, feedback_fn):
8328     """Replace the secondary node for DRBD 8.
8329
8330     The algorithm for replace is quite complicated:
8331       - for all disks of the instance:
8332         - create new LVs on the new node with same names
8333         - shutdown the drbd device on the old secondary
8334         - disconnect the drbd network on the primary
8335         - create the drbd device on the new secondary
8336         - network attach the drbd on the primary, using an artifice:
8337           the drbd code for Attach() will connect to the network if it
8338           finds a device which is connected to the good local disks but
8339           not network enabled
8340       - wait for sync across all devices
8341       - remove all disks from the old secondary
8342
8343     Failures are not very well handled.
8344
8345     """
8346     steps_total = 6
8347
8348     # Step: check device activation
8349     self.lu.LogStep(1, steps_total, "Check device existence")
8350     self._CheckDisksExistence([self.instance.primary_node])
8351     self._CheckVolumeGroup([self.instance.primary_node])
8352
8353     # Step: check other node consistency
8354     self.lu.LogStep(2, steps_total, "Check peer consistency")
8355     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8356
8357     # Step: create new storage
8358     self.lu.LogStep(3, steps_total, "Allocate new storage")
8359     for idx, dev in enumerate(self.instance.disks):
8360       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8361                       (self.new_node, idx))
8362       # we pass force_create=True to force LVM creation
8363       for new_lv in dev.children:
8364         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8365                         _GetInstanceInfoText(self.instance), False)
8366
8367     # Step 4: dbrd minors and drbd setups changes
8368     # after this, we must manually remove the drbd minors on both the
8369     # error and the success paths
8370     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8371     minors = self.cfg.AllocateDRBDMinor([self.new_node
8372                                          for dev in self.instance.disks],
8373                                         self.instance.name)
8374     logging.debug("Allocated minors %r", minors)
8375
8376     iv_names = {}
8377     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8378       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8379                       (self.new_node, idx))
8380       # create new devices on new_node; note that we create two IDs:
8381       # one without port, so the drbd will be activated without
8382       # networking information on the new node at this stage, and one
8383       # with network, for the latter activation in step 4
8384       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8385       if self.instance.primary_node == o_node1:
8386         p_minor = o_minor1
8387       else:
8388         assert self.instance.primary_node == o_node2, "Three-node instance?"
8389         p_minor = o_minor2
8390
8391       new_alone_id = (self.instance.primary_node, self.new_node, None,
8392                       p_minor, new_minor, o_secret)
8393       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8394                     p_minor, new_minor, o_secret)
8395
8396       iv_names[idx] = (dev, dev.children, new_net_id)
8397       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8398                     new_net_id)
8399       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8400                               logical_id=new_alone_id,
8401                               children=dev.children,
8402                               size=dev.size)
8403       try:
8404         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8405                               _GetInstanceInfoText(self.instance), False)
8406       except errors.GenericError:
8407         self.cfg.ReleaseDRBDMinors(self.instance.name)
8408         raise
8409
8410     # We have new devices, shutdown the drbd on the old secondary
8411     for idx, dev in enumerate(self.instance.disks):
8412       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8413       self.cfg.SetDiskID(dev, self.target_node)
8414       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8415       if msg:
8416         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8417                            "node: %s" % (idx, msg),
8418                            hint=("Please cleanup this device manually as"
8419                                  " soon as possible"))
8420
8421     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8422     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8423                                                self.node_secondary_ip,
8424                                                self.instance.disks)\
8425                                               [self.instance.primary_node]
8426
8427     msg = result.fail_msg
8428     if msg:
8429       # detaches didn't succeed (unlikely)
8430       self.cfg.ReleaseDRBDMinors(self.instance.name)
8431       raise errors.OpExecError("Can't detach the disks from the network on"
8432                                " old node: %s" % (msg,))
8433
8434     # if we managed to detach at least one, we update all the disks of
8435     # the instance to point to the new secondary
8436     self.lu.LogInfo("Updating instance configuration")
8437     for dev, _, new_logical_id in iv_names.itervalues():
8438       dev.logical_id = new_logical_id
8439       self.cfg.SetDiskID(dev, self.instance.primary_node)
8440
8441     self.cfg.Update(self.instance, feedback_fn)
8442
8443     # and now perform the drbd attach
8444     self.lu.LogInfo("Attaching primary drbds to new secondary"
8445                     " (standalone => connected)")
8446     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8447                                             self.new_node],
8448                                            self.node_secondary_ip,
8449                                            self.instance.disks,
8450                                            self.instance.name,
8451                                            False)
8452     for to_node, to_result in result.items():
8453       msg = to_result.fail_msg
8454       if msg:
8455         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8456                            to_node, msg,
8457                            hint=("please do a gnt-instance info to see the"
8458                                  " status of disks"))
8459     cstep = 5
8460     if self.early_release:
8461       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8462       cstep += 1
8463       self._RemoveOldStorage(self.target_node, iv_names)
8464       # WARNING: we release all node locks here, do not do other RPCs
8465       # than WaitForSync to the primary node
8466       self._ReleaseNodeLock([self.instance.primary_node,
8467                              self.target_node,
8468                              self.new_node])
8469
8470     # Wait for sync
8471     # This can fail as the old devices are degraded and _WaitForSync
8472     # does a combined result over all disks, so we don't check its return value
8473     self.lu.LogStep(cstep, steps_total, "Sync devices")
8474     cstep += 1
8475     _WaitForSync(self.lu, self.instance)
8476
8477     # Check all devices manually
8478     self._CheckDevices(self.instance.primary_node, iv_names)
8479
8480     # Step: remove old storage
8481     if not self.early_release:
8482       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8483       self._RemoveOldStorage(self.target_node, iv_names)
8484
8485
8486 class LURepairNodeStorage(NoHooksLU):
8487   """Repairs the volume group on a node.
8488
8489   """
8490   REQ_BGL = False
8491
8492   def CheckArguments(self):
8493     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8494
8495     storage_type = self.op.storage_type
8496
8497     if (constants.SO_FIX_CONSISTENCY not in
8498         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8499       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8500                                  " repaired" % storage_type,
8501                                  errors.ECODE_INVAL)
8502
8503   def ExpandNames(self):
8504     self.needed_locks = {
8505       locking.LEVEL_NODE: [self.op.node_name],
8506       }
8507
8508   def _CheckFaultyDisks(self, instance, node_name):
8509     """Ensure faulty disks abort the opcode or at least warn."""
8510     try:
8511       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8512                                   node_name, True):
8513         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8514                                    " node '%s'" % (instance.name, node_name),
8515                                    errors.ECODE_STATE)
8516     except errors.OpPrereqError, err:
8517       if self.op.ignore_consistency:
8518         self.proc.LogWarning(str(err.args[0]))
8519       else:
8520         raise
8521
8522   def CheckPrereq(self):
8523     """Check prerequisites.
8524
8525     """
8526     # Check whether any instance on this node has faulty disks
8527     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8528       if not inst.admin_up:
8529         continue
8530       check_nodes = set(inst.all_nodes)
8531       check_nodes.discard(self.op.node_name)
8532       for inst_node_name in check_nodes:
8533         self._CheckFaultyDisks(inst, inst_node_name)
8534
8535   def Exec(self, feedback_fn):
8536     feedback_fn("Repairing storage unit '%s' on %s ..." %
8537                 (self.op.name, self.op.node_name))
8538
8539     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8540     result = self.rpc.call_storage_execute(self.op.node_name,
8541                                            self.op.storage_type, st_args,
8542                                            self.op.name,
8543                                            constants.SO_FIX_CONSISTENCY)
8544     result.Raise("Failed to repair storage unit '%s' on %s" %
8545                  (self.op.name, self.op.node_name))
8546
8547
8548 class LUNodeEvacStrategy(NoHooksLU):
8549   """Computes the node evacuation strategy.
8550
8551   """
8552   REQ_BGL = False
8553
8554   def CheckArguments(self):
8555     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8556
8557   def ExpandNames(self):
8558     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8559     self.needed_locks = locks = {}
8560     if self.op.remote_node is None:
8561       locks[locking.LEVEL_NODE] = locking.ALL_SET
8562     else:
8563       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8564       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8565
8566   def Exec(self, feedback_fn):
8567     if self.op.remote_node is not None:
8568       instances = []
8569       for node in self.op.nodes:
8570         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8571       result = []
8572       for i in instances:
8573         if i.primary_node == self.op.remote_node:
8574           raise errors.OpPrereqError("Node %s is the primary node of"
8575                                      " instance %s, cannot use it as"
8576                                      " secondary" %
8577                                      (self.op.remote_node, i.name),
8578                                      errors.ECODE_INVAL)
8579         result.append([i.name, self.op.remote_node])
8580     else:
8581       ial = IAllocator(self.cfg, self.rpc,
8582                        mode=constants.IALLOCATOR_MODE_MEVAC,
8583                        evac_nodes=self.op.nodes)
8584       ial.Run(self.op.iallocator, validate=True)
8585       if not ial.success:
8586         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8587                                  errors.ECODE_NORES)
8588       result = ial.result
8589     return result
8590
8591
8592 class LUInstanceGrowDisk(LogicalUnit):
8593   """Grow a disk of an instance.
8594
8595   """
8596   HPATH = "disk-grow"
8597   HTYPE = constants.HTYPE_INSTANCE
8598   REQ_BGL = False
8599
8600   def ExpandNames(self):
8601     self._ExpandAndLockInstance()
8602     self.needed_locks[locking.LEVEL_NODE] = []
8603     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8604
8605   def DeclareLocks(self, level):
8606     if level == locking.LEVEL_NODE:
8607       self._LockInstancesNodes()
8608
8609   def BuildHooksEnv(self):
8610     """Build hooks env.
8611
8612     This runs on the master, the primary and all the secondaries.
8613
8614     """
8615     env = {
8616       "DISK": self.op.disk,
8617       "AMOUNT": self.op.amount,
8618       }
8619     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8620     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8621     return env, nl, nl
8622
8623   def CheckPrereq(self):
8624     """Check prerequisites.
8625
8626     This checks that the instance is in the cluster.
8627
8628     """
8629     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8630     assert instance is not None, \
8631       "Cannot retrieve locked instance %s" % self.op.instance_name
8632     nodenames = list(instance.all_nodes)
8633     for node in nodenames:
8634       _CheckNodeOnline(self, node)
8635
8636     self.instance = instance
8637
8638     if instance.disk_template not in constants.DTS_GROWABLE:
8639       raise errors.OpPrereqError("Instance's disk layout does not support"
8640                                  " growing.", errors.ECODE_INVAL)
8641
8642     self.disk = instance.FindDisk(self.op.disk)
8643
8644     if instance.disk_template != constants.DT_FILE:
8645       # TODO: check the free disk space for file, when that feature
8646       # will be supported
8647       _CheckNodesFreeDiskPerVG(self, nodenames,
8648                                self.disk.ComputeGrowth(self.op.amount))
8649
8650   def Exec(self, feedback_fn):
8651     """Execute disk grow.
8652
8653     """
8654     instance = self.instance
8655     disk = self.disk
8656
8657     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8658     if not disks_ok:
8659       raise errors.OpExecError("Cannot activate block device to grow")
8660
8661     for node in instance.all_nodes:
8662       self.cfg.SetDiskID(disk, node)
8663       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8664       result.Raise("Grow request failed to node %s" % node)
8665
8666       # TODO: Rewrite code to work properly
8667       # DRBD goes into sync mode for a short amount of time after executing the
8668       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8669       # calling "resize" in sync mode fails. Sleeping for a short amount of
8670       # time is a work-around.
8671       time.sleep(5)
8672
8673     disk.RecordGrow(self.op.amount)
8674     self.cfg.Update(instance, feedback_fn)
8675     if self.op.wait_for_sync:
8676       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8677       if disk_abort:
8678         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8679                              " status.\nPlease check the instance.")
8680       if not instance.admin_up:
8681         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8682     elif not instance.admin_up:
8683       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8684                            " not supposed to be running because no wait for"
8685                            " sync mode was requested.")
8686
8687
8688 class LUInstanceQueryData(NoHooksLU):
8689   """Query runtime instance data.
8690
8691   """
8692   REQ_BGL = False
8693
8694   def ExpandNames(self):
8695     self.needed_locks = {}
8696     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8697
8698     if self.op.instances:
8699       self.wanted_names = []
8700       for name in self.op.instances:
8701         full_name = _ExpandInstanceName(self.cfg, name)
8702         self.wanted_names.append(full_name)
8703       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8704     else:
8705       self.wanted_names = None
8706       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8707
8708     self.needed_locks[locking.LEVEL_NODE] = []
8709     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8710
8711   def DeclareLocks(self, level):
8712     if level == locking.LEVEL_NODE:
8713       self._LockInstancesNodes()
8714
8715   def CheckPrereq(self):
8716     """Check prerequisites.
8717
8718     This only checks the optional instance list against the existing names.
8719
8720     """
8721     if self.wanted_names is None:
8722       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8723
8724     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8725                              in self.wanted_names]
8726
8727   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8728     """Returns the status of a block device
8729
8730     """
8731     if self.op.static or not node:
8732       return None
8733
8734     self.cfg.SetDiskID(dev, node)
8735
8736     result = self.rpc.call_blockdev_find(node, dev)
8737     if result.offline:
8738       return None
8739
8740     result.Raise("Can't compute disk status for %s" % instance_name)
8741
8742     status = result.payload
8743     if status is None:
8744       return None
8745
8746     return (status.dev_path, status.major, status.minor,
8747             status.sync_percent, status.estimated_time,
8748             status.is_degraded, status.ldisk_status)
8749
8750   def _ComputeDiskStatus(self, instance, snode, dev):
8751     """Compute block device status.
8752
8753     """
8754     if dev.dev_type in constants.LDS_DRBD:
8755       # we change the snode then (otherwise we use the one passed in)
8756       if dev.logical_id[0] == instance.primary_node:
8757         snode = dev.logical_id[1]
8758       else:
8759         snode = dev.logical_id[0]
8760
8761     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8762                                               instance.name, dev)
8763     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8764
8765     if dev.children:
8766       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8767                       for child in dev.children]
8768     else:
8769       dev_children = []
8770
8771     data = {
8772       "iv_name": dev.iv_name,
8773       "dev_type": dev.dev_type,
8774       "logical_id": dev.logical_id,
8775       "physical_id": dev.physical_id,
8776       "pstatus": dev_pstatus,
8777       "sstatus": dev_sstatus,
8778       "children": dev_children,
8779       "mode": dev.mode,
8780       "size": dev.size,
8781       }
8782
8783     return data
8784
8785   def Exec(self, feedback_fn):
8786     """Gather and return data"""
8787     result = {}
8788
8789     cluster = self.cfg.GetClusterInfo()
8790
8791     for instance in self.wanted_instances:
8792       if not self.op.static:
8793         remote_info = self.rpc.call_instance_info(instance.primary_node,
8794                                                   instance.name,
8795                                                   instance.hypervisor)
8796         remote_info.Raise("Error checking node %s" % instance.primary_node)
8797         remote_info = remote_info.payload
8798         if remote_info and "state" in remote_info:
8799           remote_state = "up"
8800         else:
8801           remote_state = "down"
8802       else:
8803         remote_state = None
8804       if instance.admin_up:
8805         config_state = "up"
8806       else:
8807         config_state = "down"
8808
8809       disks = [self._ComputeDiskStatus(instance, None, device)
8810                for device in instance.disks]
8811
8812       idict = {
8813         "name": instance.name,
8814         "config_state": config_state,
8815         "run_state": remote_state,
8816         "pnode": instance.primary_node,
8817         "snodes": instance.secondary_nodes,
8818         "os": instance.os,
8819         # this happens to be the same format used for hooks
8820         "nics": _NICListToTuple(self, instance.nics),
8821         "disk_template": instance.disk_template,
8822         "disks": disks,
8823         "hypervisor": instance.hypervisor,
8824         "network_port": instance.network_port,
8825         "hv_instance": instance.hvparams,
8826         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8827         "be_instance": instance.beparams,
8828         "be_actual": cluster.FillBE(instance),
8829         "os_instance": instance.osparams,
8830         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8831         "serial_no": instance.serial_no,
8832         "mtime": instance.mtime,
8833         "ctime": instance.ctime,
8834         "uuid": instance.uuid,
8835         }
8836
8837       result[instance.name] = idict
8838
8839     return result
8840
8841
8842 class LUInstanceSetParams(LogicalUnit):
8843   """Modifies an instances's parameters.
8844
8845   """
8846   HPATH = "instance-modify"
8847   HTYPE = constants.HTYPE_INSTANCE
8848   REQ_BGL = False
8849
8850   def CheckArguments(self):
8851     if not (self.op.nics or self.op.disks or self.op.disk_template or
8852             self.op.hvparams or self.op.beparams or self.op.os_name):
8853       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8854
8855     if self.op.hvparams:
8856       _CheckGlobalHvParams(self.op.hvparams)
8857
8858     # Disk validation
8859     disk_addremove = 0
8860     for disk_op, disk_dict in self.op.disks:
8861       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8862       if disk_op == constants.DDM_REMOVE:
8863         disk_addremove += 1
8864         continue
8865       elif disk_op == constants.DDM_ADD:
8866         disk_addremove += 1
8867       else:
8868         if not isinstance(disk_op, int):
8869           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8870         if not isinstance(disk_dict, dict):
8871           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8872           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8873
8874       if disk_op == constants.DDM_ADD:
8875         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8876         if mode not in constants.DISK_ACCESS_SET:
8877           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8878                                      errors.ECODE_INVAL)
8879         size = disk_dict.get('size', None)
8880         if size is None:
8881           raise errors.OpPrereqError("Required disk parameter size missing",
8882                                      errors.ECODE_INVAL)
8883         try:
8884           size = int(size)
8885         except (TypeError, ValueError), err:
8886           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8887                                      str(err), errors.ECODE_INVAL)
8888         disk_dict['size'] = size
8889       else:
8890         # modification of disk
8891         if 'size' in disk_dict:
8892           raise errors.OpPrereqError("Disk size change not possible, use"
8893                                      " grow-disk", errors.ECODE_INVAL)
8894
8895     if disk_addremove > 1:
8896       raise errors.OpPrereqError("Only one disk add or remove operation"
8897                                  " supported at a time", errors.ECODE_INVAL)
8898
8899     if self.op.disks and self.op.disk_template is not None:
8900       raise errors.OpPrereqError("Disk template conversion and other disk"
8901                                  " changes not supported at the same time",
8902                                  errors.ECODE_INVAL)
8903
8904     if (self.op.disk_template and
8905         self.op.disk_template in constants.DTS_NET_MIRROR and
8906         self.op.remote_node is None):
8907       raise errors.OpPrereqError("Changing the disk template to a mirrored"
8908                                  " one requires specifying a secondary node",
8909                                  errors.ECODE_INVAL)
8910
8911     # NIC validation
8912     nic_addremove = 0
8913     for nic_op, nic_dict in self.op.nics:
8914       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8915       if nic_op == constants.DDM_REMOVE:
8916         nic_addremove += 1
8917         continue
8918       elif nic_op == constants.DDM_ADD:
8919         nic_addremove += 1
8920       else:
8921         if not isinstance(nic_op, int):
8922           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8923         if not isinstance(nic_dict, dict):
8924           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8925           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8926
8927       # nic_dict should be a dict
8928       nic_ip = nic_dict.get('ip', None)
8929       if nic_ip is not None:
8930         if nic_ip.lower() == constants.VALUE_NONE:
8931           nic_dict['ip'] = None
8932         else:
8933           if not netutils.IPAddress.IsValid(nic_ip):
8934             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8935                                        errors.ECODE_INVAL)
8936
8937       nic_bridge = nic_dict.get('bridge', None)
8938       nic_link = nic_dict.get('link', None)
8939       if nic_bridge and nic_link:
8940         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8941                                    " at the same time", errors.ECODE_INVAL)
8942       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8943         nic_dict['bridge'] = None
8944       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8945         nic_dict['link'] = None
8946
8947       if nic_op == constants.DDM_ADD:
8948         nic_mac = nic_dict.get('mac', None)
8949         if nic_mac is None:
8950           nic_dict['mac'] = constants.VALUE_AUTO
8951
8952       if 'mac' in nic_dict:
8953         nic_mac = nic_dict['mac']
8954         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8955           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8956
8957         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8958           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8959                                      " modifying an existing nic",
8960                                      errors.ECODE_INVAL)
8961
8962     if nic_addremove > 1:
8963       raise errors.OpPrereqError("Only one NIC add or remove operation"
8964                                  " supported at a time", errors.ECODE_INVAL)
8965
8966   def ExpandNames(self):
8967     self._ExpandAndLockInstance()
8968     self.needed_locks[locking.LEVEL_NODE] = []
8969     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8970
8971   def DeclareLocks(self, level):
8972     if level == locking.LEVEL_NODE:
8973       self._LockInstancesNodes()
8974       if self.op.disk_template and self.op.remote_node:
8975         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8976         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8977
8978   def BuildHooksEnv(self):
8979     """Build hooks env.
8980
8981     This runs on the master, primary and secondaries.
8982
8983     """
8984     args = dict()
8985     if constants.BE_MEMORY in self.be_new:
8986       args['memory'] = self.be_new[constants.BE_MEMORY]
8987     if constants.BE_VCPUS in self.be_new:
8988       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8989     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8990     # information at all.
8991     if self.op.nics:
8992       args['nics'] = []
8993       nic_override = dict(self.op.nics)
8994       for idx, nic in enumerate(self.instance.nics):
8995         if idx in nic_override:
8996           this_nic_override = nic_override[idx]
8997         else:
8998           this_nic_override = {}
8999         if 'ip' in this_nic_override:
9000           ip = this_nic_override['ip']
9001         else:
9002           ip = nic.ip
9003         if 'mac' in this_nic_override:
9004           mac = this_nic_override['mac']
9005         else:
9006           mac = nic.mac
9007         if idx in self.nic_pnew:
9008           nicparams = self.nic_pnew[idx]
9009         else:
9010           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9011         mode = nicparams[constants.NIC_MODE]
9012         link = nicparams[constants.NIC_LINK]
9013         args['nics'].append((ip, mac, mode, link))
9014       if constants.DDM_ADD in nic_override:
9015         ip = nic_override[constants.DDM_ADD].get('ip', None)
9016         mac = nic_override[constants.DDM_ADD]['mac']
9017         nicparams = self.nic_pnew[constants.DDM_ADD]
9018         mode = nicparams[constants.NIC_MODE]
9019         link = nicparams[constants.NIC_LINK]
9020         args['nics'].append((ip, mac, mode, link))
9021       elif constants.DDM_REMOVE in nic_override:
9022         del args['nics'][-1]
9023
9024     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9025     if self.op.disk_template:
9026       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9027     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9028     return env, nl, nl
9029
9030   def CheckPrereq(self):
9031     """Check prerequisites.
9032
9033     This only checks the instance list against the existing names.
9034
9035     """
9036     # checking the new params on the primary/secondary nodes
9037
9038     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9039     cluster = self.cluster = self.cfg.GetClusterInfo()
9040     assert self.instance is not None, \
9041       "Cannot retrieve locked instance %s" % self.op.instance_name
9042     pnode = instance.primary_node
9043     nodelist = list(instance.all_nodes)
9044
9045     # OS change
9046     if self.op.os_name and not self.op.force:
9047       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9048                       self.op.force_variant)
9049       instance_os = self.op.os_name
9050     else:
9051       instance_os = instance.os
9052
9053     if self.op.disk_template:
9054       if instance.disk_template == self.op.disk_template:
9055         raise errors.OpPrereqError("Instance already has disk template %s" %
9056                                    instance.disk_template, errors.ECODE_INVAL)
9057
9058       if (instance.disk_template,
9059           self.op.disk_template) not in self._DISK_CONVERSIONS:
9060         raise errors.OpPrereqError("Unsupported disk template conversion from"
9061                                    " %s to %s" % (instance.disk_template,
9062                                                   self.op.disk_template),
9063                                    errors.ECODE_INVAL)
9064       _CheckInstanceDown(self, instance, "cannot change disk template")
9065       if self.op.disk_template in constants.DTS_NET_MIRROR:
9066         if self.op.remote_node == pnode:
9067           raise errors.OpPrereqError("Given new secondary node %s is the same"
9068                                      " as the primary node of the instance" %
9069                                      self.op.remote_node, errors.ECODE_STATE)
9070         _CheckNodeOnline(self, self.op.remote_node)
9071         _CheckNodeNotDrained(self, self.op.remote_node)
9072         # FIXME: here we assume that the old instance type is DT_PLAIN
9073         assert instance.disk_template == constants.DT_PLAIN
9074         disks = [{"size": d.size, "vg": d.logical_id[0]}
9075                  for d in instance.disks]
9076         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9077         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9078
9079     # hvparams processing
9080     if self.op.hvparams:
9081       hv_type = instance.hypervisor
9082       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9083       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9084       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9085
9086       # local check
9087       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9088       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9089       self.hv_new = hv_new # the new actual values
9090       self.hv_inst = i_hvdict # the new dict (without defaults)
9091     else:
9092       self.hv_new = self.hv_inst = {}
9093
9094     # beparams processing
9095     if self.op.beparams:
9096       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9097                                    use_none=True)
9098       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9099       be_new = cluster.SimpleFillBE(i_bedict)
9100       self.be_new = be_new # the new actual values
9101       self.be_inst = i_bedict # the new dict (without defaults)
9102     else:
9103       self.be_new = self.be_inst = {}
9104
9105     # osparams processing
9106     if self.op.osparams:
9107       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9108       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9109       self.os_inst = i_osdict # the new dict (without defaults)
9110     else:
9111       self.os_inst = {}
9112
9113     self.warn = []
9114
9115     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9116       mem_check_list = [pnode]
9117       if be_new[constants.BE_AUTO_BALANCE]:
9118         # either we changed auto_balance to yes or it was from before
9119         mem_check_list.extend(instance.secondary_nodes)
9120       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9121                                                   instance.hypervisor)
9122       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9123                                          instance.hypervisor)
9124       pninfo = nodeinfo[pnode]
9125       msg = pninfo.fail_msg
9126       if msg:
9127         # Assume the primary node is unreachable and go ahead
9128         self.warn.append("Can't get info from primary node %s: %s" %
9129                          (pnode,  msg))
9130       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9131         self.warn.append("Node data from primary node %s doesn't contain"
9132                          " free memory information" % pnode)
9133       elif instance_info.fail_msg:
9134         self.warn.append("Can't get instance runtime information: %s" %
9135                         instance_info.fail_msg)
9136       else:
9137         if instance_info.payload:
9138           current_mem = int(instance_info.payload['memory'])
9139         else:
9140           # Assume instance not running
9141           # (there is a slight race condition here, but it's not very probable,
9142           # and we have no other way to check)
9143           current_mem = 0
9144         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9145                     pninfo.payload['memory_free'])
9146         if miss_mem > 0:
9147           raise errors.OpPrereqError("This change will prevent the instance"
9148                                      " from starting, due to %d MB of memory"
9149                                      " missing on its primary node" % miss_mem,
9150                                      errors.ECODE_NORES)
9151
9152       if be_new[constants.BE_AUTO_BALANCE]:
9153         for node, nres in nodeinfo.items():
9154           if node not in instance.secondary_nodes:
9155             continue
9156           msg = nres.fail_msg
9157           if msg:
9158             self.warn.append("Can't get info from secondary node %s: %s" %
9159                              (node, msg))
9160           elif not isinstance(nres.payload.get('memory_free', None), int):
9161             self.warn.append("Secondary node %s didn't return free"
9162                              " memory information" % node)
9163           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9164             self.warn.append("Not enough memory to failover instance to"
9165                              " secondary node %s" % node)
9166
9167     # NIC processing
9168     self.nic_pnew = {}
9169     self.nic_pinst = {}
9170     for nic_op, nic_dict in self.op.nics:
9171       if nic_op == constants.DDM_REMOVE:
9172         if not instance.nics:
9173           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9174                                      errors.ECODE_INVAL)
9175         continue
9176       if nic_op != constants.DDM_ADD:
9177         # an existing nic
9178         if not instance.nics:
9179           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9180                                      " no NICs" % nic_op,
9181                                      errors.ECODE_INVAL)
9182         if nic_op < 0 or nic_op >= len(instance.nics):
9183           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9184                                      " are 0 to %d" %
9185                                      (nic_op, len(instance.nics) - 1),
9186                                      errors.ECODE_INVAL)
9187         old_nic_params = instance.nics[nic_op].nicparams
9188         old_nic_ip = instance.nics[nic_op].ip
9189       else:
9190         old_nic_params = {}
9191         old_nic_ip = None
9192
9193       update_params_dict = dict([(key, nic_dict[key])
9194                                  for key in constants.NICS_PARAMETERS
9195                                  if key in nic_dict])
9196
9197       if 'bridge' in nic_dict:
9198         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9199
9200       new_nic_params = _GetUpdatedParams(old_nic_params,
9201                                          update_params_dict)
9202       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9203       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9204       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9205       self.nic_pinst[nic_op] = new_nic_params
9206       self.nic_pnew[nic_op] = new_filled_nic_params
9207       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9208
9209       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9210         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9211         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9212         if msg:
9213           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9214           if self.op.force:
9215             self.warn.append(msg)
9216           else:
9217             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9218       if new_nic_mode == constants.NIC_MODE_ROUTED:
9219         if 'ip' in nic_dict:
9220           nic_ip = nic_dict['ip']
9221         else:
9222           nic_ip = old_nic_ip
9223         if nic_ip is None:
9224           raise errors.OpPrereqError('Cannot set the nic ip to None'
9225                                      ' on a routed nic', errors.ECODE_INVAL)
9226       if 'mac' in nic_dict:
9227         nic_mac = nic_dict['mac']
9228         if nic_mac is None:
9229           raise errors.OpPrereqError('Cannot set the nic mac to None',
9230                                      errors.ECODE_INVAL)
9231         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9232           # otherwise generate the mac
9233           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9234         else:
9235           # or validate/reserve the current one
9236           try:
9237             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9238           except errors.ReservationError:
9239             raise errors.OpPrereqError("MAC address %s already in use"
9240                                        " in cluster" % nic_mac,
9241                                        errors.ECODE_NOTUNIQUE)
9242
9243     # DISK processing
9244     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9245       raise errors.OpPrereqError("Disk operations not supported for"
9246                                  " diskless instances",
9247                                  errors.ECODE_INVAL)
9248     for disk_op, _ in self.op.disks:
9249       if disk_op == constants.DDM_REMOVE:
9250         if len(instance.disks) == 1:
9251           raise errors.OpPrereqError("Cannot remove the last disk of"
9252                                      " an instance", errors.ECODE_INVAL)
9253         _CheckInstanceDown(self, instance, "cannot remove disks")
9254
9255       if (disk_op == constants.DDM_ADD and
9256           len(instance.disks) >= constants.MAX_DISKS):
9257         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9258                                    " add more" % constants.MAX_DISKS,
9259                                    errors.ECODE_STATE)
9260       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9261         # an existing disk
9262         if disk_op < 0 or disk_op >= len(instance.disks):
9263           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9264                                      " are 0 to %d" %
9265                                      (disk_op, len(instance.disks)),
9266                                      errors.ECODE_INVAL)
9267
9268     return
9269
9270   def _ConvertPlainToDrbd(self, feedback_fn):
9271     """Converts an instance from plain to drbd.
9272
9273     """
9274     feedback_fn("Converting template to drbd")
9275     instance = self.instance
9276     pnode = instance.primary_node
9277     snode = self.op.remote_node
9278
9279     # create a fake disk info for _GenerateDiskTemplate
9280     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9281     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9282                                       instance.name, pnode, [snode],
9283                                       disk_info, None, None, 0, feedback_fn)
9284     info = _GetInstanceInfoText(instance)
9285     feedback_fn("Creating aditional volumes...")
9286     # first, create the missing data and meta devices
9287     for disk in new_disks:
9288       # unfortunately this is... not too nice
9289       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9290                             info, True)
9291       for child in disk.children:
9292         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9293     # at this stage, all new LVs have been created, we can rename the
9294     # old ones
9295     feedback_fn("Renaming original volumes...")
9296     rename_list = [(o, n.children[0].logical_id)
9297                    for (o, n) in zip(instance.disks, new_disks)]
9298     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9299     result.Raise("Failed to rename original LVs")
9300
9301     feedback_fn("Initializing DRBD devices...")
9302     # all child devices are in place, we can now create the DRBD devices
9303     for disk in new_disks:
9304       for node in [pnode, snode]:
9305         f_create = node == pnode
9306         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9307
9308     # at this point, the instance has been modified
9309     instance.disk_template = constants.DT_DRBD8
9310     instance.disks = new_disks
9311     self.cfg.Update(instance, feedback_fn)
9312
9313     # disks are created, waiting for sync
9314     disk_abort = not _WaitForSync(self, instance)
9315     if disk_abort:
9316       raise errors.OpExecError("There are some degraded disks for"
9317                                " this instance, please cleanup manually")
9318
9319   def _ConvertDrbdToPlain(self, feedback_fn):
9320     """Converts an instance from drbd to plain.
9321
9322     """
9323     instance = self.instance
9324     assert len(instance.secondary_nodes) == 1
9325     pnode = instance.primary_node
9326     snode = instance.secondary_nodes[0]
9327     feedback_fn("Converting template to plain")
9328
9329     old_disks = instance.disks
9330     new_disks = [d.children[0] for d in old_disks]
9331
9332     # copy over size and mode
9333     for parent, child in zip(old_disks, new_disks):
9334       child.size = parent.size
9335       child.mode = parent.mode
9336
9337     # update instance structure
9338     instance.disks = new_disks
9339     instance.disk_template = constants.DT_PLAIN
9340     self.cfg.Update(instance, feedback_fn)
9341
9342     feedback_fn("Removing volumes on the secondary node...")
9343     for disk in old_disks:
9344       self.cfg.SetDiskID(disk, snode)
9345       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9346       if msg:
9347         self.LogWarning("Could not remove block device %s on node %s,"
9348                         " continuing anyway: %s", disk.iv_name, snode, msg)
9349
9350     feedback_fn("Removing unneeded volumes on the primary node...")
9351     for idx, disk in enumerate(old_disks):
9352       meta = disk.children[1]
9353       self.cfg.SetDiskID(meta, pnode)
9354       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9355       if msg:
9356         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9357                         " continuing anyway: %s", idx, pnode, msg)
9358
9359   def Exec(self, feedback_fn):
9360     """Modifies an instance.
9361
9362     All parameters take effect only at the next restart of the instance.
9363
9364     """
9365     # Process here the warnings from CheckPrereq, as we don't have a
9366     # feedback_fn there.
9367     for warn in self.warn:
9368       feedback_fn("WARNING: %s" % warn)
9369
9370     result = []
9371     instance = self.instance
9372     # disk changes
9373     for disk_op, disk_dict in self.op.disks:
9374       if disk_op == constants.DDM_REMOVE:
9375         # remove the last disk
9376         device = instance.disks.pop()
9377         device_idx = len(instance.disks)
9378         for node, disk in device.ComputeNodeTree(instance.primary_node):
9379           self.cfg.SetDiskID(disk, node)
9380           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9381           if msg:
9382             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9383                             " continuing anyway", device_idx, node, msg)
9384         result.append(("disk/%d" % device_idx, "remove"))
9385       elif disk_op == constants.DDM_ADD:
9386         # add a new disk
9387         if instance.disk_template == constants.DT_FILE:
9388           file_driver, file_path = instance.disks[0].logical_id
9389           file_path = os.path.dirname(file_path)
9390         else:
9391           file_driver = file_path = None
9392         disk_idx_base = len(instance.disks)
9393         new_disk = _GenerateDiskTemplate(self,
9394                                          instance.disk_template,
9395                                          instance.name, instance.primary_node,
9396                                          instance.secondary_nodes,
9397                                          [disk_dict],
9398                                          file_path,
9399                                          file_driver,
9400                                          disk_idx_base, feedback_fn)[0]
9401         instance.disks.append(new_disk)
9402         info = _GetInstanceInfoText(instance)
9403
9404         logging.info("Creating volume %s for instance %s",
9405                      new_disk.iv_name, instance.name)
9406         # Note: this needs to be kept in sync with _CreateDisks
9407         #HARDCODE
9408         for node in instance.all_nodes:
9409           f_create = node == instance.primary_node
9410           try:
9411             _CreateBlockDev(self, node, instance, new_disk,
9412                             f_create, info, f_create)
9413           except errors.OpExecError, err:
9414             self.LogWarning("Failed to create volume %s (%s) on"
9415                             " node %s: %s",
9416                             new_disk.iv_name, new_disk, node, err)
9417         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9418                        (new_disk.size, new_disk.mode)))
9419       else:
9420         # change a given disk
9421         instance.disks[disk_op].mode = disk_dict['mode']
9422         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9423
9424     if self.op.disk_template:
9425       r_shut = _ShutdownInstanceDisks(self, instance)
9426       if not r_shut:
9427         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9428                                  " proceed with disk template conversion")
9429       mode = (instance.disk_template, self.op.disk_template)
9430       try:
9431         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9432       except:
9433         self.cfg.ReleaseDRBDMinors(instance.name)
9434         raise
9435       result.append(("disk_template", self.op.disk_template))
9436
9437     # NIC changes
9438     for nic_op, nic_dict in self.op.nics:
9439       if nic_op == constants.DDM_REMOVE:
9440         # remove the last nic
9441         del instance.nics[-1]
9442         result.append(("nic.%d" % len(instance.nics), "remove"))
9443       elif nic_op == constants.DDM_ADD:
9444         # mac and bridge should be set, by now
9445         mac = nic_dict['mac']
9446         ip = nic_dict.get('ip', None)
9447         nicparams = self.nic_pinst[constants.DDM_ADD]
9448         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9449         instance.nics.append(new_nic)
9450         result.append(("nic.%d" % (len(instance.nics) - 1),
9451                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9452                        (new_nic.mac, new_nic.ip,
9453                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9454                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9455                        )))
9456       else:
9457         for key in 'mac', 'ip':
9458           if key in nic_dict:
9459             setattr(instance.nics[nic_op], key, nic_dict[key])
9460         if nic_op in self.nic_pinst:
9461           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9462         for key, val in nic_dict.iteritems():
9463           result.append(("nic.%s/%d" % (key, nic_op), val))
9464
9465     # hvparams changes
9466     if self.op.hvparams:
9467       instance.hvparams = self.hv_inst
9468       for key, val in self.op.hvparams.iteritems():
9469         result.append(("hv/%s" % key, val))
9470
9471     # beparams changes
9472     if self.op.beparams:
9473       instance.beparams = self.be_inst
9474       for key, val in self.op.beparams.iteritems():
9475         result.append(("be/%s" % key, val))
9476
9477     # OS change
9478     if self.op.os_name:
9479       instance.os = self.op.os_name
9480
9481     # osparams changes
9482     if self.op.osparams:
9483       instance.osparams = self.os_inst
9484       for key, val in self.op.osparams.iteritems():
9485         result.append(("os/%s" % key, val))
9486
9487     self.cfg.Update(instance, feedback_fn)
9488
9489     return result
9490
9491   _DISK_CONVERSIONS = {
9492     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9493     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9494     }
9495
9496
9497 class LUBackupQuery(NoHooksLU):
9498   """Query the exports list
9499
9500   """
9501   REQ_BGL = False
9502
9503   def ExpandNames(self):
9504     self.needed_locks = {}
9505     self.share_locks[locking.LEVEL_NODE] = 1
9506     if not self.op.nodes:
9507       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9508     else:
9509       self.needed_locks[locking.LEVEL_NODE] = \
9510         _GetWantedNodes(self, self.op.nodes)
9511
9512   def Exec(self, feedback_fn):
9513     """Compute the list of all the exported system images.
9514
9515     @rtype: dict
9516     @return: a dictionary with the structure node->(export-list)
9517         where export-list is a list of the instances exported on
9518         that node.
9519
9520     """
9521     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9522     rpcresult = self.rpc.call_export_list(self.nodes)
9523     result = {}
9524     for node in rpcresult:
9525       if rpcresult[node].fail_msg:
9526         result[node] = False
9527       else:
9528         result[node] = rpcresult[node].payload
9529
9530     return result
9531
9532
9533 class LUBackupPrepare(NoHooksLU):
9534   """Prepares an instance for an export and returns useful information.
9535
9536   """
9537   REQ_BGL = False
9538
9539   def ExpandNames(self):
9540     self._ExpandAndLockInstance()
9541
9542   def CheckPrereq(self):
9543     """Check prerequisites.
9544
9545     """
9546     instance_name = self.op.instance_name
9547
9548     self.instance = self.cfg.GetInstanceInfo(instance_name)
9549     assert self.instance is not None, \
9550           "Cannot retrieve locked instance %s" % self.op.instance_name
9551     _CheckNodeOnline(self, self.instance.primary_node)
9552
9553     self._cds = _GetClusterDomainSecret()
9554
9555   def Exec(self, feedback_fn):
9556     """Prepares an instance for an export.
9557
9558     """
9559     instance = self.instance
9560
9561     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9562       salt = utils.GenerateSecret(8)
9563
9564       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9565       result = self.rpc.call_x509_cert_create(instance.primary_node,
9566                                               constants.RIE_CERT_VALIDITY)
9567       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9568
9569       (name, cert_pem) = result.payload
9570
9571       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9572                                              cert_pem)
9573
9574       return {
9575         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9576         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9577                           salt),
9578         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9579         }
9580
9581     return None
9582
9583
9584 class LUBackupExport(LogicalUnit):
9585   """Export an instance to an image in the cluster.
9586
9587   """
9588   HPATH = "instance-export"
9589   HTYPE = constants.HTYPE_INSTANCE
9590   REQ_BGL = False
9591
9592   def CheckArguments(self):
9593     """Check the arguments.
9594
9595     """
9596     self.x509_key_name = self.op.x509_key_name
9597     self.dest_x509_ca_pem = self.op.destination_x509_ca
9598
9599     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9600       if not self.x509_key_name:
9601         raise errors.OpPrereqError("Missing X509 key name for encryption",
9602                                    errors.ECODE_INVAL)
9603
9604       if not self.dest_x509_ca_pem:
9605         raise errors.OpPrereqError("Missing destination X509 CA",
9606                                    errors.ECODE_INVAL)
9607
9608   def ExpandNames(self):
9609     self._ExpandAndLockInstance()
9610
9611     # Lock all nodes for local exports
9612     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9613       # FIXME: lock only instance primary and destination node
9614       #
9615       # Sad but true, for now we have do lock all nodes, as we don't know where
9616       # the previous export might be, and in this LU we search for it and
9617       # remove it from its current node. In the future we could fix this by:
9618       #  - making a tasklet to search (share-lock all), then create the
9619       #    new one, then one to remove, after
9620       #  - removing the removal operation altogether
9621       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9622
9623   def DeclareLocks(self, level):
9624     """Last minute lock declaration."""
9625     # All nodes are locked anyway, so nothing to do here.
9626
9627   def BuildHooksEnv(self):
9628     """Build hooks env.
9629
9630     This will run on the master, primary node and target node.
9631
9632     """
9633     env = {
9634       "EXPORT_MODE": self.op.mode,
9635       "EXPORT_NODE": self.op.target_node,
9636       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9637       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9638       # TODO: Generic function for boolean env variables
9639       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9640       }
9641
9642     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9643
9644     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9645
9646     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9647       nl.append(self.op.target_node)
9648
9649     return env, nl, nl
9650
9651   def CheckPrereq(self):
9652     """Check prerequisites.
9653
9654     This checks that the instance and node names are valid.
9655
9656     """
9657     instance_name = self.op.instance_name
9658
9659     self.instance = self.cfg.GetInstanceInfo(instance_name)
9660     assert self.instance is not None, \
9661           "Cannot retrieve locked instance %s" % self.op.instance_name
9662     _CheckNodeOnline(self, self.instance.primary_node)
9663
9664     if (self.op.remove_instance and self.instance.admin_up and
9665         not self.op.shutdown):
9666       raise errors.OpPrereqError("Can not remove instance without shutting it"
9667                                  " down before")
9668
9669     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9670       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9671       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9672       assert self.dst_node is not None
9673
9674       _CheckNodeOnline(self, self.dst_node.name)
9675       _CheckNodeNotDrained(self, self.dst_node.name)
9676
9677       self._cds = None
9678       self.dest_disk_info = None
9679       self.dest_x509_ca = None
9680
9681     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9682       self.dst_node = None
9683
9684       if len(self.op.target_node) != len(self.instance.disks):
9685         raise errors.OpPrereqError(("Received destination information for %s"
9686                                     " disks, but instance %s has %s disks") %
9687                                    (len(self.op.target_node), instance_name,
9688                                     len(self.instance.disks)),
9689                                    errors.ECODE_INVAL)
9690
9691       cds = _GetClusterDomainSecret()
9692
9693       # Check X509 key name
9694       try:
9695         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9696       except (TypeError, ValueError), err:
9697         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9698
9699       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9700         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9701                                    errors.ECODE_INVAL)
9702
9703       # Load and verify CA
9704       try:
9705         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9706       except OpenSSL.crypto.Error, err:
9707         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9708                                    (err, ), errors.ECODE_INVAL)
9709
9710       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9711       if errcode is not None:
9712         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9713                                    (msg, ), errors.ECODE_INVAL)
9714
9715       self.dest_x509_ca = cert
9716
9717       # Verify target information
9718       disk_info = []
9719       for idx, disk_data in enumerate(self.op.target_node):
9720         try:
9721           (host, port, magic) = \
9722             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9723         except errors.GenericError, err:
9724           raise errors.OpPrereqError("Target info for disk %s: %s" %
9725                                      (idx, err), errors.ECODE_INVAL)
9726
9727         disk_info.append((host, port, magic))
9728
9729       assert len(disk_info) == len(self.op.target_node)
9730       self.dest_disk_info = disk_info
9731
9732     else:
9733       raise errors.ProgrammerError("Unhandled export mode %r" %
9734                                    self.op.mode)
9735
9736     # instance disk type verification
9737     # TODO: Implement export support for file-based disks
9738     for disk in self.instance.disks:
9739       if disk.dev_type == constants.LD_FILE:
9740         raise errors.OpPrereqError("Export not supported for instances with"
9741                                    " file-based disks", errors.ECODE_INVAL)
9742
9743   def _CleanupExports(self, feedback_fn):
9744     """Removes exports of current instance from all other nodes.
9745
9746     If an instance in a cluster with nodes A..D was exported to node C, its
9747     exports will be removed from the nodes A, B and D.
9748
9749     """
9750     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9751
9752     nodelist = self.cfg.GetNodeList()
9753     nodelist.remove(self.dst_node.name)
9754
9755     # on one-node clusters nodelist will be empty after the removal
9756     # if we proceed the backup would be removed because OpBackupQuery
9757     # substitutes an empty list with the full cluster node list.
9758     iname = self.instance.name
9759     if nodelist:
9760       feedback_fn("Removing old exports for instance %s" % iname)
9761       exportlist = self.rpc.call_export_list(nodelist)
9762       for node in exportlist:
9763         if exportlist[node].fail_msg:
9764           continue
9765         if iname in exportlist[node].payload:
9766           msg = self.rpc.call_export_remove(node, iname).fail_msg
9767           if msg:
9768             self.LogWarning("Could not remove older export for instance %s"
9769                             " on node %s: %s", iname, node, msg)
9770
9771   def Exec(self, feedback_fn):
9772     """Export an instance to an image in the cluster.
9773
9774     """
9775     assert self.op.mode in constants.EXPORT_MODES
9776
9777     instance = self.instance
9778     src_node = instance.primary_node
9779
9780     if self.op.shutdown:
9781       # shutdown the instance, but not the disks
9782       feedback_fn("Shutting down instance %s" % instance.name)
9783       result = self.rpc.call_instance_shutdown(src_node, instance,
9784                                                self.op.shutdown_timeout)
9785       # TODO: Maybe ignore failures if ignore_remove_failures is set
9786       result.Raise("Could not shutdown instance %s on"
9787                    " node %s" % (instance.name, src_node))
9788
9789     # set the disks ID correctly since call_instance_start needs the
9790     # correct drbd minor to create the symlinks
9791     for disk in instance.disks:
9792       self.cfg.SetDiskID(disk, src_node)
9793
9794     activate_disks = (not instance.admin_up)
9795
9796     if activate_disks:
9797       # Activate the instance disks if we'exporting a stopped instance
9798       feedback_fn("Activating disks for %s" % instance.name)
9799       _StartInstanceDisks(self, instance, None)
9800
9801     try:
9802       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9803                                                      instance)
9804
9805       helper.CreateSnapshots()
9806       try:
9807         if (self.op.shutdown and instance.admin_up and
9808             not self.op.remove_instance):
9809           assert not activate_disks
9810           feedback_fn("Starting instance %s" % instance.name)
9811           result = self.rpc.call_instance_start(src_node, instance, None, None)
9812           msg = result.fail_msg
9813           if msg:
9814             feedback_fn("Failed to start instance: %s" % msg)
9815             _ShutdownInstanceDisks(self, instance)
9816             raise errors.OpExecError("Could not start instance: %s" % msg)
9817
9818         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9819           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9820         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9821           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9822           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9823
9824           (key_name, _, _) = self.x509_key_name
9825
9826           dest_ca_pem = \
9827             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9828                                             self.dest_x509_ca)
9829
9830           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9831                                                      key_name, dest_ca_pem,
9832                                                      timeouts)
9833       finally:
9834         helper.Cleanup()
9835
9836       # Check for backwards compatibility
9837       assert len(dresults) == len(instance.disks)
9838       assert compat.all(isinstance(i, bool) for i in dresults), \
9839              "Not all results are boolean: %r" % dresults
9840
9841     finally:
9842       if activate_disks:
9843         feedback_fn("Deactivating disks for %s" % instance.name)
9844         _ShutdownInstanceDisks(self, instance)
9845
9846     if not (compat.all(dresults) and fin_resu):
9847       failures = []
9848       if not fin_resu:
9849         failures.append("export finalization")
9850       if not compat.all(dresults):
9851         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9852                                if not dsk)
9853         failures.append("disk export: disk(s) %s" % fdsk)
9854
9855       raise errors.OpExecError("Export failed, errors in %s" %
9856                                utils.CommaJoin(failures))
9857
9858     # At this point, the export was successful, we can cleanup/finish
9859
9860     # Remove instance if requested
9861     if self.op.remove_instance:
9862       feedback_fn("Removing instance %s" % instance.name)
9863       _RemoveInstance(self, feedback_fn, instance,
9864                       self.op.ignore_remove_failures)
9865
9866     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9867       self._CleanupExports(feedback_fn)
9868
9869     return fin_resu, dresults
9870
9871
9872 class LUBackupRemove(NoHooksLU):
9873   """Remove exports related to the named instance.
9874
9875   """
9876   REQ_BGL = False
9877
9878   def ExpandNames(self):
9879     self.needed_locks = {}
9880     # We need all nodes to be locked in order for RemoveExport to work, but we
9881     # don't need to lock the instance itself, as nothing will happen to it (and
9882     # we can remove exports also for a removed instance)
9883     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9884
9885   def Exec(self, feedback_fn):
9886     """Remove any export.
9887
9888     """
9889     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9890     # If the instance was not found we'll try with the name that was passed in.
9891     # This will only work if it was an FQDN, though.
9892     fqdn_warn = False
9893     if not instance_name:
9894       fqdn_warn = True
9895       instance_name = self.op.instance_name
9896
9897     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9898     exportlist = self.rpc.call_export_list(locked_nodes)
9899     found = False
9900     for node in exportlist:
9901       msg = exportlist[node].fail_msg
9902       if msg:
9903         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9904         continue
9905       if instance_name in exportlist[node].payload:
9906         found = True
9907         result = self.rpc.call_export_remove(node, instance_name)
9908         msg = result.fail_msg
9909         if msg:
9910           logging.error("Could not remove export for instance %s"
9911                         " on node %s: %s", instance_name, node, msg)
9912
9913     if fqdn_warn and not found:
9914       feedback_fn("Export not found. If trying to remove an export belonging"
9915                   " to a deleted instance please use its Fully Qualified"
9916                   " Domain Name.")
9917
9918
9919 class LUGroupAdd(LogicalUnit):
9920   """Logical unit for creating node groups.
9921
9922   """
9923   HPATH = "group-add"
9924   HTYPE = constants.HTYPE_GROUP
9925   REQ_BGL = False
9926
9927   def ExpandNames(self):
9928     # We need the new group's UUID here so that we can create and acquire the
9929     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
9930     # that it should not check whether the UUID exists in the configuration.
9931     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
9932     self.needed_locks = {}
9933     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
9934
9935   def CheckPrereq(self):
9936     """Check prerequisites.
9937
9938     This checks that the given group name is not an existing node group
9939     already.
9940
9941     """
9942     try:
9943       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9944     except errors.OpPrereqError:
9945       pass
9946     else:
9947       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
9948                                  " node group (UUID: %s)" %
9949                                  (self.op.group_name, existing_uuid),
9950                                  errors.ECODE_EXISTS)
9951
9952     if self.op.ndparams:
9953       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
9954
9955   def BuildHooksEnv(self):
9956     """Build hooks env.
9957
9958     """
9959     env = {
9960       "GROUP_NAME": self.op.group_name,
9961       }
9962     mn = self.cfg.GetMasterNode()
9963     return env, [mn], [mn]
9964
9965   def Exec(self, feedback_fn):
9966     """Add the node group to the cluster.
9967
9968     """
9969     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
9970                                   uuid=self.group_uuid,
9971                                   alloc_policy=self.op.alloc_policy,
9972                                   ndparams=self.op.ndparams)
9973
9974     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
9975     del self.remove_locks[locking.LEVEL_NODEGROUP]
9976
9977
9978 class LUGroupAssignNodes(NoHooksLU):
9979   """Logical unit for assigning nodes to groups.
9980
9981   """
9982   REQ_BGL = False
9983
9984   def ExpandNames(self):
9985     # These raise errors.OpPrereqError on their own:
9986     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9987     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9988
9989     # We want to lock all the affected nodes and groups. We have readily
9990     # available the list of nodes, and the *destination* group. To gather the
9991     # list of "source" groups, we need to fetch node information.
9992     self.node_data = self.cfg.GetAllNodesInfo()
9993     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
9994     affected_groups.add(self.group_uuid)
9995
9996     self.needed_locks = {
9997       locking.LEVEL_NODEGROUP: list(affected_groups),
9998       locking.LEVEL_NODE: self.op.nodes,
9999       }
10000
10001   def CheckPrereq(self):
10002     """Check prerequisites.
10003
10004     """
10005     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10006     instance_data = self.cfg.GetAllInstancesInfo()
10007
10008     if self.group is None:
10009       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10010                                (self.op.group_name, self.group_uuid))
10011
10012     (new_splits, previous_splits) = \
10013       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10014                                              for node in self.op.nodes],
10015                                             self.node_data, instance_data)
10016
10017     if new_splits:
10018       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10019
10020       if not self.op.force:
10021         raise errors.OpExecError("The following instances get split by this"
10022                                  " change and --force was not given: %s" %
10023                                  fmt_new_splits)
10024       else:
10025         self.LogWarning("This operation will split the following instances: %s",
10026                         fmt_new_splits)
10027
10028         if previous_splits:
10029           self.LogWarning("In addition, these already-split instances continue"
10030                           " to be spit across groups: %s",
10031                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10032
10033   def Exec(self, feedback_fn):
10034     """Assign nodes to a new group.
10035
10036     """
10037     for node in self.op.nodes:
10038       self.node_data[node].group = self.group_uuid
10039
10040     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10041
10042   @staticmethod
10043   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10044     """Check for split instances after a node assignment.
10045
10046     This method considers a series of node assignments as an atomic operation,
10047     and returns information about split instances after applying the set of
10048     changes.
10049
10050     In particular, it returns information about newly split instances, and
10051     instances that were already split, and remain so after the change.
10052
10053     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10054     considered.
10055
10056     @type changes: list of (node_name, new_group_uuid) pairs.
10057     @param changes: list of node assignments to consider.
10058     @param node_data: a dict with data for all nodes
10059     @param instance_data: a dict with all instances to consider
10060     @rtype: a two-tuple
10061     @return: a list of instances that were previously okay and result split as a
10062       consequence of this change, and a list of instances that were previously
10063       split and this change does not fix.
10064
10065     """
10066     changed_nodes = dict((node, group) for node, group in changes
10067                          if node_data[node].group != group)
10068
10069     all_split_instances = set()
10070     previously_split_instances = set()
10071
10072     def InstanceNodes(instance):
10073       return [instance.primary_node] + list(instance.secondary_nodes)
10074
10075     for inst in instance_data.values():
10076       if inst.disk_template not in constants.DTS_NET_MIRROR:
10077         continue
10078
10079       instance_nodes = InstanceNodes(inst)
10080
10081       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10082         previously_split_instances.add(inst.name)
10083
10084       if len(set(changed_nodes.get(node, node_data[node].group)
10085                  for node in instance_nodes)) > 1:
10086         all_split_instances.add(inst.name)
10087
10088     return (list(all_split_instances - previously_split_instances),
10089             list(previously_split_instances & all_split_instances))
10090
10091
10092 class _GroupQuery(_QueryBase):
10093
10094   FIELDS = query.GROUP_FIELDS
10095
10096   def ExpandNames(self, lu):
10097     lu.needed_locks = {}
10098
10099     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10100     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10101
10102     if not self.names:
10103       self.wanted = [name_to_uuid[name]
10104                      for name in utils.NiceSort(name_to_uuid.keys())]
10105     else:
10106       # Accept names to be either names or UUIDs.
10107       missing = []
10108       self.wanted = []
10109       all_uuid = frozenset(self._all_groups.keys())
10110
10111       for name in self.names:
10112         if name in all_uuid:
10113           self.wanted.append(name)
10114         elif name in name_to_uuid:
10115           self.wanted.append(name_to_uuid[name])
10116         else:
10117           missing.append(name)
10118
10119       if missing:
10120         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10121                                    errors.ECODE_NOENT)
10122
10123   def DeclareLocks(self, lu, level):
10124     pass
10125
10126   def _GetQueryData(self, lu):
10127     """Computes the list of node groups and their attributes.
10128
10129     """
10130     do_nodes = query.GQ_NODE in self.requested_data
10131     do_instances = query.GQ_INST in self.requested_data
10132
10133     group_to_nodes = None
10134     group_to_instances = None
10135
10136     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10137     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10138     # latter GetAllInstancesInfo() is not enough, for we have to go through
10139     # instance->node. Hence, we will need to process nodes even if we only need
10140     # instance information.
10141     if do_nodes or do_instances:
10142       all_nodes = lu.cfg.GetAllNodesInfo()
10143       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10144       node_to_group = {}
10145
10146       for node in all_nodes.values():
10147         if node.group in group_to_nodes:
10148           group_to_nodes[node.group].append(node.name)
10149           node_to_group[node.name] = node.group
10150
10151       if do_instances:
10152         all_instances = lu.cfg.GetAllInstancesInfo()
10153         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10154
10155         for instance in all_instances.values():
10156           node = instance.primary_node
10157           if node in node_to_group:
10158             group_to_instances[node_to_group[node]].append(instance.name)
10159
10160         if not do_nodes:
10161           # Do not pass on node information if it was not requested.
10162           group_to_nodes = None
10163
10164     return query.GroupQueryData([self._all_groups[uuid]
10165                                  for uuid in self.wanted],
10166                                 group_to_nodes, group_to_instances)
10167
10168
10169 class LUGroupQuery(NoHooksLU):
10170   """Logical unit for querying node groups.
10171
10172   """
10173   REQ_BGL = False
10174
10175   def CheckArguments(self):
10176     self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10177
10178   def ExpandNames(self):
10179     self.gq.ExpandNames(self)
10180
10181   def Exec(self, feedback_fn):
10182     return self.gq.OldStyleQuery(self)
10183
10184
10185 class LUGroupSetParams(LogicalUnit):
10186   """Modifies the parameters of a node group.
10187
10188   """
10189   HPATH = "group-modify"
10190   HTYPE = constants.HTYPE_GROUP
10191   REQ_BGL = False
10192
10193   def CheckArguments(self):
10194     all_changes = [
10195       self.op.ndparams,
10196       self.op.alloc_policy,
10197       ]
10198
10199     if all_changes.count(None) == len(all_changes):
10200       raise errors.OpPrereqError("Please pass at least one modification",
10201                                  errors.ECODE_INVAL)
10202
10203   def ExpandNames(self):
10204     # This raises errors.OpPrereqError on its own:
10205     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10206
10207     self.needed_locks = {
10208       locking.LEVEL_NODEGROUP: [self.group_uuid],
10209       }
10210
10211   def CheckPrereq(self):
10212     """Check prerequisites.
10213
10214     """
10215     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10216
10217     if self.group is None:
10218       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10219                                (self.op.group_name, self.group_uuid))
10220
10221     if self.op.ndparams:
10222       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10223       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10224       self.new_ndparams = new_ndparams
10225
10226   def BuildHooksEnv(self):
10227     """Build hooks env.
10228
10229     """
10230     env = {
10231       "GROUP_NAME": self.op.group_name,
10232       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10233       }
10234     mn = self.cfg.GetMasterNode()
10235     return env, [mn], [mn]
10236
10237   def Exec(self, feedback_fn):
10238     """Modifies the node group.
10239
10240     """
10241     result = []
10242
10243     if self.op.ndparams:
10244       self.group.ndparams = self.new_ndparams
10245       result.append(("ndparams", str(self.group.ndparams)))
10246
10247     if self.op.alloc_policy:
10248       self.group.alloc_policy = self.op.alloc_policy
10249
10250     self.cfg.Update(self.group, feedback_fn)
10251     return result
10252
10253
10254
10255 class LUGroupRemove(LogicalUnit):
10256   HPATH = "group-remove"
10257   HTYPE = constants.HTYPE_GROUP
10258   REQ_BGL = False
10259
10260   def ExpandNames(self):
10261     # This will raises errors.OpPrereqError on its own:
10262     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10263     self.needed_locks = {
10264       locking.LEVEL_NODEGROUP: [self.group_uuid],
10265       }
10266
10267   def CheckPrereq(self):
10268     """Check prerequisites.
10269
10270     This checks that the given group name exists as a node group, that is
10271     empty (i.e., contains no nodes), and that is not the last group of the
10272     cluster.
10273
10274     """
10275     # Verify that the group is empty.
10276     group_nodes = [node.name
10277                    for node in self.cfg.GetAllNodesInfo().values()
10278                    if node.group == self.group_uuid]
10279
10280     if group_nodes:
10281       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10282                                  " nodes: %s" %
10283                                  (self.op.group_name,
10284                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10285                                  errors.ECODE_STATE)
10286
10287     # Verify the cluster would not be left group-less.
10288     if len(self.cfg.GetNodeGroupList()) == 1:
10289       raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10290                                  " which cannot be left without at least one"
10291                                  " group" % self.op.group_name,
10292                                  errors.ECODE_STATE)
10293
10294   def BuildHooksEnv(self):
10295     """Build hooks env.
10296
10297     """
10298     env = {
10299       "GROUP_NAME": self.op.group_name,
10300       }
10301     mn = self.cfg.GetMasterNode()
10302     return env, [mn], [mn]
10303
10304   def Exec(self, feedback_fn):
10305     """Remove the node group.
10306
10307     """
10308     try:
10309       self.cfg.RemoveNodeGroup(self.group_uuid)
10310     except errors.ConfigurationError:
10311       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10312                                (self.op.group_name, self.group_uuid))
10313
10314     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10315
10316
10317 class LUGroupRename(LogicalUnit):
10318   HPATH = "group-rename"
10319   HTYPE = constants.HTYPE_GROUP
10320   REQ_BGL = False
10321
10322   def ExpandNames(self):
10323     # This raises errors.OpPrereqError on its own:
10324     self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10325
10326     self.needed_locks = {
10327       locking.LEVEL_NODEGROUP: [self.group_uuid],
10328       }
10329
10330   def CheckPrereq(self):
10331     """Check prerequisites.
10332
10333     This checks that the given old_name exists as a node group, and that
10334     new_name doesn't.
10335
10336     """
10337     try:
10338       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10339     except errors.OpPrereqError:
10340       pass
10341     else:
10342       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10343                                  " node group (UUID: %s)" %
10344                                  (self.op.new_name, new_name_uuid),
10345                                  errors.ECODE_EXISTS)
10346
10347   def BuildHooksEnv(self):
10348     """Build hooks env.
10349
10350     """
10351     env = {
10352       "OLD_NAME": self.op.old_name,
10353       "NEW_NAME": self.op.new_name,
10354       }
10355
10356     mn = self.cfg.GetMasterNode()
10357     all_nodes = self.cfg.GetAllNodesInfo()
10358     run_nodes = [mn]
10359     all_nodes.pop(mn, None)
10360
10361     for node in all_nodes.values():
10362       if node.group == self.group_uuid:
10363         run_nodes.append(node.name)
10364
10365     return env, run_nodes, run_nodes
10366
10367   def Exec(self, feedback_fn):
10368     """Rename the node group.
10369
10370     """
10371     group = self.cfg.GetNodeGroup(self.group_uuid)
10372
10373     if group is None:
10374       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10375                                (self.op.old_name, self.group_uuid))
10376
10377     group.name = self.op.new_name
10378     self.cfg.Update(group, feedback_fn)
10379
10380     return self.op.new_name
10381
10382
10383 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10384   """Generic tags LU.
10385
10386   This is an abstract class which is the parent of all the other tags LUs.
10387
10388   """
10389
10390   def ExpandNames(self):
10391     self.needed_locks = {}
10392     if self.op.kind == constants.TAG_NODE:
10393       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10394       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10395     elif self.op.kind == constants.TAG_INSTANCE:
10396       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10397       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10398
10399     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10400     # not possible to acquire the BGL based on opcode parameters)
10401
10402   def CheckPrereq(self):
10403     """Check prerequisites.
10404
10405     """
10406     if self.op.kind == constants.TAG_CLUSTER:
10407       self.target = self.cfg.GetClusterInfo()
10408     elif self.op.kind == constants.TAG_NODE:
10409       self.target = self.cfg.GetNodeInfo(self.op.name)
10410     elif self.op.kind == constants.TAG_INSTANCE:
10411       self.target = self.cfg.GetInstanceInfo(self.op.name)
10412     else:
10413       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10414                                  str(self.op.kind), errors.ECODE_INVAL)
10415
10416
10417 class LUTagsGet(TagsLU):
10418   """Returns the tags of a given object.
10419
10420   """
10421   REQ_BGL = False
10422
10423   def ExpandNames(self):
10424     TagsLU.ExpandNames(self)
10425
10426     # Share locks as this is only a read operation
10427     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10428
10429   def Exec(self, feedback_fn):
10430     """Returns the tag list.
10431
10432     """
10433     return list(self.target.GetTags())
10434
10435
10436 class LUTagsSearch(NoHooksLU):
10437   """Searches the tags for a given pattern.
10438
10439   """
10440   REQ_BGL = False
10441
10442   def ExpandNames(self):
10443     self.needed_locks = {}
10444
10445   def CheckPrereq(self):
10446     """Check prerequisites.
10447
10448     This checks the pattern passed for validity by compiling it.
10449
10450     """
10451     try:
10452       self.re = re.compile(self.op.pattern)
10453     except re.error, err:
10454       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10455                                  (self.op.pattern, err), errors.ECODE_INVAL)
10456
10457   def Exec(self, feedback_fn):
10458     """Returns the tag list.
10459
10460     """
10461     cfg = self.cfg
10462     tgts = [("/cluster", cfg.GetClusterInfo())]
10463     ilist = cfg.GetAllInstancesInfo().values()
10464     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10465     nlist = cfg.GetAllNodesInfo().values()
10466     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10467     results = []
10468     for path, target in tgts:
10469       for tag in target.GetTags():
10470         if self.re.search(tag):
10471           results.append((path, tag))
10472     return results
10473
10474
10475 class LUTagsSet(TagsLU):
10476   """Sets a tag on a given object.
10477
10478   """
10479   REQ_BGL = False
10480
10481   def CheckPrereq(self):
10482     """Check prerequisites.
10483
10484     This checks the type and length of the tag name and value.
10485
10486     """
10487     TagsLU.CheckPrereq(self)
10488     for tag in self.op.tags:
10489       objects.TaggableObject.ValidateTag(tag)
10490
10491   def Exec(self, feedback_fn):
10492     """Sets the tag.
10493
10494     """
10495     try:
10496       for tag in self.op.tags:
10497         self.target.AddTag(tag)
10498     except errors.TagError, err:
10499       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10500     self.cfg.Update(self.target, feedback_fn)
10501
10502
10503 class LUTagsDel(TagsLU):
10504   """Delete a list of tags from a given object.
10505
10506   """
10507   REQ_BGL = False
10508
10509   def CheckPrereq(self):
10510     """Check prerequisites.
10511
10512     This checks that we have the given tag.
10513
10514     """
10515     TagsLU.CheckPrereq(self)
10516     for tag in self.op.tags:
10517       objects.TaggableObject.ValidateTag(tag)
10518     del_tags = frozenset(self.op.tags)
10519     cur_tags = self.target.GetTags()
10520
10521     diff_tags = del_tags - cur_tags
10522     if diff_tags:
10523       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10524       raise errors.OpPrereqError("Tag(s) %s not found" %
10525                                  (utils.CommaJoin(diff_names), ),
10526                                  errors.ECODE_NOENT)
10527
10528   def Exec(self, feedback_fn):
10529     """Remove the tag from the object.
10530
10531     """
10532     for tag in self.op.tags:
10533       self.target.RemoveTag(tag)
10534     self.cfg.Update(self.target, feedback_fn)
10535
10536
10537 class LUTestDelay(NoHooksLU):
10538   """Sleep for a specified amount of time.
10539
10540   This LU sleeps on the master and/or nodes for a specified amount of
10541   time.
10542
10543   """
10544   REQ_BGL = False
10545
10546   def ExpandNames(self):
10547     """Expand names and set required locks.
10548
10549     This expands the node list, if any.
10550
10551     """
10552     self.needed_locks = {}
10553     if self.op.on_nodes:
10554       # _GetWantedNodes can be used here, but is not always appropriate to use
10555       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10556       # more information.
10557       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10558       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10559
10560   def _TestDelay(self):
10561     """Do the actual sleep.
10562
10563     """
10564     if self.op.on_master:
10565       if not utils.TestDelay(self.op.duration):
10566         raise errors.OpExecError("Error during master delay test")
10567     if self.op.on_nodes:
10568       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10569       for node, node_result in result.items():
10570         node_result.Raise("Failure during rpc call to node %s" % node)
10571
10572   def Exec(self, feedback_fn):
10573     """Execute the test delay opcode, with the wanted repetitions.
10574
10575     """
10576     if self.op.repeat == 0:
10577       self._TestDelay()
10578     else:
10579       top_value = self.op.repeat - 1
10580       for i in range(self.op.repeat):
10581         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10582         self._TestDelay()
10583
10584
10585 class LUTestJqueue(NoHooksLU):
10586   """Utility LU to test some aspects of the job queue.
10587
10588   """
10589   REQ_BGL = False
10590
10591   # Must be lower than default timeout for WaitForJobChange to see whether it
10592   # notices changed jobs
10593   _CLIENT_CONNECT_TIMEOUT = 20.0
10594   _CLIENT_CONFIRM_TIMEOUT = 60.0
10595
10596   @classmethod
10597   def _NotifyUsingSocket(cls, cb, errcls):
10598     """Opens a Unix socket and waits for another program to connect.
10599
10600     @type cb: callable
10601     @param cb: Callback to send socket name to client
10602     @type errcls: class
10603     @param errcls: Exception class to use for errors
10604
10605     """
10606     # Using a temporary directory as there's no easy way to create temporary
10607     # sockets without writing a custom loop around tempfile.mktemp and
10608     # socket.bind
10609     tmpdir = tempfile.mkdtemp()
10610     try:
10611       tmpsock = utils.PathJoin(tmpdir, "sock")
10612
10613       logging.debug("Creating temporary socket at %s", tmpsock)
10614       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10615       try:
10616         sock.bind(tmpsock)
10617         sock.listen(1)
10618
10619         # Send details to client
10620         cb(tmpsock)
10621
10622         # Wait for client to connect before continuing
10623         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10624         try:
10625           (conn, _) = sock.accept()
10626         except socket.error, err:
10627           raise errcls("Client didn't connect in time (%s)" % err)
10628       finally:
10629         sock.close()
10630     finally:
10631       # Remove as soon as client is connected
10632       shutil.rmtree(tmpdir)
10633
10634     # Wait for client to close
10635     try:
10636       try:
10637         # pylint: disable-msg=E1101
10638         # Instance of '_socketobject' has no ... member
10639         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10640         conn.recv(1)
10641       except socket.error, err:
10642         raise errcls("Client failed to confirm notification (%s)" % err)
10643     finally:
10644       conn.close()
10645
10646   def _SendNotification(self, test, arg, sockname):
10647     """Sends a notification to the client.
10648
10649     @type test: string
10650     @param test: Test name
10651     @param arg: Test argument (depends on test)
10652     @type sockname: string
10653     @param sockname: Socket path
10654
10655     """
10656     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10657
10658   def _Notify(self, prereq, test, arg):
10659     """Notifies the client of a test.
10660
10661     @type prereq: bool
10662     @param prereq: Whether this is a prereq-phase test
10663     @type test: string
10664     @param test: Test name
10665     @param arg: Test argument (depends on test)
10666
10667     """
10668     if prereq:
10669       errcls = errors.OpPrereqError
10670     else:
10671       errcls = errors.OpExecError
10672
10673     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10674                                                   test, arg),
10675                                    errcls)
10676
10677   def CheckArguments(self):
10678     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10679     self.expandnames_calls = 0
10680
10681   def ExpandNames(self):
10682     checkargs_calls = getattr(self, "checkargs_calls", 0)
10683     if checkargs_calls < 1:
10684       raise errors.ProgrammerError("CheckArguments was not called")
10685
10686     self.expandnames_calls += 1
10687
10688     if self.op.notify_waitlock:
10689       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10690
10691     self.LogInfo("Expanding names")
10692
10693     # Get lock on master node (just to get a lock, not for a particular reason)
10694     self.needed_locks = {
10695       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10696       }
10697
10698   def Exec(self, feedback_fn):
10699     if self.expandnames_calls < 1:
10700       raise errors.ProgrammerError("ExpandNames was not called")
10701
10702     if self.op.notify_exec:
10703       self._Notify(False, constants.JQT_EXEC, None)
10704
10705     self.LogInfo("Executing")
10706
10707     if self.op.log_messages:
10708       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10709       for idx, msg in enumerate(self.op.log_messages):
10710         self.LogInfo("Sending log message %s", idx + 1)
10711         feedback_fn(constants.JQT_MSGPREFIX + msg)
10712         # Report how many test messages have been sent
10713         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10714
10715     if self.op.fail:
10716       raise errors.OpExecError("Opcode failure was requested")
10717
10718     return True
10719
10720
10721 class IAllocator(object):
10722   """IAllocator framework.
10723
10724   An IAllocator instance has three sets of attributes:
10725     - cfg that is needed to query the cluster
10726     - input data (all members of the _KEYS class attribute are required)
10727     - four buffer attributes (in|out_data|text), that represent the
10728       input (to the external script) in text and data structure format,
10729       and the output from it, again in two formats
10730     - the result variables from the script (success, info, nodes) for
10731       easy usage
10732
10733   """
10734   # pylint: disable-msg=R0902
10735   # lots of instance attributes
10736   _ALLO_KEYS = [
10737     "name", "mem_size", "disks", "disk_template",
10738     "os", "tags", "nics", "vcpus", "hypervisor",
10739     ]
10740   _RELO_KEYS = [
10741     "name", "relocate_from",
10742     ]
10743   _EVAC_KEYS = [
10744     "evac_nodes",
10745     ]
10746
10747   def __init__(self, cfg, rpc, mode, **kwargs):
10748     self.cfg = cfg
10749     self.rpc = rpc
10750     # init buffer variables
10751     self.in_text = self.out_text = self.in_data = self.out_data = None
10752     # init all input fields so that pylint is happy
10753     self.mode = mode
10754     self.mem_size = self.disks = self.disk_template = None
10755     self.os = self.tags = self.nics = self.vcpus = None
10756     self.hypervisor = None
10757     self.relocate_from = None
10758     self.name = None
10759     self.evac_nodes = None
10760     # computed fields
10761     self.required_nodes = None
10762     # init result fields
10763     self.success = self.info = self.result = None
10764     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10765       keyset = self._ALLO_KEYS
10766       fn = self._AddNewInstance
10767     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10768       keyset = self._RELO_KEYS
10769       fn = self._AddRelocateInstance
10770     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10771       keyset = self._EVAC_KEYS
10772       fn = self._AddEvacuateNodes
10773     else:
10774       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10775                                    " IAllocator" % self.mode)
10776     for key in kwargs:
10777       if key not in keyset:
10778         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10779                                      " IAllocator" % key)
10780       setattr(self, key, kwargs[key])
10781
10782     for key in keyset:
10783       if key not in kwargs:
10784         raise errors.ProgrammerError("Missing input parameter '%s' to"
10785                                      " IAllocator" % key)
10786     self._BuildInputData(fn)
10787
10788   def _ComputeClusterData(self):
10789     """Compute the generic allocator input data.
10790
10791     This is the data that is independent of the actual operation.
10792
10793     """
10794     cfg = self.cfg
10795     cluster_info = cfg.GetClusterInfo()
10796     # cluster data
10797     data = {
10798       "version": constants.IALLOCATOR_VERSION,
10799       "cluster_name": cfg.GetClusterName(),
10800       "cluster_tags": list(cluster_info.GetTags()),
10801       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10802       # we don't have job IDs
10803       }
10804     ninfo = cfg.GetAllNodesInfo()
10805     iinfo = cfg.GetAllInstancesInfo().values()
10806     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10807
10808     # node data
10809     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10810
10811     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10812       hypervisor_name = self.hypervisor
10813     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10814       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10815     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10816       hypervisor_name = cluster_info.enabled_hypervisors[0]
10817
10818     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10819                                         hypervisor_name)
10820     node_iinfo = \
10821       self.rpc.call_all_instances_info(node_list,
10822                                        cluster_info.enabled_hypervisors)
10823
10824     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10825
10826     config_ndata = self._ComputeBasicNodeData(ninfo)
10827     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10828                                                  i_list, config_ndata)
10829     assert len(data["nodes"]) == len(ninfo), \
10830         "Incomplete node data computed"
10831
10832     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10833
10834     self.in_data = data
10835
10836   @staticmethod
10837   def _ComputeNodeGroupData(cfg):
10838     """Compute node groups data.
10839
10840     """
10841     ng = {}
10842     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10843       ng[guuid] = {
10844         "name": gdata.name,
10845         "alloc_policy": gdata.alloc_policy,
10846         }
10847     return ng
10848
10849   @staticmethod
10850   def _ComputeBasicNodeData(node_cfg):
10851     """Compute global node data.
10852
10853     @rtype: dict
10854     @returns: a dict of name: (node dict, node config)
10855
10856     """
10857     node_results = {}
10858     for ninfo in node_cfg.values():
10859       # fill in static (config-based) values
10860       pnr = {
10861         "tags": list(ninfo.GetTags()),
10862         "primary_ip": ninfo.primary_ip,
10863         "secondary_ip": ninfo.secondary_ip,
10864         "offline": ninfo.offline,
10865         "drained": ninfo.drained,
10866         "master_candidate": ninfo.master_candidate,
10867         "group": ninfo.group,
10868         "master_capable": ninfo.master_capable,
10869         "vm_capable": ninfo.vm_capable,
10870         }
10871
10872       node_results[ninfo.name] = pnr
10873
10874     return node_results
10875
10876   @staticmethod
10877   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10878                               node_results):
10879     """Compute global node data.
10880
10881     @param node_results: the basic node structures as filled from the config
10882
10883     """
10884     # make a copy of the current dict
10885     node_results = dict(node_results)
10886     for nname, nresult in node_data.items():
10887       assert nname in node_results, "Missing basic data for node %s" % nname
10888       ninfo = node_cfg[nname]
10889
10890       if not (ninfo.offline or ninfo.drained):
10891         nresult.Raise("Can't get data for node %s" % nname)
10892         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10893                                 nname)
10894         remote_info = nresult.payload
10895
10896         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10897                      'vg_size', 'vg_free', 'cpu_total']:
10898           if attr not in remote_info:
10899             raise errors.OpExecError("Node '%s' didn't return attribute"
10900                                      " '%s'" % (nname, attr))
10901           if not isinstance(remote_info[attr], int):
10902             raise errors.OpExecError("Node '%s' returned invalid value"
10903                                      " for '%s': %s" %
10904                                      (nname, attr, remote_info[attr]))
10905         # compute memory used by primary instances
10906         i_p_mem = i_p_up_mem = 0
10907         for iinfo, beinfo in i_list:
10908           if iinfo.primary_node == nname:
10909             i_p_mem += beinfo[constants.BE_MEMORY]
10910             if iinfo.name not in node_iinfo[nname].payload:
10911               i_used_mem = 0
10912             else:
10913               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10914             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10915             remote_info['memory_free'] -= max(0, i_mem_diff)
10916
10917             if iinfo.admin_up:
10918               i_p_up_mem += beinfo[constants.BE_MEMORY]
10919
10920         # compute memory used by instances
10921         pnr_dyn = {
10922           "total_memory": remote_info['memory_total'],
10923           "reserved_memory": remote_info['memory_dom0'],
10924           "free_memory": remote_info['memory_free'],
10925           "total_disk": remote_info['vg_size'],
10926           "free_disk": remote_info['vg_free'],
10927           "total_cpus": remote_info['cpu_total'],
10928           "i_pri_memory": i_p_mem,
10929           "i_pri_up_memory": i_p_up_mem,
10930           }
10931         pnr_dyn.update(node_results[nname])
10932
10933       node_results[nname] = pnr_dyn
10934
10935     return node_results
10936
10937   @staticmethod
10938   def _ComputeInstanceData(cluster_info, i_list):
10939     """Compute global instance data.
10940
10941     """
10942     instance_data = {}
10943     for iinfo, beinfo in i_list:
10944       nic_data = []
10945       for nic in iinfo.nics:
10946         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10947         nic_dict = {"mac": nic.mac,
10948                     "ip": nic.ip,
10949                     "mode": filled_params[constants.NIC_MODE],
10950                     "link": filled_params[constants.NIC_LINK],
10951                    }
10952         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10953           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10954         nic_data.append(nic_dict)
10955       pir = {
10956         "tags": list(iinfo.GetTags()),
10957         "admin_up": iinfo.admin_up,
10958         "vcpus": beinfo[constants.BE_VCPUS],
10959         "memory": beinfo[constants.BE_MEMORY],
10960         "os": iinfo.os,
10961         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10962         "nics": nic_data,
10963         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10964         "disk_template": iinfo.disk_template,
10965         "hypervisor": iinfo.hypervisor,
10966         }
10967       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10968                                                  pir["disks"])
10969       instance_data[iinfo.name] = pir
10970
10971     return instance_data
10972
10973   def _AddNewInstance(self):
10974     """Add new instance data to allocator structure.
10975
10976     This in combination with _AllocatorGetClusterData will create the
10977     correct structure needed as input for the allocator.
10978
10979     The checks for the completeness of the opcode must have already been
10980     done.
10981
10982     """
10983     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10984
10985     if self.disk_template in constants.DTS_NET_MIRROR:
10986       self.required_nodes = 2
10987     else:
10988       self.required_nodes = 1
10989     request = {
10990       "name": self.name,
10991       "disk_template": self.disk_template,
10992       "tags": self.tags,
10993       "os": self.os,
10994       "vcpus": self.vcpus,
10995       "memory": self.mem_size,
10996       "disks": self.disks,
10997       "disk_space_total": disk_space,
10998       "nics": self.nics,
10999       "required_nodes": self.required_nodes,
11000       }
11001     return request
11002
11003   def _AddRelocateInstance(self):
11004     """Add relocate instance data to allocator structure.
11005
11006     This in combination with _IAllocatorGetClusterData will create the
11007     correct structure needed as input for the allocator.
11008
11009     The checks for the completeness of the opcode must have already been
11010     done.
11011
11012     """
11013     instance = self.cfg.GetInstanceInfo(self.name)
11014     if instance is None:
11015       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11016                                    " IAllocator" % self.name)
11017
11018     if instance.disk_template not in constants.DTS_NET_MIRROR:
11019       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11020                                  errors.ECODE_INVAL)
11021
11022     if len(instance.secondary_nodes) != 1:
11023       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11024                                  errors.ECODE_STATE)
11025
11026     self.required_nodes = 1
11027     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11028     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11029
11030     request = {
11031       "name": self.name,
11032       "disk_space_total": disk_space,
11033       "required_nodes": self.required_nodes,
11034       "relocate_from": self.relocate_from,
11035       }
11036     return request
11037
11038   def _AddEvacuateNodes(self):
11039     """Add evacuate nodes data to allocator structure.
11040
11041     """
11042     request = {
11043       "evac_nodes": self.evac_nodes
11044       }
11045     return request
11046
11047   def _BuildInputData(self, fn):
11048     """Build input data structures.
11049
11050     """
11051     self._ComputeClusterData()
11052
11053     request = fn()
11054     request["type"] = self.mode
11055     self.in_data["request"] = request
11056
11057     self.in_text = serializer.Dump(self.in_data)
11058
11059   def Run(self, name, validate=True, call_fn=None):
11060     """Run an instance allocator and return the results.
11061
11062     """
11063     if call_fn is None:
11064       call_fn = self.rpc.call_iallocator_runner
11065
11066     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11067     result.Raise("Failure while running the iallocator script")
11068
11069     self.out_text = result.payload
11070     if validate:
11071       self._ValidateResult()
11072
11073   def _ValidateResult(self):
11074     """Process the allocator results.
11075
11076     This will process and if successful save the result in
11077     self.out_data and the other parameters.
11078
11079     """
11080     try:
11081       rdict = serializer.Load(self.out_text)
11082     except Exception, err:
11083       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11084
11085     if not isinstance(rdict, dict):
11086       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11087
11088     # TODO: remove backwards compatiblity in later versions
11089     if "nodes" in rdict and "result" not in rdict:
11090       rdict["result"] = rdict["nodes"]
11091       del rdict["nodes"]
11092
11093     for key in "success", "info", "result":
11094       if key not in rdict:
11095         raise errors.OpExecError("Can't parse iallocator results:"
11096                                  " missing key '%s'" % key)
11097       setattr(self, key, rdict[key])
11098
11099     if not isinstance(rdict["result"], list):
11100       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11101                                " is not a list")
11102     self.out_data = rdict
11103
11104
11105 class LUTestAllocator(NoHooksLU):
11106   """Run allocator tests.
11107
11108   This LU runs the allocator tests
11109
11110   """
11111   def CheckPrereq(self):
11112     """Check prerequisites.
11113
11114     This checks the opcode parameters depending on the director and mode test.
11115
11116     """
11117     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11118       for attr in ["mem_size", "disks", "disk_template",
11119                    "os", "tags", "nics", "vcpus"]:
11120         if not hasattr(self.op, attr):
11121           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11122                                      attr, errors.ECODE_INVAL)
11123       iname = self.cfg.ExpandInstanceName(self.op.name)
11124       if iname is not None:
11125         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11126                                    iname, errors.ECODE_EXISTS)
11127       if not isinstance(self.op.nics, list):
11128         raise errors.OpPrereqError("Invalid parameter 'nics'",
11129                                    errors.ECODE_INVAL)
11130       if not isinstance(self.op.disks, list):
11131         raise errors.OpPrereqError("Invalid parameter 'disks'",
11132                                    errors.ECODE_INVAL)
11133       for row in self.op.disks:
11134         if (not isinstance(row, dict) or
11135             "size" not in row or
11136             not isinstance(row["size"], int) or
11137             "mode" not in row or
11138             row["mode"] not in ['r', 'w']):
11139           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11140                                      " parameter", errors.ECODE_INVAL)
11141       if self.op.hypervisor is None:
11142         self.op.hypervisor = self.cfg.GetHypervisorType()
11143     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11144       fname = _ExpandInstanceName(self.cfg, self.op.name)
11145       self.op.name = fname
11146       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11147     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11148       if not hasattr(self.op, "evac_nodes"):
11149         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11150                                    " opcode input", errors.ECODE_INVAL)
11151     else:
11152       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11153                                  self.op.mode, errors.ECODE_INVAL)
11154
11155     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11156       if self.op.allocator is None:
11157         raise errors.OpPrereqError("Missing allocator name",
11158                                    errors.ECODE_INVAL)
11159     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11160       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11161                                  self.op.direction, errors.ECODE_INVAL)
11162
11163   def Exec(self, feedback_fn):
11164     """Run the allocator test.
11165
11166     """
11167     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11168       ial = IAllocator(self.cfg, self.rpc,
11169                        mode=self.op.mode,
11170                        name=self.op.name,
11171                        mem_size=self.op.mem_size,
11172                        disks=self.op.disks,
11173                        disk_template=self.op.disk_template,
11174                        os=self.op.os,
11175                        tags=self.op.tags,
11176                        nics=self.op.nics,
11177                        vcpus=self.op.vcpus,
11178                        hypervisor=self.op.hypervisor,
11179                        )
11180     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11181       ial = IAllocator(self.cfg, self.rpc,
11182                        mode=self.op.mode,
11183                        name=self.op.name,
11184                        relocate_from=list(self.relocate_from),
11185                        )
11186     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11187       ial = IAllocator(self.cfg, self.rpc,
11188                        mode=self.op.mode,
11189                        evac_nodes=self.op.evac_nodes)
11190     else:
11191       raise errors.ProgrammerError("Uncatched mode %s in"
11192                                    " LUTestAllocator.Exec", self.op.mode)
11193
11194     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11195       result = ial.in_text
11196     else:
11197       ial.Run(self.op.allocator, validate=False)
11198       result = ial.out_text
11199     return result
11200
11201
11202 #: Query type implementations
11203 _QUERY_IMPL = {
11204   constants.QR_INSTANCE: _InstanceQuery,
11205   constants.QR_NODE: _NodeQuery,
11206   constants.QR_GROUP: _GroupQuery,
11207   }
11208
11209
11210 def _GetQueryImplementation(name):
11211   """Returns the implemtnation for a query type.
11212
11213   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11214
11215   """
11216   try:
11217     return _QUERY_IMPL[name]
11218   except KeyError:
11219     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11220                                errors.ECODE_INVAL)