code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 # End types
  78 class LogicalUnit(object):
  79   """Logical Unit base class.
  80
  81   Subclasses must follow these rules:
  82     - implement ExpandNames
  83     - implement CheckPrereq (except when tasklets are used)
  84     - implement Exec (except when tasklets are used)
  85     - implement BuildHooksEnv
  86     - redefine HPATH and HTYPE
  87     - optionally redefine their run requirements:
  88         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  89
  90   Note that all commands require root permissions.
  91
  92   @ivar dry_run_result: the value (if any) that will be returned to the caller
  93       in dry-run mode (signalled by opcode dry_run parameter)
  94
  95   """
  96   HPATH = None
  97   HTYPE = None
  98   REQ_BGL = True
  99
 100   def __init__(self, processor, op, context, rpc):
 101     """Constructor for LogicalUnit.
 102
 103     This needs to be overridden in derived classes in order to check op
 104     validity.
 105
 106     """
 107     self.proc = processor
 108     self.op = op
 109     self.cfg = context.cfg
 110     self.context = context
 111     self.rpc = rpc
 112     # Dicts used to declare locking needs to mcpu
 113     self.needed_locks = None
 114     self.acquired_locks = {}
 115     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 116     self.add_locks = {}
 117     self.remove_locks = {}
 118     # Used to force good behavior when calling helper functions
 119     self.recalculate_locks = {}
 120     self.__ssh = None
 121     # logging
 122     self.Log = processor.Log # pylint: disable-msg=C0103
 123     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 124     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 125     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 126     # support for dry-run
 127     self.dry_run_result = None
 128     # support for generic debug attribute
 129     if (not hasattr(self.op, "debug_level") or
 130         not isinstance(self.op.debug_level, int)):
 131       self.op.debug_level = 0
 132
 133     # Tasklets
 134     self.tasklets = None
 135
 136     # Validate opcode parameters and set defaults
 137     self.op.Validate(True)
 138
 139     self.CheckArguments()
 140
 141   def __GetSSH(self):
 142     """Returns the SshRunner object
 143
 144     """
 145     if not self.__ssh:
 146       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 147     return self.__ssh
 148
 149   ssh = property(fget=__GetSSH)
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     This method should return a three-node tuple consisting of: a dict
 277     containing the environment that will be used for running the
 278     specific hook for this LU, a list of node names on which the hook
 279     should run before the execution, and a list of node names on which
 280     the hook should run after the execution.
 281
 282     The keys of the dict must not have 'GANETI_' prefixed as this will
 283     be handled in the hooks runner. Also note additional keys will be
 284     added by the hooks runner. If the LU doesn't define any
 285     environment, an empty dict (and not None) should be returned.
 286
 287     No nodes should be returned as an empty list (and not None).
 288
 289     Note that if the HPATH for a LU class is None, this function will
 290     not be called.
 291
 292     """
 293     raise NotImplementedError
 294
 295   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 296     """Notify the LU about the results of its hooks.
 297
 298     This method is called every time a hooks phase is executed, and notifies
 299     the Logical Unit about the hooks' result. The LU can then use it to alter
 300     its result based on the hooks.  By default the method does nothing and the
 301     previous result is passed back unchanged but any LU can define it if it
 302     wants to use the local cluster hook-scripts somehow.
 303
 304     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 305         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 306     @param hook_results: the results of the multi-node hooks rpc call
 307     @param feedback_fn: function used send feedback back to the caller
 308     @param lu_result: the previous Exec result this LU had, or None
 309         in the PRE phase
 310     @return: the new Exec result, based on the previous result
 311         and hook results
 312
 313     """
 314     # API must be kept, thus we ignore the unused argument and could
 315     # be a function warnings
 316     # pylint: disable-msg=W0613,R0201
 317     return lu_result
 318
 319   def _ExpandAndLockInstance(self):
 320     """Helper function to expand and lock an instance.
 321
 322     Many LUs that work on an instance take its name in self.op.instance_name
 323     and need to expand it and then declare the expanded name for locking. This
 324     function does it, and then updates self.op.instance_name to the expanded
 325     name. It also initializes needed_locks as a dict, if this hasn't been done
 326     before.
 327
 328     """
 329     if self.needed_locks is None:
 330       self.needed_locks = {}
 331     else:
 332       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 333         "_ExpandAndLockInstance called with instance-level locks set"
 334     self.op.instance_name = _ExpandInstanceName(self.cfg,
 335                                                 self.op.instance_name)
 336     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 337
 338   def _LockInstancesNodes(self, primary_only=False):
 339     """Helper function to declare instances' nodes for locking.
 340
 341     This function should be called after locking one or more instances to lock
 342     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 343     with all primary or secondary nodes for instances already locked and
 344     present in self.needed_locks[locking.LEVEL_INSTANCE].
 345
 346     It should be called from DeclareLocks, and for safety only works if
 347     self.recalculate_locks[locking.LEVEL_NODE] is set.
 348
 349     In the future it may grow parameters to just lock some instance's nodes, or
 350     to just lock primaries or secondary nodes, if needed.
 351
 352     If should be called in DeclareLocks in a way similar to::
 353
 354       if level == locking.LEVEL_NODE:
 355         self._LockInstancesNodes()
 356
 357     @type primary_only: boolean
 358     @param primary_only: only lock primary nodes of locked instances
 359
 360     """
 361     assert locking.LEVEL_NODE in self.recalculate_locks, \
 362       "_LockInstancesNodes helper function called with no nodes to recalculate"
 363
 364     # TODO: check if we're really been called with the instance locks held
 365
 366     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 367     # future we might want to have different behaviors depending on the value
 368     # of self.recalculate_locks[locking.LEVEL_NODE]
 369     wanted_nodes = []
 370     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 371       instance = self.context.cfg.GetInstanceInfo(instance_name)
 372       wanted_nodes.append(instance.primary_node)
 373       if not primary_only:
 374         wanted_nodes.extend(instance.secondary_nodes)
 375
 376     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 377       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 378     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 379       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 380
 381     del self.recalculate_locks[locking.LEVEL_NODE]
 382
 383
 384 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 385   """Simple LU which runs no hooks.
 386
 387   This LU is intended as a parent for other LogicalUnits which will
 388   run no hooks, in order to reduce duplicate code.
 389
 390   """
 391   HPATH = None
 392   HTYPE = None
 393
 394   def BuildHooksEnv(self):
 395     """Empty BuildHooksEnv for NoHooksLu.
 396
 397     This just raises an error.
 398
 399     """
 400     assert False, "BuildHooksEnv called for NoHooksLUs"
 401
 402
 403 class Tasklet:
 404   """Tasklet base class.
 405
 406   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 407   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 408   tasklets know nothing about locks.
 409
 410   Subclasses must follow these rules:
 411     - Implement CheckPrereq
 412     - Implement Exec
 413
 414   """
 415   def __init__(self, lu):
 416     self.lu = lu
 417
 418     # Shortcuts
 419     self.cfg = lu.cfg
 420     self.rpc = lu.rpc
 421
 422   def CheckPrereq(self):
 423     """Check prerequisites for this tasklets.
 424
 425     This method should check whether the prerequisites for the execution of
 426     this tasklet are fulfilled. It can do internode communication, but it
 427     should be idempotent - no cluster or system changes are allowed.
 428
 429     The method should raise errors.OpPrereqError in case something is not
 430     fulfilled. Its return value is ignored.
 431
 432     This method should also update all parameters to their canonical form if it
 433     hasn't been done before.
 434
 435     """
 436     pass
 437
 438   def Exec(self, feedback_fn):
 439     """Execute the tasklet.
 440
 441     This method should implement the actual work. It should raise
 442     errors.OpExecError for failures that are somewhat dealt with in code, or
 443     expected.
 444
 445     """
 446     raise NotImplementedError
 447
 448
 449 class _QueryBase:
 450   """Base for query utility classes.
 451
 452   """
 453   #: Attribute holding field definitions
 454   FIELDS = None
 455
 456   def __init__(self, names, fields, use_locking):
 457     """Initializes this class.
 458
 459     """
 460     self.names = names
 461     self.use_locking = use_locking
 462
 463     self.query = query.Query(self.FIELDS, fields)
 464     self.requested_data = self.query.RequestedData()
 465
 466     self.do_locking = None
 467     self.wanted = None
 468
 469   def _GetNames(self, lu, all_names, lock_level):
 470     """Helper function to determine names asked for in the query.
 471
 472     """
 473     if self.do_locking:
 474       names = lu.acquired_locks[lock_level]
 475     else:
 476       names = all_names
 477
 478     if self.wanted == locking.ALL_SET:
 479       assert not self.names
 480       # caller didn't specify names, so ordering is not important
 481       return utils.NiceSort(names)
 482
 483     # caller specified names and we must keep the same order
 484     assert self.names
 485     assert not self.do_locking or lu.acquired_locks[lock_level]
 486
 487     missing = set(self.wanted).difference(names)
 488     if missing:
 489       raise errors.OpExecError("Some items were removed before retrieving"
 490                                " their data: %s" % missing)
 491
 492     # Return expanded names
 493     return self.wanted
 494
 495   @classmethod
 496   def FieldsQuery(cls, fields):
 497     """Returns list of available fields.
 498
 499     @return: List of L{objects.QueryFieldDefinition}
 500
 501     """
 502     return query.QueryFields(cls.FIELDS, fields)
 503
 504   def ExpandNames(self, lu):
 505     """Expand names for this query.
 506
 507     See L{LogicalUnit.ExpandNames}.
 508
 509     """
 510     raise NotImplementedError()
 511
 512   def DeclareLocks(self, lu, level):
 513     """Declare locks for this query.
 514
 515     See L{LogicalUnit.DeclareLocks}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def _GetQueryData(self, lu):
 521     """Collects all data for this query.
 522
 523     @return: Query data object
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def NewStyleQuery(self, lu):
 529     """Collect data and execute query.
 530
 531     """
 532     return query.GetQueryResponse(self.query, self._GetQueryData(lu))
 533
 534   def OldStyleQuery(self, lu):
 535     """Collect data and execute query.
 536
 537     """
 538     return self.query.OldStyleQuery(self._GetQueryData(lu))
 539
 540
 541 def _GetWantedNodes(lu, nodes):
 542   """Returns list of checked and expanded node names.
 543
 544   @type lu: L{LogicalUnit}
 545   @param lu: the logical unit on whose behalf we execute
 546   @type nodes: list
 547   @param nodes: list of node names or None for all nodes
 548   @rtype: list
 549   @return: the list of nodes, sorted
 550   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 551
 552   """
 553   if nodes:
 554     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 555
 556   return utils.NiceSort(lu.cfg.GetNodeList())
 557
 558
 559 def _GetWantedInstances(lu, instances):
 560   """Returns list of checked and expanded instance names.
 561
 562   @type lu: L{LogicalUnit}
 563   @param lu: the logical unit on whose behalf we execute
 564   @type instances: list
 565   @param instances: list of instance names or None for all instances
 566   @rtype: list
 567   @return: the list of instances, sorted
 568   @raise errors.OpPrereqError: if the instances parameter is wrong type
 569   @raise errors.OpPrereqError: if any of the passed instances is not found
 570
 571   """
 572   if instances:
 573     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 574   else:
 575     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 576   return wanted
 577
 578
 579 def _GetUpdatedParams(old_params, update_dict,
 580                       use_default=True, use_none=False):
 581   """Return the new version of a parameter dictionary.
 582
 583   @type old_params: dict
 584   @param old_params: old parameters
 585   @type update_dict: dict
 586   @param update_dict: dict containing new parameter values, or
 587       constants.VALUE_DEFAULT to reset the parameter to its default
 588       value
 589   @param use_default: boolean
 590   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 591       values as 'to be deleted' values
 592   @param use_none: boolean
 593   @type use_none: whether to recognise C{None} values as 'to be
 594       deleted' values
 595   @rtype: dict
 596   @return: the new parameter dictionary
 597
 598   """
 599   params_copy = copy.deepcopy(old_params)
 600   for key, val in update_dict.iteritems():
 601     if ((use_default and val == constants.VALUE_DEFAULT) or
 602         (use_none and val is None)):
 603       try:
 604         del params_copy[key]
 605       except KeyError:
 606         pass
 607     else:
 608       params_copy[key] = val
 609   return params_copy
 610
 611
 612 def _CheckOutputFields(static, dynamic, selected):
 613   """Checks whether all selected fields are valid.
 614
 615   @type static: L{utils.FieldSet}
 616   @param static: static fields set
 617   @type dynamic: L{utils.FieldSet}
 618   @param dynamic: dynamic fields set
 619
 620   """
 621   f = utils.FieldSet()
 622   f.Extend(static)
 623   f.Extend(dynamic)
 624
 625   delta = f.NonMatching(selected)
 626   if delta:
 627     raise errors.OpPrereqError("Unknown output fields selected: %s"
 628                                % ",".join(delta), errors.ECODE_INVAL)
 629
 630
 631 def _CheckGlobalHvParams(params):
 632   """Validates that given hypervisor params are not global ones.
 633
 634   This will ensure that instances don't get customised versions of
 635   global params.
 636
 637   """
 638   used_globals = constants.HVC_GLOBALS.intersection(params)
 639   if used_globals:
 640     msg = ("The following hypervisor parameters are global and cannot"
 641            " be customized at instance level, please modify them at"
 642            " cluster level: %s" % utils.CommaJoin(used_globals))
 643     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 644
 645
 646 def _CheckNodeOnline(lu, node, msg=None):
 647   """Ensure that a given node is online.
 648
 649   @param lu: the LU on behalf of which we make the check
 650   @param node: the node to check
 651   @param msg: if passed, should be a message to replace the default one
 652   @raise errors.OpPrereqError: if the node is offline
 653
 654   """
 655   if msg is None:
 656     msg = "Can't use offline node"
 657   if lu.cfg.GetNodeInfo(node).offline:
 658     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 659
 660
 661 def _CheckNodeNotDrained(lu, node):
 662   """Ensure that a given node is not drained.
 663
 664   @param lu: the LU on behalf of which we make the check
 665   @param node: the node to check
 666   @raise errors.OpPrereqError: if the node is drained
 667
 668   """
 669   if lu.cfg.GetNodeInfo(node).drained:
 670     raise errors.OpPrereqError("Can't use drained node %s" % node,
 671                                errors.ECODE_STATE)
 672
 673
 674 def _CheckNodeVmCapable(lu, node):
 675   """Ensure that a given node is vm capable.
 676
 677   @param lu: the LU on behalf of which we make the check
 678   @param node: the node to check
 679   @raise errors.OpPrereqError: if the node is not vm capable
 680
 681   """
 682   if not lu.cfg.GetNodeInfo(node).vm_capable:
 683     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 684                                errors.ECODE_STATE)
 685
 686
 687 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 688   """Ensure that a node supports a given OS.
 689
 690   @param lu: the LU on behalf of which we make the check
 691   @param node: the node to check
 692   @param os_name: the OS to query about
 693   @param force_variant: whether to ignore variant errors
 694   @raise errors.OpPrereqError: if the node is not supporting the OS
 695
 696   """
 697   result = lu.rpc.call_os_get(node, os_name)
 698   result.Raise("OS '%s' not in supported OS list for node %s" %
 699                (os_name, node),
 700                prereq=True, ecode=errors.ECODE_INVAL)
 701   if not force_variant:
 702     _CheckOSVariant(result.payload, os_name)
 703
 704
 705 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 706   """Ensure that a node has the given secondary ip.
 707
 708   @type lu: L{LogicalUnit}
 709   @param lu: the LU on behalf of which we make the check
 710   @type node: string
 711   @param node: the node to check
 712   @type secondary_ip: string
 713   @param secondary_ip: the ip to check
 714   @type prereq: boolean
 715   @param prereq: whether to throw a prerequisite or an execute error
 716   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 717   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 718
 719   """
 720   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 721   result.Raise("Failure checking secondary ip on node %s" % node,
 722                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 723   if not result.payload:
 724     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 725            " please fix and re-run this command" % secondary_ip)
 726     if prereq:
 727       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 728     else:
 729       raise errors.OpExecError(msg)
 730
 731
 732 def _GetClusterDomainSecret():
 733   """Reads the cluster domain secret.
 734
 735   """
 736   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 737                                strict=True)
 738
 739
 740 def _CheckInstanceDown(lu, instance, reason):
 741   """Ensure that an instance is not running."""
 742   if instance.admin_up:
 743     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 744                                (instance.name, reason), errors.ECODE_STATE)
 745
 746   pnode = instance.primary_node
 747   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 748   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 749               prereq=True, ecode=errors.ECODE_ENVIRON)
 750
 751   if instance.name in ins_l.payload:
 752     raise errors.OpPrereqError("Instance %s is running, %s" %
 753                                (instance.name, reason), errors.ECODE_STATE)
 754
 755
 756 def _ExpandItemName(fn, name, kind):
 757   """Expand an item name.
 758
 759   @param fn: the function to use for expansion
 760   @param name: requested item name
 761   @param kind: text description ('Node' or 'Instance')
 762   @return: the resolved (full) name
 763   @raise errors.OpPrereqError: if the item is not found
 764
 765   """
 766   full_name = fn(name)
 767   if full_name is None:
 768     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 769                                errors.ECODE_NOENT)
 770   return full_name
 771
 772
 773 def _ExpandNodeName(cfg, name):
 774   """Wrapper over L{_ExpandItemName} for nodes."""
 775   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 776
 777
 778 def _ExpandInstanceName(cfg, name):
 779   """Wrapper over L{_ExpandItemName} for instance."""
 780   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 781
 782
 783 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 784                           memory, vcpus, nics, disk_template, disks,
 785                           bep, hvp, hypervisor_name):
 786   """Builds instance related env variables for hooks
 787
 788   This builds the hook environment from individual variables.
 789
 790   @type name: string
 791   @param name: the name of the instance
 792   @type primary_node: string
 793   @param primary_node: the name of the instance's primary node
 794   @type secondary_nodes: list
 795   @param secondary_nodes: list of secondary nodes as strings
 796   @type os_type: string
 797   @param os_type: the name of the instance's OS
 798   @type status: boolean
 799   @param status: the should_run status of the instance
 800   @type memory: string
 801   @param memory: the memory size of the instance
 802   @type vcpus: string
 803   @param vcpus: the count of VCPUs the instance has
 804   @type nics: list
 805   @param nics: list of tuples (ip, mac, mode, link) representing
 806       the NICs the instance has
 807   @type disk_template: string
 808   @param disk_template: the disk template of the instance
 809   @type disks: list
 810   @param disks: the list of (size, mode) pairs
 811   @type bep: dict
 812   @param bep: the backend parameters for the instance
 813   @type hvp: dict
 814   @param hvp: the hypervisor parameters for the instance
 815   @type hypervisor_name: string
 816   @param hypervisor_name: the hypervisor for the instance
 817   @rtype: dict
 818   @return: the hook environment for this instance
 819
 820   """
 821   if status:
 822     str_status = "up"
 823   else:
 824     str_status = "down"
 825   env = {
 826     "OP_TARGET": name,
 827     "INSTANCE_NAME": name,
 828     "INSTANCE_PRIMARY": primary_node,
 829     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 830     "INSTANCE_OS_TYPE": os_type,
 831     "INSTANCE_STATUS": str_status,
 832     "INSTANCE_MEMORY": memory,
 833     "INSTANCE_VCPUS": vcpus,
 834     "INSTANCE_DISK_TEMPLATE": disk_template,
 835     "INSTANCE_HYPERVISOR": hypervisor_name,
 836   }
 837
 838   if nics:
 839     nic_count = len(nics)
 840     for idx, (ip, mac, mode, link) in enumerate(nics):
 841       if ip is None:
 842         ip = ""
 843       env["INSTANCE_NIC%d_IP" % idx] = ip
 844       env["INSTANCE_NIC%d_MAC" % idx] = mac
 845       env["INSTANCE_NIC%d_MODE" % idx] = mode
 846       env["INSTANCE_NIC%d_LINK" % idx] = link
 847       if mode == constants.NIC_MODE_BRIDGED:
 848         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 849   else:
 850     nic_count = 0
 851
 852   env["INSTANCE_NIC_COUNT"] = nic_count
 853
 854   if disks:
 855     disk_count = len(disks)
 856     for idx, (size, mode) in enumerate(disks):
 857       env["INSTANCE_DISK%d_SIZE" % idx] = size
 858       env["INSTANCE_DISK%d_MODE" % idx] = mode
 859   else:
 860     disk_count = 0
 861
 862   env["INSTANCE_DISK_COUNT"] = disk_count
 863
 864   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 865     for key, value in source.items():
 866       env["INSTANCE_%s_%s" % (kind, key)] = value
 867
 868   return env
 869
 870
 871 def _NICListToTuple(lu, nics):
 872   """Build a list of nic information tuples.
 873
 874   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 875   value in LUQueryInstanceData.
 876
 877   @type lu:  L{LogicalUnit}
 878   @param lu: the logical unit on whose behalf we execute
 879   @type nics: list of L{objects.NIC}
 880   @param nics: list of nics to convert to hooks tuples
 881
 882   """
 883   hooks_nics = []
 884   cluster = lu.cfg.GetClusterInfo()
 885   for nic in nics:
 886     ip = nic.ip
 887     mac = nic.mac
 888     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 889     mode = filled_params[constants.NIC_MODE]
 890     link = filled_params[constants.NIC_LINK]
 891     hooks_nics.append((ip, mac, mode, link))
 892   return hooks_nics
 893
 894
 895 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 896   """Builds instance related env variables for hooks from an object.
 897
 898   @type lu: L{LogicalUnit}
 899   @param lu: the logical unit on whose behalf we execute
 900   @type instance: L{objects.Instance}
 901   @param instance: the instance for which we should build the
 902       environment
 903   @type override: dict
 904   @param override: dictionary with key/values that will override
 905       our values
 906   @rtype: dict
 907   @return: the hook environment dictionary
 908
 909   """
 910   cluster = lu.cfg.GetClusterInfo()
 911   bep = cluster.FillBE(instance)
 912   hvp = cluster.FillHV(instance)
 913   args = {
 914     'name': instance.name,
 915     'primary_node': instance.primary_node,
 916     'secondary_nodes': instance.secondary_nodes,
 917     'os_type': instance.os,
 918     'status': instance.admin_up,
 919     'memory': bep[constants.BE_MEMORY],
 920     'vcpus': bep[constants.BE_VCPUS],
 921     'nics': _NICListToTuple(lu, instance.nics),
 922     'disk_template': instance.disk_template,
 923     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 924     'bep': bep,
 925     'hvp': hvp,
 926     'hypervisor_name': instance.hypervisor,
 927   }
 928   if override:
 929     args.update(override)
 930   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 931
 932
 933 def _AdjustCandidatePool(lu, exceptions):
 934   """Adjust the candidate pool after node operations.
 935
 936   """
 937   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 938   if mod_list:
 939     lu.LogInfo("Promoted nodes to master candidate role: %s",
 940                utils.CommaJoin(node.name for node in mod_list))
 941     for name in mod_list:
 942       lu.context.ReaddNode(name)
 943   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 944   if mc_now > mc_max:
 945     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 946                (mc_now, mc_max))
 947
 948
 949 def _DecideSelfPromotion(lu, exceptions=None):
 950   """Decide whether I should promote myself as a master candidate.
 951
 952   """
 953   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 954   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 955   # the new node will increase mc_max with one, so:
 956   mc_should = min(mc_should + 1, cp_size)
 957   return mc_now < mc_should
 958
 959
 960 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 961   """Check that the brigdes needed by a list of nics exist.
 962
 963   """
 964   cluster = lu.cfg.GetClusterInfo()
 965   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 966   brlist = [params[constants.NIC_LINK] for params in paramslist
 967             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 968   if brlist:
 969     result = lu.rpc.call_bridges_exist(target_node, brlist)
 970     result.Raise("Error checking bridges on destination node '%s'" %
 971                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 972
 973
 974 def _CheckInstanceBridgesExist(lu, instance, node=None):
 975   """Check that the brigdes needed by an instance exist.
 976
 977   """
 978   if node is None:
 979     node = instance.primary_node
 980   _CheckNicsBridgesExist(lu, instance.nics, node)
 981
 982
 983 def _CheckOSVariant(os_obj, name):
 984   """Check whether an OS name conforms to the os variants specification.
 985
 986   @type os_obj: L{objects.OS}
 987   @param os_obj: OS object to check
 988   @type name: string
 989   @param name: OS name passed by the user, to check for validity
 990
 991   """
 992   if not os_obj.supported_variants:
 993     return
 994   variant = objects.OS.GetVariant(name)
 995   if not variant:
 996     raise errors.OpPrereqError("OS name must include a variant",
 997                                errors.ECODE_INVAL)
 998
 999   if variant not in os_obj.supported_variants:
1000     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1001
1002
1003 def _GetNodeInstancesInner(cfg, fn):
1004   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1005
1006
1007 def _GetNodeInstances(cfg, node_name):
1008   """Returns a list of all primary and secondary instances on a node.
1009
1010   """
1011
1012   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1013
1014
1015 def _GetNodePrimaryInstances(cfg, node_name):
1016   """Returns primary instances on a node.
1017
1018   """
1019   return _GetNodeInstancesInner(cfg,
1020                                 lambda inst: node_name == inst.primary_node)
1021
1022
1023 def _GetNodeSecondaryInstances(cfg, node_name):
1024   """Returns secondary instances on a node.
1025
1026   """
1027   return _GetNodeInstancesInner(cfg,
1028                                 lambda inst: node_name in inst.secondary_nodes)
1029
1030
1031 def _GetStorageTypeArgs(cfg, storage_type):
1032   """Returns the arguments for a storage type.
1033
1034   """
1035   # Special case for file storage
1036   if storage_type == constants.ST_FILE:
1037     # storage.FileStorage wants a list of storage directories
1038     return [[cfg.GetFileStorageDir()]]
1039
1040   return []
1041
1042
1043 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1044   faulty = []
1045
1046   for dev in instance.disks:
1047     cfg.SetDiskID(dev, node_name)
1048
1049   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1050   result.Raise("Failed to get disk status from node %s" % node_name,
1051                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1052
1053   for idx, bdev_status in enumerate(result.payload):
1054     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1055       faulty.append(idx)
1056
1057   return faulty
1058
1059
1060 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1061   """Check the sanity of iallocator and node arguments and use the
1062   cluster-wide iallocator if appropriate.
1063
1064   Check that at most one of (iallocator, node) is specified. If none is
1065   specified, then the LU's opcode's iallocator slot is filled with the
1066   cluster-wide default iallocator.
1067
1068   @type iallocator_slot: string
1069   @param iallocator_slot: the name of the opcode iallocator slot
1070   @type node_slot: string
1071   @param node_slot: the name of the opcode target node slot
1072
1073   """
1074   node = getattr(lu.op, node_slot, None)
1075   iallocator = getattr(lu.op, iallocator_slot, None)
1076
1077   if node is not None and iallocator is not None:
1078     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1079                                errors.ECODE_INVAL)
1080   elif node is None and iallocator is None:
1081     default_iallocator = lu.cfg.GetDefaultIAllocator()
1082     if default_iallocator:
1083       setattr(lu.op, iallocator_slot, default_iallocator)
1084     else:
1085       raise errors.OpPrereqError("No iallocator or node given and no"
1086                                  " cluster-wide default iallocator found."
1087                                  " Please specify either an iallocator or a"
1088                                  " node, or set a cluster-wide default"
1089                                  " iallocator.")
1090
1091
1092 class LUClusterPostInit(LogicalUnit):
1093   """Logical unit for running hooks after cluster initialization.
1094
1095   """
1096   HPATH = "cluster-init"
1097   HTYPE = constants.HTYPE_CLUSTER
1098
1099   def BuildHooksEnv(self):
1100     """Build hooks env.
1101
1102     """
1103     env = {"OP_TARGET": self.cfg.GetClusterName()}
1104     mn = self.cfg.GetMasterNode()
1105     return env, [], [mn]
1106
1107   def Exec(self, feedback_fn):
1108     """Nothing to do.
1109
1110     """
1111     return True
1112
1113
1114 class LUClusterDestroy(LogicalUnit):
1115   """Logical unit for destroying the cluster.
1116
1117   """
1118   HPATH = "cluster-destroy"
1119   HTYPE = constants.HTYPE_CLUSTER
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     return env, [], []
1127
1128   def CheckPrereq(self):
1129     """Check prerequisites.
1130
1131     This checks whether the cluster is empty.
1132
1133     Any errors are signaled by raising errors.OpPrereqError.
1134
1135     """
1136     master = self.cfg.GetMasterNode()
1137
1138     nodelist = self.cfg.GetNodeList()
1139     if len(nodelist) != 1 or nodelist[0] != master:
1140       raise errors.OpPrereqError("There are still %d node(s) in"
1141                                  " this cluster." % (len(nodelist) - 1),
1142                                  errors.ECODE_INVAL)
1143     instancelist = self.cfg.GetInstanceList()
1144     if instancelist:
1145       raise errors.OpPrereqError("There are still %d instance(s) in"
1146                                  " this cluster." % len(instancelist),
1147                                  errors.ECODE_INVAL)
1148
1149   def Exec(self, feedback_fn):
1150     """Destroys the cluster.
1151
1152     """
1153     master = self.cfg.GetMasterNode()
1154
1155     # Run post hooks on master node before it's removed
1156     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1157     try:
1158       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1159     except:
1160       # pylint: disable-msg=W0702
1161       self.LogWarning("Errors occurred running hooks on %s" % master)
1162
1163     result = self.rpc.call_node_stop_master(master, False)
1164     result.Raise("Could not disable the master role")
1165
1166     return master
1167
1168
1169 def _VerifyCertificate(filename):
1170   """Verifies a certificate for LUClusterVerify.
1171
1172   @type filename: string
1173   @param filename: Path to PEM file
1174
1175   """
1176   try:
1177     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1178                                            utils.ReadFile(filename))
1179   except Exception, err: # pylint: disable-msg=W0703
1180     return (LUClusterVerify.ETYPE_ERROR,
1181             "Failed to load X509 certificate %s: %s" % (filename, err))
1182
1183   (errcode, msg) = \
1184     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1185                                 constants.SSL_CERT_EXPIRATION_ERROR)
1186
1187   if msg:
1188     fnamemsg = "While verifying %s: %s" % (filename, msg)
1189   else:
1190     fnamemsg = None
1191
1192   if errcode is None:
1193     return (None, fnamemsg)
1194   elif errcode == utils.CERT_WARNING:
1195     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1196   elif errcode == utils.CERT_ERROR:
1197     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1198
1199   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1200
1201
1202 class LUClusterVerify(LogicalUnit):
1203   """Verifies the cluster status.
1204
1205   """
1206   HPATH = "cluster-verify"
1207   HTYPE = constants.HTYPE_CLUSTER
1208   REQ_BGL = False
1209
1210   TCLUSTER = "cluster"
1211   TNODE = "node"
1212   TINSTANCE = "instance"
1213
1214   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1215   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1216   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1217   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1218   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1219   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1220   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1221   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1222   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1223   ENODEDRBD = (TNODE, "ENODEDRBD")
1224   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1225   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1226   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1227   ENODEHV = (TNODE, "ENODEHV")
1228   ENODELVM = (TNODE, "ENODELVM")
1229   ENODEN1 = (TNODE, "ENODEN1")
1230   ENODENET = (TNODE, "ENODENET")
1231   ENODEOS = (TNODE, "ENODEOS")
1232   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1233   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1234   ENODERPC = (TNODE, "ENODERPC")
1235   ENODESSH = (TNODE, "ENODESSH")
1236   ENODEVERSION = (TNODE, "ENODEVERSION")
1237   ENODESETUP = (TNODE, "ENODESETUP")
1238   ENODETIME = (TNODE, "ENODETIME")
1239   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1240
1241   ETYPE_FIELD = "code"
1242   ETYPE_ERROR = "ERROR"
1243   ETYPE_WARNING = "WARNING"
1244
1245   _HOOKS_INDENT_RE = re.compile("^", re.M)
1246
1247   class NodeImage(object):
1248     """A class representing the logical and physical status of a node.
1249
1250     @type name: string
1251     @ivar name: the node name to which this object refers
1252     @ivar volumes: a structure as returned from
1253         L{ganeti.backend.GetVolumeList} (runtime)
1254     @ivar instances: a list of running instances (runtime)
1255     @ivar pinst: list of configured primary instances (config)
1256     @ivar sinst: list of configured secondary instances (config)
1257     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1258         of this node (config)
1259     @ivar mfree: free memory, as reported by hypervisor (runtime)
1260     @ivar dfree: free disk, as reported by the node (runtime)
1261     @ivar offline: the offline status (config)
1262     @type rpc_fail: boolean
1263     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1264         not whether the individual keys were correct) (runtime)
1265     @type lvm_fail: boolean
1266     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1267     @type hyp_fail: boolean
1268     @ivar hyp_fail: whether the RPC call didn't return the instance list
1269     @type ghost: boolean
1270     @ivar ghost: whether this is a known node or not (config)
1271     @type os_fail: boolean
1272     @ivar os_fail: whether the RPC call didn't return valid OS data
1273     @type oslist: list
1274     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1275     @type vm_capable: boolean
1276     @ivar vm_capable: whether the node can host instances
1277
1278     """
1279     def __init__(self, offline=False, name=None, vm_capable=True):
1280       self.name = name
1281       self.volumes = {}
1282       self.instances = []
1283       self.pinst = []
1284       self.sinst = []
1285       self.sbp = {}
1286       self.mfree = 0
1287       self.dfree = 0
1288       self.offline = offline
1289       self.vm_capable = vm_capable
1290       self.rpc_fail = False
1291       self.lvm_fail = False
1292       self.hyp_fail = False
1293       self.ghost = False
1294       self.os_fail = False
1295       self.oslist = {}
1296
1297   def ExpandNames(self):
1298     self.needed_locks = {
1299       locking.LEVEL_NODE: locking.ALL_SET,
1300       locking.LEVEL_INSTANCE: locking.ALL_SET,
1301     }
1302     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1303
1304   def _Error(self, ecode, item, msg, *args, **kwargs):
1305     """Format an error message.
1306
1307     Based on the opcode's error_codes parameter, either format a
1308     parseable error code, or a simpler error string.
1309
1310     This must be called only from Exec and functions called from Exec.
1311
1312     """
1313     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1314     itype, etxt = ecode
1315     # first complete the msg
1316     if args:
1317       msg = msg % args
1318     # then format the whole message
1319     if self.op.error_codes:
1320       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1321     else:
1322       if item:
1323         item = " " + item
1324       else:
1325         item = ""
1326       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1327     # and finally report it via the feedback_fn
1328     self._feedback_fn("  - %s" % msg)
1329
1330   def _ErrorIf(self, cond, *args, **kwargs):
1331     """Log an error message if the passed condition is True.
1332
1333     """
1334     cond = bool(cond) or self.op.debug_simulate_errors
1335     if cond:
1336       self._Error(*args, **kwargs)
1337     # do not mark the operation as failed for WARN cases only
1338     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1339       self.bad = self.bad or cond
1340
1341   def _VerifyNode(self, ninfo, nresult):
1342     """Perform some basic validation on data returned from a node.
1343
1344       - check the result data structure is well formed and has all the
1345         mandatory fields
1346       - check ganeti version
1347
1348     @type ninfo: L{objects.Node}
1349     @param ninfo: the node to check
1350     @param nresult: the results from the node
1351     @rtype: boolean
1352     @return: whether overall this call was successful (and we can expect
1353          reasonable values in the respose)
1354
1355     """
1356     node = ninfo.name
1357     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1358
1359     # main result, nresult should be a non-empty dict
1360     test = not nresult or not isinstance(nresult, dict)
1361     _ErrorIf(test, self.ENODERPC, node,
1362                   "unable to verify node: no data returned")
1363     if test:
1364       return False
1365
1366     # compares ganeti version
1367     local_version = constants.PROTOCOL_VERSION
1368     remote_version = nresult.get("version", None)
1369     test = not (remote_version and
1370                 isinstance(remote_version, (list, tuple)) and
1371                 len(remote_version) == 2)
1372     _ErrorIf(test, self.ENODERPC, node,
1373              "connection to node returned invalid data")
1374     if test:
1375       return False
1376
1377     test = local_version != remote_version[0]
1378     _ErrorIf(test, self.ENODEVERSION, node,
1379              "incompatible protocol versions: master %s,"
1380              " node %s", local_version, remote_version[0])
1381     if test:
1382       return False
1383
1384     # node seems compatible, we can actually try to look into its results
1385
1386     # full package version
1387     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1388                   self.ENODEVERSION, node,
1389                   "software version mismatch: master %s, node %s",
1390                   constants.RELEASE_VERSION, remote_version[1],
1391                   code=self.ETYPE_WARNING)
1392
1393     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1394     if ninfo.vm_capable and isinstance(hyp_result, dict):
1395       for hv_name, hv_result in hyp_result.iteritems():
1396         test = hv_result is not None
1397         _ErrorIf(test, self.ENODEHV, node,
1398                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1399
1400     test = nresult.get(constants.NV_NODESETUP,
1401                            ["Missing NODESETUP results"])
1402     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1403              "; ".join(test))
1404
1405     return True
1406
1407   def _VerifyNodeTime(self, ninfo, nresult,
1408                       nvinfo_starttime, nvinfo_endtime):
1409     """Check the node time.
1410
1411     @type ninfo: L{objects.Node}
1412     @param ninfo: the node to check
1413     @param nresult: the remote results for the node
1414     @param nvinfo_starttime: the start time of the RPC call
1415     @param nvinfo_endtime: the end time of the RPC call
1416
1417     """
1418     node = ninfo.name
1419     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1420
1421     ntime = nresult.get(constants.NV_TIME, None)
1422     try:
1423       ntime_merged = utils.MergeTime(ntime)
1424     except (ValueError, TypeError):
1425       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1426       return
1427
1428     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1429       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1430     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1431       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1432     else:
1433       ntime_diff = None
1434
1435     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1436              "Node time diverges by at least %s from master node time",
1437              ntime_diff)
1438
1439   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1440     """Check the node time.
1441
1442     @type ninfo: L{objects.Node}
1443     @param ninfo: the node to check
1444     @param nresult: the remote results for the node
1445     @param vg_name: the configured VG name
1446
1447     """
1448     if vg_name is None:
1449       return
1450
1451     node = ninfo.name
1452     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1453
1454     # checks vg existence and size > 20G
1455     vglist = nresult.get(constants.NV_VGLIST, None)
1456     test = not vglist
1457     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1458     if not test:
1459       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1460                                             constants.MIN_VG_SIZE)
1461       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1462
1463     # check pv names
1464     pvlist = nresult.get(constants.NV_PVLIST, None)
1465     test = pvlist is None
1466     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1467     if not test:
1468       # check that ':' is not present in PV names, since it's a
1469       # special character for lvcreate (denotes the range of PEs to
1470       # use on the PV)
1471       for _, pvname, owner_vg in pvlist:
1472         test = ":" in pvname
1473         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1474                  " '%s' of VG '%s'", pvname, owner_vg)
1475
1476   def _VerifyNodeNetwork(self, ninfo, nresult):
1477     """Check the node time.
1478
1479     @type ninfo: L{objects.Node}
1480     @param ninfo: the node to check
1481     @param nresult: the remote results for the node
1482
1483     """
1484     node = ninfo.name
1485     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1486
1487     test = constants.NV_NODELIST not in nresult
1488     _ErrorIf(test, self.ENODESSH, node,
1489              "node hasn't returned node ssh connectivity data")
1490     if not test:
1491       if nresult[constants.NV_NODELIST]:
1492         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1493           _ErrorIf(True, self.ENODESSH, node,
1494                    "ssh communication with node '%s': %s", a_node, a_msg)
1495
1496     test = constants.NV_NODENETTEST not in nresult
1497     _ErrorIf(test, self.ENODENET, node,
1498              "node hasn't returned node tcp connectivity data")
1499     if not test:
1500       if nresult[constants.NV_NODENETTEST]:
1501         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1502         for anode in nlist:
1503           _ErrorIf(True, self.ENODENET, node,
1504                    "tcp communication with node '%s': %s",
1505                    anode, nresult[constants.NV_NODENETTEST][anode])
1506
1507     test = constants.NV_MASTERIP not in nresult
1508     _ErrorIf(test, self.ENODENET, node,
1509              "node hasn't returned node master IP reachability data")
1510     if not test:
1511       if not nresult[constants.NV_MASTERIP]:
1512         if node == self.master_node:
1513           msg = "the master node cannot reach the master IP (not configured?)"
1514         else:
1515           msg = "cannot reach the master IP"
1516         _ErrorIf(True, self.ENODENET, node, msg)
1517
1518   def _VerifyInstance(self, instance, instanceconfig, node_image,
1519                       diskstatus):
1520     """Verify an instance.
1521
1522     This function checks to see if the required block devices are
1523     available on the instance's node.
1524
1525     """
1526     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1527     node_current = instanceconfig.primary_node
1528
1529     node_vol_should = {}
1530     instanceconfig.MapLVsByNode(node_vol_should)
1531
1532     for node in node_vol_should:
1533       n_img = node_image[node]
1534       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1535         # ignore missing volumes on offline or broken nodes
1536         continue
1537       for volume in node_vol_should[node]:
1538         test = volume not in n_img.volumes
1539         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1540                  "volume %s missing on node %s", volume, node)
1541
1542     if instanceconfig.admin_up:
1543       pri_img = node_image[node_current]
1544       test = instance not in pri_img.instances and not pri_img.offline
1545       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1546                "instance not running on its primary node %s",
1547                node_current)
1548
1549     for node, n_img in node_image.items():
1550       if (not node == node_current):
1551         test = instance in n_img.instances
1552         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1553                  "instance should not run on node %s", node)
1554
1555     diskdata = [(nname, success, status, idx)
1556                 for (nname, disks) in diskstatus.items()
1557                 for idx, (success, status) in enumerate(disks)]
1558
1559     for nname, success, bdev_status, idx in diskdata:
1560       _ErrorIf(instanceconfig.admin_up and not success,
1561                self.EINSTANCEFAULTYDISK, instance,
1562                "couldn't retrieve status for disk/%s on %s: %s",
1563                idx, nname, bdev_status)
1564       _ErrorIf((instanceconfig.admin_up and success and
1565                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1566                self.EINSTANCEFAULTYDISK, instance,
1567                "disk/%s on %s is faulty", idx, nname)
1568
1569   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1570     """Verify if there are any unknown volumes in the cluster.
1571
1572     The .os, .swap and backup volumes are ignored. All other volumes are
1573     reported as unknown.
1574
1575     @type reserved: L{ganeti.utils.FieldSet}
1576     @param reserved: a FieldSet of reserved volume names
1577
1578     """
1579     for node, n_img in node_image.items():
1580       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1581         # skip non-healthy nodes
1582         continue
1583       for volume in n_img.volumes:
1584         test = ((node not in node_vol_should or
1585                 volume not in node_vol_should[node]) and
1586                 not reserved.Matches(volume))
1587         self._ErrorIf(test, self.ENODEORPHANLV, node,
1588                       "volume %s is unknown", volume)
1589
1590   def _VerifyOrphanInstances(self, instancelist, node_image):
1591     """Verify the list of running instances.
1592
1593     This checks what instances are running but unknown to the cluster.
1594
1595     """
1596     for node, n_img in node_image.items():
1597       for o_inst in n_img.instances:
1598         test = o_inst not in instancelist
1599         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1600                       "instance %s on node %s should not exist", o_inst, node)
1601
1602   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1603     """Verify N+1 Memory Resilience.
1604
1605     Check that if one single node dies we can still start all the
1606     instances it was primary for.
1607
1608     """
1609     for node, n_img in node_image.items():
1610       # This code checks that every node which is now listed as
1611       # secondary has enough memory to host all instances it is
1612       # supposed to should a single other node in the cluster fail.
1613       # FIXME: not ready for failover to an arbitrary node
1614       # FIXME: does not support file-backed instances
1615       # WARNING: we currently take into account down instances as well
1616       # as up ones, considering that even if they're down someone
1617       # might want to start them even in the event of a node failure.
1618       for prinode, instances in n_img.sbp.items():
1619         needed_mem = 0
1620         for instance in instances:
1621           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1622           if bep[constants.BE_AUTO_BALANCE]:
1623             needed_mem += bep[constants.BE_MEMORY]
1624         test = n_img.mfree < needed_mem
1625         self._ErrorIf(test, self.ENODEN1, node,
1626                       "not enough memory to accomodate instance failovers"
1627                       " should node %s fail", prinode)
1628
1629   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1630                        master_files):
1631     """Verifies and computes the node required file checksums.
1632
1633     @type ninfo: L{objects.Node}
1634     @param ninfo: the node to check
1635     @param nresult: the remote results for the node
1636     @param file_list: required list of files
1637     @param local_cksum: dictionary of local files and their checksums
1638     @param master_files: list of files that only masters should have
1639
1640     """
1641     node = ninfo.name
1642     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1643
1644     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1645     test = not isinstance(remote_cksum, dict)
1646     _ErrorIf(test, self.ENODEFILECHECK, node,
1647              "node hasn't returned file checksum data")
1648     if test:
1649       return
1650
1651     for file_name in file_list:
1652       node_is_mc = ninfo.master_candidate
1653       must_have = (file_name not in master_files) or node_is_mc
1654       # missing
1655       test1 = file_name not in remote_cksum
1656       # invalid checksum
1657       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1658       # existing and good
1659       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1660       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1661                "file '%s' missing", file_name)
1662       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1663                "file '%s' has wrong checksum", file_name)
1664       # not candidate and this is not a must-have file
1665       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1666                "file '%s' should not exist on non master"
1667                " candidates (and the file is outdated)", file_name)
1668       # all good, except non-master/non-must have combination
1669       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1670                "file '%s' should not exist"
1671                " on non master candidates", file_name)
1672
1673   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1674                       drbd_map):
1675     """Verifies and the node DRBD status.
1676
1677     @type ninfo: L{objects.Node}
1678     @param ninfo: the node to check
1679     @param nresult: the remote results for the node
1680     @param instanceinfo: the dict of instances
1681     @param drbd_helper: the configured DRBD usermode helper
1682     @param drbd_map: the DRBD map as returned by
1683         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1684
1685     """
1686     node = ninfo.name
1687     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1688
1689     if drbd_helper:
1690       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1691       test = (helper_result == None)
1692       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1693                "no drbd usermode helper returned")
1694       if helper_result:
1695         status, payload = helper_result
1696         test = not status
1697         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1698                  "drbd usermode helper check unsuccessful: %s", payload)
1699         test = status and (payload != drbd_helper)
1700         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1701                  "wrong drbd usermode helper: %s", payload)
1702
1703     # compute the DRBD minors
1704     node_drbd = {}
1705     for minor, instance in drbd_map[node].items():
1706       test = instance not in instanceinfo
1707       _ErrorIf(test, self.ECLUSTERCFG, None,
1708                "ghost instance '%s' in temporary DRBD map", instance)
1709         # ghost instance should not be running, but otherwise we
1710         # don't give double warnings (both ghost instance and
1711         # unallocated minor in use)
1712       if test:
1713         node_drbd[minor] = (instance, False)
1714       else:
1715         instance = instanceinfo[instance]
1716         node_drbd[minor] = (instance.name, instance.admin_up)
1717
1718     # and now check them
1719     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720     test = not isinstance(used_minors, (tuple, list))
1721     _ErrorIf(test, self.ENODEDRBD, node,
1722              "cannot parse drbd status file: %s", str(used_minors))
1723     if test:
1724       # we cannot check drbd status
1725       return
1726
1727     for minor, (iname, must_exist) in node_drbd.items():
1728       test = minor not in used_minors and must_exist
1729       _ErrorIf(test, self.ENODEDRBD, node,
1730                "drbd minor %d of instance %s is not active", minor, iname)
1731     for minor in used_minors:
1732       test = minor not in node_drbd
1733       _ErrorIf(test, self.ENODEDRBD, node,
1734                "unallocated drbd minor %d is in use", minor)
1735
1736   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737     """Builds the node OS structures.
1738
1739     @type ninfo: L{objects.Node}
1740     @param ninfo: the node to check
1741     @param nresult: the remote results for the node
1742     @param nimg: the node image object
1743
1744     """
1745     node = ninfo.name
1746     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1747
1748     remote_os = nresult.get(constants.NV_OSLIST, None)
1749     test = (not isinstance(remote_os, list) or
1750             not compat.all(isinstance(v, list) and len(v) == 7
1751                            for v in remote_os))
1752
1753     _ErrorIf(test, self.ENODEOS, node,
1754              "node hasn't returned valid OS data")
1755
1756     nimg.os_fail = test
1757
1758     if test:
1759       return
1760
1761     os_dict = {}
1762
1763     for (name, os_path, status, diagnose,
1764          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1765
1766       if name not in os_dict:
1767         os_dict[name] = []
1768
1769       # parameters is a list of lists instead of list of tuples due to
1770       # JSON lacking a real tuple type, fix it:
1771       parameters = [tuple(v) for v in parameters]
1772       os_dict[name].append((os_path, status, diagnose,
1773                             set(variants), set(parameters), set(api_ver)))
1774
1775     nimg.oslist = os_dict
1776
1777   def _VerifyNodeOS(self, ninfo, nimg, base):
1778     """Verifies the node OS list.
1779
1780     @type ninfo: L{objects.Node}
1781     @param ninfo: the node to check
1782     @param nimg: the node image object
1783     @param base: the 'template' node we match against (e.g. from the master)
1784
1785     """
1786     node = ninfo.name
1787     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788
1789     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1790
1791     for os_name, os_data in nimg.oslist.items():
1792       assert os_data, "Empty OS status for OS %s?!" % os_name
1793       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794       _ErrorIf(not f_status, self.ENODEOS, node,
1795                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797                "OS '%s' has multiple entries (first one shadows the rest): %s",
1798                os_name, utils.CommaJoin([v[0] for v in os_data]))
1799       # this will catched in backend too
1800       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801                and not f_var, self.ENODEOS, node,
1802                "OS %s with API at least %d does not declare any variant",
1803                os_name, constants.OS_API_V15)
1804       # comparisons with the 'base' image
1805       test = os_name not in base.oslist
1806       _ErrorIf(test, self.ENODEOS, node,
1807                "Extra OS %s not present on reference node (%s)",
1808                os_name, base.name)
1809       if test:
1810         continue
1811       assert base.oslist[os_name], "Base node has empty OS status?"
1812       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1813       if not b_status:
1814         # base OS is invalid, skipping
1815         continue
1816       for kind, a, b in [("API version", f_api, b_api),
1817                          ("variants list", f_var, b_var),
1818                          ("parameters", f_param, b_param)]:
1819         _ErrorIf(a != b, self.ENODEOS, node,
1820                  "OS %s %s differs from reference node %s: %s vs. %s",
1821                  kind, os_name, base.name,
1822                  utils.CommaJoin(a), utils.CommaJoin(b))
1823
1824     # check any missing OSes
1825     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826     _ErrorIf(missing, self.ENODEOS, node,
1827              "OSes present on reference node %s but missing on this node: %s",
1828              base.name, utils.CommaJoin(missing))
1829
1830   def _VerifyOob(self, ninfo, nresult):
1831     """Verifies out of band functionality of a node.
1832
1833     @type ninfo: L{objects.Node}
1834     @param ninfo: the node to check
1835     @param nresult: the remote results for the node
1836
1837     """
1838     node = ninfo.name
1839     # We just have to verify the paths on master and/or master candidates
1840     # as the oob helper is invoked on the master
1841     if ((ninfo.master_candidate or ninfo.master_capable) and
1842         constants.NV_OOB_PATHS in nresult):
1843       for path_result in nresult[constants.NV_OOB_PATHS]:
1844         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1845
1846   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1847     """Verifies and updates the node volume data.
1848
1849     This function will update a L{NodeImage}'s internal structures
1850     with data from the remote call.
1851
1852     @type ninfo: L{objects.Node}
1853     @param ninfo: the node to check
1854     @param nresult: the remote results for the node
1855     @param nimg: the node image object
1856     @param vg_name: the configured VG name
1857
1858     """
1859     node = ninfo.name
1860     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1861
1862     nimg.lvm_fail = True
1863     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1864     if vg_name is None:
1865       pass
1866     elif isinstance(lvdata, basestring):
1867       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1868                utils.SafeEncode(lvdata))
1869     elif not isinstance(lvdata, dict):
1870       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1871     else:
1872       nimg.volumes = lvdata
1873       nimg.lvm_fail = False
1874
1875   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1876     """Verifies and updates the node instance list.
1877
1878     If the listing was successful, then updates this node's instance
1879     list. Otherwise, it marks the RPC call as failed for the instance
1880     list key.
1881
1882     @type ninfo: L{objects.Node}
1883     @param ninfo: the node to check
1884     @param nresult: the remote results for the node
1885     @param nimg: the node image object
1886
1887     """
1888     idata = nresult.get(constants.NV_INSTANCELIST, None)
1889     test = not isinstance(idata, list)
1890     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1891                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1892     if test:
1893       nimg.hyp_fail = True
1894     else:
1895       nimg.instances = idata
1896
1897   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1898     """Verifies and computes a node information map
1899
1900     @type ninfo: L{objects.Node}
1901     @param ninfo: the node to check
1902     @param nresult: the remote results for the node
1903     @param nimg: the node image object
1904     @param vg_name: the configured VG name
1905
1906     """
1907     node = ninfo.name
1908     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1909
1910     # try to read free memory (from the hypervisor)
1911     hv_info = nresult.get(constants.NV_HVINFO, None)
1912     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1913     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1914     if not test:
1915       try:
1916         nimg.mfree = int(hv_info["memory_free"])
1917       except (ValueError, TypeError):
1918         _ErrorIf(True, self.ENODERPC, node,
1919                  "node returned invalid nodeinfo, check hypervisor")
1920
1921     # FIXME: devise a free space model for file based instances as well
1922     if vg_name is not None:
1923       test = (constants.NV_VGLIST not in nresult or
1924               vg_name not in nresult[constants.NV_VGLIST])
1925       _ErrorIf(test, self.ENODELVM, node,
1926                "node didn't return data for the volume group '%s'"
1927                " - it is either missing or broken", vg_name)
1928       if not test:
1929         try:
1930           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1931         except (ValueError, TypeError):
1932           _ErrorIf(True, self.ENODERPC, node,
1933                    "node returned invalid LVM info, check LVM status")
1934
1935   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1936     """Gets per-disk status information for all instances.
1937
1938     @type nodelist: list of strings
1939     @param nodelist: Node names
1940     @type node_image: dict of (name, L{objects.Node})
1941     @param node_image: Node objects
1942     @type instanceinfo: dict of (name, L{objects.Instance})
1943     @param instanceinfo: Instance objects
1944     @rtype: {instance: {node: [(succes, payload)]}}
1945     @return: a dictionary of per-instance dictionaries with nodes as
1946         keys and disk information as values; the disk information is a
1947         list of tuples (success, payload)
1948
1949     """
1950     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1951
1952     node_disks = {}
1953     node_disks_devonly = {}
1954     diskless_instances = set()
1955     diskless = constants.DT_DISKLESS
1956
1957     for nname in nodelist:
1958       node_instances = list(itertools.chain(node_image[nname].pinst,
1959                                             node_image[nname].sinst))
1960       diskless_instances.update(inst for inst in node_instances
1961                                 if instanceinfo[inst].disk_template == diskless)
1962       disks = [(inst, disk)
1963                for inst in node_instances
1964                for disk in instanceinfo[inst].disks]
1965
1966       if not disks:
1967         # No need to collect data
1968         continue
1969
1970       node_disks[nname] = disks
1971
1972       # Creating copies as SetDiskID below will modify the objects and that can
1973       # lead to incorrect data returned from nodes
1974       devonly = [dev.Copy() for (_, dev) in disks]
1975
1976       for dev in devonly:
1977         self.cfg.SetDiskID(dev, nname)
1978
1979       node_disks_devonly[nname] = devonly
1980
1981     assert len(node_disks) == len(node_disks_devonly)
1982
1983     # Collect data from all nodes with disks
1984     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1985                                                           node_disks_devonly)
1986
1987     assert len(result) == len(node_disks)
1988
1989     instdisk = {}
1990
1991     for (nname, nres) in result.items():
1992       disks = node_disks[nname]
1993
1994       if nres.offline:
1995         # No data from this node
1996         data = len(disks) * [(False, "node offline")]
1997       else:
1998         msg = nres.fail_msg
1999         _ErrorIf(msg, self.ENODERPC, nname,
2000                  "while getting disk information: %s", msg)
2001         if msg:
2002           # No data from this node
2003           data = len(disks) * [(False, msg)]
2004         else:
2005           data = []
2006           for idx, i in enumerate(nres.payload):
2007             if isinstance(i, (tuple, list)) and len(i) == 2:
2008               data.append(i)
2009             else:
2010               logging.warning("Invalid result from node %s, entry %d: %s",
2011                               nname, idx, i)
2012               data.append((False, "Invalid result from the remote node"))
2013
2014       for ((inst, _), status) in zip(disks, data):
2015         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2016
2017     # Add empty entries for diskless instances.
2018     for inst in diskless_instances:
2019       assert inst not in instdisk
2020       instdisk[inst] = {}
2021
2022     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2023                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2024                       compat.all(isinstance(s, (tuple, list)) and
2025                                  len(s) == 2 for s in statuses)
2026                       for inst, nnames in instdisk.items()
2027                       for nname, statuses in nnames.items())
2028     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2029
2030     return instdisk
2031
2032   def BuildHooksEnv(self):
2033     """Build hooks env.
2034
2035     Cluster-Verify hooks just ran in the post phase and their failure makes
2036     the output be logged in the verify output and the verification to fail.
2037
2038     """
2039     all_nodes = self.cfg.GetNodeList()
2040     env = {
2041       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2042       }
2043     for node in self.cfg.GetAllNodesInfo().values():
2044       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2045
2046     return env, [], all_nodes
2047
2048   def Exec(self, feedback_fn):
2049     """Verify integrity of cluster, performing various test on nodes.
2050
2051     """
2052     # This method has too many local variables. pylint: disable-msg=R0914
2053     self.bad = False
2054     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2055     verbose = self.op.verbose
2056     self._feedback_fn = feedback_fn
2057     feedback_fn("* Verifying global settings")
2058     for msg in self.cfg.VerifyConfig():
2059       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2060
2061     # Check the cluster certificates
2062     for cert_filename in constants.ALL_CERT_FILES:
2063       (errcode, msg) = _VerifyCertificate(cert_filename)
2064       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2065
2066     vg_name = self.cfg.GetVGName()
2067     drbd_helper = self.cfg.GetDRBDHelper()
2068     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2069     cluster = self.cfg.GetClusterInfo()
2070     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2071     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2072     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2073     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2074     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2075                         for iname in instancelist)
2076     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2077     i_non_redundant = [] # Non redundant instances
2078     i_non_a_balanced = [] # Non auto-balanced instances
2079     n_offline = 0 # Count of offline nodes
2080     n_drained = 0 # Count of nodes being drained
2081     node_vol_should = {}
2082
2083     # FIXME: verify OS list
2084     # do local checksums
2085     master_files = [constants.CLUSTER_CONF_FILE]
2086     master_node = self.master_node = self.cfg.GetMasterNode()
2087     master_ip = self.cfg.GetMasterIP()
2088
2089     file_names = ssconf.SimpleStore().GetFileList()
2090     file_names.extend(constants.ALL_CERT_FILES)
2091     file_names.extend(master_files)
2092     if cluster.modify_etc_hosts:
2093       file_names.append(constants.ETC_HOSTS)
2094
2095     local_checksums = utils.FingerprintFiles(file_names)
2096
2097     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2098     node_verify_param = {
2099       constants.NV_FILELIST: file_names,
2100       constants.NV_NODELIST: [node.name for node in nodeinfo
2101                               if not node.offline],
2102       constants.NV_HYPERVISOR: hypervisors,
2103       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2104                                   node.secondary_ip) for node in nodeinfo
2105                                  if not node.offline],
2106       constants.NV_INSTANCELIST: hypervisors,
2107       constants.NV_VERSION: None,
2108       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2109       constants.NV_NODESETUP: None,
2110       constants.NV_TIME: None,
2111       constants.NV_MASTERIP: (master_node, master_ip),
2112       constants.NV_OSLIST: None,
2113       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2114       }
2115
2116     if vg_name is not None:
2117       node_verify_param[constants.NV_VGLIST] = None
2118       node_verify_param[constants.NV_LVLIST] = vg_name
2119       node_verify_param[constants.NV_PVLIST] = [vg_name]
2120       node_verify_param[constants.NV_DRBDLIST] = None
2121
2122     if drbd_helper:
2123       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2124
2125     # Build our expected cluster state
2126     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2127                                                  name=node.name,
2128                                                  vm_capable=node.vm_capable))
2129                       for node in nodeinfo)
2130
2131     # Gather OOB paths
2132     oob_paths = []
2133     for node in nodeinfo:
2134       path = _SupportsOob(self.cfg, node)
2135       if path and path not in oob_paths:
2136         oob_paths.append(path)
2137
2138     if oob_paths:
2139       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2140
2141     for instance in instancelist:
2142       inst_config = instanceinfo[instance]
2143
2144       for nname in inst_config.all_nodes:
2145         if nname not in node_image:
2146           # ghost node
2147           gnode = self.NodeImage(name=nname)
2148           gnode.ghost = True
2149           node_image[nname] = gnode
2150
2151       inst_config.MapLVsByNode(node_vol_should)
2152
2153       pnode = inst_config.primary_node
2154       node_image[pnode].pinst.append(instance)
2155
2156       for snode in inst_config.secondary_nodes:
2157         nimg = node_image[snode]
2158         nimg.sinst.append(instance)
2159         if pnode not in nimg.sbp:
2160           nimg.sbp[pnode] = []
2161         nimg.sbp[pnode].append(instance)
2162
2163     # At this point, we have the in-memory data structures complete,
2164     # except for the runtime information, which we'll gather next
2165
2166     # Due to the way our RPC system works, exact response times cannot be
2167     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2168     # time before and after executing the request, we can at least have a time
2169     # window.
2170     nvinfo_starttime = time.time()
2171     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2172                                            self.cfg.GetClusterName())
2173     nvinfo_endtime = time.time()
2174
2175     all_drbd_map = self.cfg.ComputeDRBDMap()
2176
2177     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2178     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2179
2180     feedback_fn("* Verifying node status")
2181
2182     refos_img = None
2183
2184     for node_i in nodeinfo:
2185       node = node_i.name
2186       nimg = node_image[node]
2187
2188       if node_i.offline:
2189         if verbose:
2190           feedback_fn("* Skipping offline node %s" % (node,))
2191         n_offline += 1
2192         continue
2193
2194       if node == master_node:
2195         ntype = "master"
2196       elif node_i.master_candidate:
2197         ntype = "master candidate"
2198       elif node_i.drained:
2199         ntype = "drained"
2200         n_drained += 1
2201       else:
2202         ntype = "regular"
2203       if verbose:
2204         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2205
2206       msg = all_nvinfo[node].fail_msg
2207       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2208       if msg:
2209         nimg.rpc_fail = True
2210         continue
2211
2212       nresult = all_nvinfo[node].payload
2213
2214       nimg.call_ok = self._VerifyNode(node_i, nresult)
2215       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2216       self._VerifyNodeNetwork(node_i, nresult)
2217       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2218                             master_files)
2219
2220       self._VerifyOob(node_i, nresult)
2221
2222       if nimg.vm_capable:
2223         self._VerifyNodeLVM(node_i, nresult, vg_name)
2224         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2225                              all_drbd_map)
2226
2227         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2228         self._UpdateNodeInstances(node_i, nresult, nimg)
2229         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2230         self._UpdateNodeOS(node_i, nresult, nimg)
2231         if not nimg.os_fail:
2232           if refos_img is None:
2233             refos_img = nimg
2234           self._VerifyNodeOS(node_i, nimg, refos_img)
2235
2236     feedback_fn("* Verifying instance status")
2237     for instance in instancelist:
2238       if verbose:
2239         feedback_fn("* Verifying instance %s" % instance)
2240       inst_config = instanceinfo[instance]
2241       self._VerifyInstance(instance, inst_config, node_image,
2242                            instdisk[instance])
2243       inst_nodes_offline = []
2244
2245       pnode = inst_config.primary_node
2246       pnode_img = node_image[pnode]
2247       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2248                self.ENODERPC, pnode, "instance %s, connection to"
2249                " primary node failed", instance)
2250
2251       if pnode_img.offline:
2252         inst_nodes_offline.append(pnode)
2253
2254       # If the instance is non-redundant we cannot survive losing its primary
2255       # node, so we are not N+1 compliant. On the other hand we have no disk
2256       # templates with more than one secondary so that situation is not well
2257       # supported either.
2258       # FIXME: does not support file-backed instances
2259       if not inst_config.secondary_nodes:
2260         i_non_redundant.append(instance)
2261
2262       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2263                instance, "instance has multiple secondary nodes: %s",
2264                utils.CommaJoin(inst_config.secondary_nodes),
2265                code=self.ETYPE_WARNING)
2266
2267       if inst_config.disk_template in constants.DTS_NET_MIRROR:
2268         pnode = inst_config.primary_node
2269         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2270         instance_groups = {}
2271
2272         for node in instance_nodes:
2273           instance_groups.setdefault(nodeinfo_byname[node].group,
2274                                      []).append(node)
2275
2276         pretty_list = [
2277           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2278           # Sort so that we always list the primary node first.
2279           for group, nodes in sorted(instance_groups.items(),
2280                                      key=lambda (_, nodes): pnode in nodes,
2281                                      reverse=True)]
2282
2283         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2284                       instance, "instance has primary and secondary nodes in"
2285                       " different groups: %s", utils.CommaJoin(pretty_list),
2286                       code=self.ETYPE_WARNING)
2287
2288       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2289         i_non_a_balanced.append(instance)
2290
2291       for snode in inst_config.secondary_nodes:
2292         s_img = node_image[snode]
2293         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2294                  "instance %s, connection to secondary node failed", instance)
2295
2296         if s_img.offline:
2297           inst_nodes_offline.append(snode)
2298
2299       # warn that the instance lives on offline nodes
2300       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2301                "instance lives on offline node(s) %s",
2302                utils.CommaJoin(inst_nodes_offline))
2303       # ... or ghost/non-vm_capable nodes
2304       for node in inst_config.all_nodes:
2305         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2306                  "instance lives on ghost node %s", node)
2307         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2308                  instance, "instance lives on non-vm_capable node %s", node)
2309
2310     feedback_fn("* Verifying orphan volumes")
2311     reserved = utils.FieldSet(*cluster.reserved_lvs)
2312     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2313
2314     feedback_fn("* Verifying orphan instances")
2315     self._VerifyOrphanInstances(instancelist, node_image)
2316
2317     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2318       feedback_fn("* Verifying N+1 Memory redundancy")
2319       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2320
2321     feedback_fn("* Other Notes")
2322     if i_non_redundant:
2323       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2324                   % len(i_non_redundant))
2325
2326     if i_non_a_balanced:
2327       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2328                   % len(i_non_a_balanced))
2329
2330     if n_offline:
2331       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2332
2333     if n_drained:
2334       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2335
2336     return not self.bad
2337
2338   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2339     """Analyze the post-hooks' result
2340
2341     This method analyses the hook result, handles it, and sends some
2342     nicely-formatted feedback back to the user.
2343
2344     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2345         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2346     @param hooks_results: the results of the multi-node hooks rpc call
2347     @param feedback_fn: function used send feedback back to the caller
2348     @param lu_result: previous Exec result
2349     @return: the new Exec result, based on the previous result
2350         and hook results
2351
2352     """
2353     # We only really run POST phase hooks, and are only interested in
2354     # their results
2355     if phase == constants.HOOKS_PHASE_POST:
2356       # Used to change hooks' output to proper indentation
2357       feedback_fn("* Hooks Results")
2358       assert hooks_results, "invalid result from hooks"
2359
2360       for node_name in hooks_results:
2361         res = hooks_results[node_name]
2362         msg = res.fail_msg
2363         test = msg and not res.offline
2364         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2365                       "Communication failure in hooks execution: %s", msg)
2366         if res.offline or msg:
2367           # No need to investigate payload if node is offline or gave an error.
2368           # override manually lu_result here as _ErrorIf only
2369           # overrides self.bad
2370           lu_result = 1
2371           continue
2372         for script, hkr, output in res.payload:
2373           test = hkr == constants.HKR_FAIL
2374           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2375                         "Script %s failed, output:", script)
2376           if test:
2377             output = self._HOOKS_INDENT_RE.sub('      ', output)
2378             feedback_fn("%s" % output)
2379             lu_result = 0
2380
2381       return lu_result
2382
2383
2384 class LUClusterVerifyDisks(NoHooksLU):
2385   """Verifies the cluster disks status.
2386
2387   """
2388   REQ_BGL = False
2389
2390   def ExpandNames(self):
2391     self.needed_locks = {
2392       locking.LEVEL_NODE: locking.ALL_SET,
2393       locking.LEVEL_INSTANCE: locking.ALL_SET,
2394     }
2395     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2396
2397   def Exec(self, feedback_fn):
2398     """Verify integrity of cluster disks.
2399
2400     @rtype: tuple of three items
2401     @return: a tuple of (dict of node-to-node_error, list of instances
2402         which need activate-disks, dict of instance: (node, volume) for
2403         missing volumes
2404
2405     """
2406     result = res_nodes, res_instances, res_missing = {}, [], {}
2407
2408     nodes = utils.NiceSort(self.cfg.GetNodeList())
2409     instances = [self.cfg.GetInstanceInfo(name)
2410                  for name in self.cfg.GetInstanceList()]
2411
2412     nv_dict = {}
2413     for inst in instances:
2414       inst_lvs = {}
2415       if (not inst.admin_up or
2416           inst.disk_template not in constants.DTS_NET_MIRROR):
2417         continue
2418       inst.MapLVsByNode(inst_lvs)
2419       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2420       for node, vol_list in inst_lvs.iteritems():
2421         for vol in vol_list:
2422           nv_dict[(node, vol)] = inst
2423
2424     if not nv_dict:
2425       return result
2426
2427     vg_names = self.rpc.call_vg_list(nodes)
2428     vg_names.Raise("Cannot get list of VGs")
2429
2430     for node in nodes:
2431       # node_volume
2432       node_res = self.rpc.call_lv_list([node],
2433                                        vg_names[node].payload.keys())[node]
2434       if node_res.offline:
2435         continue
2436       msg = node_res.fail_msg
2437       if msg:
2438         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2439         res_nodes[node] = msg
2440         continue
2441
2442       lvs = node_res.payload
2443       for lv_name, (_, _, lv_online) in lvs.items():
2444         inst = nv_dict.pop((node, lv_name), None)
2445         if (not lv_online and inst is not None
2446             and inst.name not in res_instances):
2447           res_instances.append(inst.name)
2448
2449     # any leftover items in nv_dict are missing LVs, let's arrange the
2450     # data better
2451     for key, inst in nv_dict.iteritems():
2452       if inst.name not in res_missing:
2453         res_missing[inst.name] = []
2454       res_missing[inst.name].append(key)
2455
2456     return result
2457
2458
2459 class LUClusterRepairDiskSizes(NoHooksLU):
2460   """Verifies the cluster disks sizes.
2461
2462   """
2463   REQ_BGL = False
2464
2465   def ExpandNames(self):
2466     if self.op.instances:
2467       self.wanted_names = []
2468       for name in self.op.instances:
2469         full_name = _ExpandInstanceName(self.cfg, name)
2470         self.wanted_names.append(full_name)
2471       self.needed_locks = {
2472         locking.LEVEL_NODE: [],
2473         locking.LEVEL_INSTANCE: self.wanted_names,
2474         }
2475       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2476     else:
2477       self.wanted_names = None
2478       self.needed_locks = {
2479         locking.LEVEL_NODE: locking.ALL_SET,
2480         locking.LEVEL_INSTANCE: locking.ALL_SET,
2481         }
2482     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2483
2484   def DeclareLocks(self, level):
2485     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2486       self._LockInstancesNodes(primary_only=True)
2487
2488   def CheckPrereq(self):
2489     """Check prerequisites.
2490
2491     This only checks the optional instance list against the existing names.
2492
2493     """
2494     if self.wanted_names is None:
2495       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2496
2497     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2498                              in self.wanted_names]
2499
2500   def _EnsureChildSizes(self, disk):
2501     """Ensure children of the disk have the needed disk size.
2502
2503     This is valid mainly for DRBD8 and fixes an issue where the
2504     children have smaller disk size.
2505
2506     @param disk: an L{ganeti.objects.Disk} object
2507
2508     """
2509     if disk.dev_type == constants.LD_DRBD8:
2510       assert disk.children, "Empty children for DRBD8?"
2511       fchild = disk.children[0]
2512       mismatch = fchild.size < disk.size
2513       if mismatch:
2514         self.LogInfo("Child disk has size %d, parent %d, fixing",
2515                      fchild.size, disk.size)
2516         fchild.size = disk.size
2517
2518       # and we recurse on this child only, not on the metadev
2519       return self._EnsureChildSizes(fchild) or mismatch
2520     else:
2521       return False
2522
2523   def Exec(self, feedback_fn):
2524     """Verify the size of cluster disks.
2525
2526     """
2527     # TODO: check child disks too
2528     # TODO: check differences in size between primary/secondary nodes
2529     per_node_disks = {}
2530     for instance in self.wanted_instances:
2531       pnode = instance.primary_node
2532       if pnode not in per_node_disks:
2533         per_node_disks[pnode] = []
2534       for idx, disk in enumerate(instance.disks):
2535         per_node_disks[pnode].append((instance, idx, disk))
2536
2537     changed = []
2538     for node, dskl in per_node_disks.items():
2539       newl = [v[2].Copy() for v in dskl]
2540       for dsk in newl:
2541         self.cfg.SetDiskID(dsk, node)
2542       result = self.rpc.call_blockdev_getsizes(node, newl)
2543       if result.fail_msg:
2544         self.LogWarning("Failure in blockdev_getsizes call to node"
2545                         " %s, ignoring", node)
2546         continue
2547       if len(result.data) != len(dskl):
2548         self.LogWarning("Invalid result from node %s, ignoring node results",
2549                         node)
2550         continue
2551       for ((instance, idx, disk), size) in zip(dskl, result.data):
2552         if size is None:
2553           self.LogWarning("Disk %d of instance %s did not return size"
2554                           " information, ignoring", idx, instance.name)
2555           continue
2556         if not isinstance(size, (int, long)):
2557           self.LogWarning("Disk %d of instance %s did not return valid"
2558                           " size information, ignoring", idx, instance.name)
2559           continue
2560         size = size >> 20
2561         if size != disk.size:
2562           self.LogInfo("Disk %d of instance %s has mismatched size,"
2563                        " correcting: recorded %d, actual %d", idx,
2564                        instance.name, disk.size, size)
2565           disk.size = size
2566           self.cfg.Update(instance, feedback_fn)
2567           changed.append((instance.name, idx, size))
2568         if self._EnsureChildSizes(disk):
2569           self.cfg.Update(instance, feedback_fn)
2570           changed.append((instance.name, idx, disk.size))
2571     return changed
2572
2573
2574 class LUClusterRename(LogicalUnit):
2575   """Rename the cluster.
2576
2577   """
2578   HPATH = "cluster-rename"
2579   HTYPE = constants.HTYPE_CLUSTER
2580
2581   def BuildHooksEnv(self):
2582     """Build hooks env.
2583
2584     """
2585     env = {
2586       "OP_TARGET": self.cfg.GetClusterName(),
2587       "NEW_NAME": self.op.name,
2588       }
2589     mn = self.cfg.GetMasterNode()
2590     all_nodes = self.cfg.GetNodeList()
2591     return env, [mn], all_nodes
2592
2593   def CheckPrereq(self):
2594     """Verify that the passed name is a valid one.
2595
2596     """
2597     hostname = netutils.GetHostname(name=self.op.name,
2598                                     family=self.cfg.GetPrimaryIPFamily())
2599
2600     new_name = hostname.name
2601     self.ip = new_ip = hostname.ip
2602     old_name = self.cfg.GetClusterName()
2603     old_ip = self.cfg.GetMasterIP()
2604     if new_name == old_name and new_ip == old_ip:
2605       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2606                                  " cluster has changed",
2607                                  errors.ECODE_INVAL)
2608     if new_ip != old_ip:
2609       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2610         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2611                                    " reachable on the network" %
2612                                    new_ip, errors.ECODE_NOTUNIQUE)
2613
2614     self.op.name = new_name
2615
2616   def Exec(self, feedback_fn):
2617     """Rename the cluster.
2618
2619     """
2620     clustername = self.op.name
2621     ip = self.ip
2622
2623     # shutdown the master IP
2624     master = self.cfg.GetMasterNode()
2625     result = self.rpc.call_node_stop_master(master, False)
2626     result.Raise("Could not disable the master role")
2627
2628     try:
2629       cluster = self.cfg.GetClusterInfo()
2630       cluster.cluster_name = clustername
2631       cluster.master_ip = ip
2632       self.cfg.Update(cluster, feedback_fn)
2633
2634       # update the known hosts file
2635       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2636       node_list = self.cfg.GetOnlineNodeList()
2637       try:
2638         node_list.remove(master)
2639       except ValueError:
2640         pass
2641       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2642     finally:
2643       result = self.rpc.call_node_start_master(master, False, False)
2644       msg = result.fail_msg
2645       if msg:
2646         self.LogWarning("Could not re-enable the master role on"
2647                         " the master, please restart manually: %s", msg)
2648
2649     return clustername
2650
2651
2652 class LUClusterSetParams(LogicalUnit):
2653   """Change the parameters of the cluster.
2654
2655   """
2656   HPATH = "cluster-modify"
2657   HTYPE = constants.HTYPE_CLUSTER
2658   REQ_BGL = False
2659
2660   def CheckArguments(self):
2661     """Check parameters
2662
2663     """
2664     if self.op.uid_pool:
2665       uidpool.CheckUidPool(self.op.uid_pool)
2666
2667     if self.op.add_uids:
2668       uidpool.CheckUidPool(self.op.add_uids)
2669
2670     if self.op.remove_uids:
2671       uidpool.CheckUidPool(self.op.remove_uids)
2672
2673   def ExpandNames(self):
2674     # FIXME: in the future maybe other cluster params won't require checking on
2675     # all nodes to be modified.
2676     self.needed_locks = {
2677       locking.LEVEL_NODE: locking.ALL_SET,
2678     }
2679     self.share_locks[locking.LEVEL_NODE] = 1
2680
2681   def BuildHooksEnv(self):
2682     """Build hooks env.
2683
2684     """
2685     env = {
2686       "OP_TARGET": self.cfg.GetClusterName(),
2687       "NEW_VG_NAME": self.op.vg_name,
2688       }
2689     mn = self.cfg.GetMasterNode()
2690     return env, [mn], [mn]
2691
2692   def CheckPrereq(self):
2693     """Check prerequisites.
2694
2695     This checks whether the given params don't conflict and
2696     if the given volume group is valid.
2697
2698     """
2699     if self.op.vg_name is not None and not self.op.vg_name:
2700       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2701         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2702                                    " instances exist", errors.ECODE_INVAL)
2703
2704     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2705       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2706         raise errors.OpPrereqError("Cannot disable drbd helper while"
2707                                    " drbd-based instances exist",
2708                                    errors.ECODE_INVAL)
2709
2710     node_list = self.acquired_locks[locking.LEVEL_NODE]
2711
2712     # if vg_name not None, checks given volume group on all nodes
2713     if self.op.vg_name:
2714       vglist = self.rpc.call_vg_list(node_list)
2715       for node in node_list:
2716         msg = vglist[node].fail_msg
2717         if msg:
2718           # ignoring down node
2719           self.LogWarning("Error while gathering data on node %s"
2720                           " (ignoring node): %s", node, msg)
2721           continue
2722         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2723                                               self.op.vg_name,
2724                                               constants.MIN_VG_SIZE)
2725         if vgstatus:
2726           raise errors.OpPrereqError("Error on node '%s': %s" %
2727                                      (node, vgstatus), errors.ECODE_ENVIRON)
2728
2729     if self.op.drbd_helper:
2730       # checks given drbd helper on all nodes
2731       helpers = self.rpc.call_drbd_helper(node_list)
2732       for node in node_list:
2733         ninfo = self.cfg.GetNodeInfo(node)
2734         if ninfo.offline:
2735           self.LogInfo("Not checking drbd helper on offline node %s", node)
2736           continue
2737         msg = helpers[node].fail_msg
2738         if msg:
2739           raise errors.OpPrereqError("Error checking drbd helper on node"
2740                                      " '%s': %s" % (node, msg),
2741                                      errors.ECODE_ENVIRON)
2742         node_helper = helpers[node].payload
2743         if node_helper != self.op.drbd_helper:
2744           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2745                                      (node, node_helper), errors.ECODE_ENVIRON)
2746
2747     self.cluster = cluster = self.cfg.GetClusterInfo()
2748     # validate params changes
2749     if self.op.beparams:
2750       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2751       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2752
2753     if self.op.ndparams:
2754       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2755       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2756
2757     if self.op.nicparams:
2758       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2759       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2760       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2761       nic_errors = []
2762
2763       # check all instances for consistency
2764       for instance in self.cfg.GetAllInstancesInfo().values():
2765         for nic_idx, nic in enumerate(instance.nics):
2766           params_copy = copy.deepcopy(nic.nicparams)
2767           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2768
2769           # check parameter syntax
2770           try:
2771             objects.NIC.CheckParameterSyntax(params_filled)
2772           except errors.ConfigurationError, err:
2773             nic_errors.append("Instance %s, nic/%d: %s" %
2774                               (instance.name, nic_idx, err))
2775
2776           # if we're moving instances to routed, check that they have an ip
2777           target_mode = params_filled[constants.NIC_MODE]
2778           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2779             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2780                               (instance.name, nic_idx))
2781       if nic_errors:
2782         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2783                                    "\n".join(nic_errors))
2784
2785     # hypervisor list/parameters
2786     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2787     if self.op.hvparams:
2788       for hv_name, hv_dict in self.op.hvparams.items():
2789         if hv_name not in self.new_hvparams:
2790           self.new_hvparams[hv_name] = hv_dict
2791         else:
2792           self.new_hvparams[hv_name].update(hv_dict)
2793
2794     # os hypervisor parameters
2795     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2796     if self.op.os_hvp:
2797       for os_name, hvs in self.op.os_hvp.items():
2798         if os_name not in self.new_os_hvp:
2799           self.new_os_hvp[os_name] = hvs
2800         else:
2801           for hv_name, hv_dict in hvs.items():
2802             if hv_name not in self.new_os_hvp[os_name]:
2803               self.new_os_hvp[os_name][hv_name] = hv_dict
2804             else:
2805               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2806
2807     # os parameters
2808     self.new_osp = objects.FillDict(cluster.osparams, {})
2809     if self.op.osparams:
2810       for os_name, osp in self.op.osparams.items():
2811         if os_name not in self.new_osp:
2812           self.new_osp[os_name] = {}
2813
2814         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2815                                                   use_none=True)
2816
2817         if not self.new_osp[os_name]:
2818           # we removed all parameters
2819           del self.new_osp[os_name]
2820         else:
2821           # check the parameter validity (remote check)
2822           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2823                          os_name, self.new_osp[os_name])
2824
2825     # changes to the hypervisor list
2826     if self.op.enabled_hypervisors is not None:
2827       self.hv_list = self.op.enabled_hypervisors
2828       for hv in self.hv_list:
2829         # if the hypervisor doesn't already exist in the cluster
2830         # hvparams, we initialize it to empty, and then (in both
2831         # cases) we make sure to fill the defaults, as we might not
2832         # have a complete defaults list if the hypervisor wasn't
2833         # enabled before
2834         if hv not in new_hvp:
2835           new_hvp[hv] = {}
2836         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2837         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2838     else:
2839       self.hv_list = cluster.enabled_hypervisors
2840
2841     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2842       # either the enabled list has changed, or the parameters have, validate
2843       for hv_name, hv_params in self.new_hvparams.items():
2844         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2845             (self.op.enabled_hypervisors and
2846              hv_name in self.op.enabled_hypervisors)):
2847           # either this is a new hypervisor, or its parameters have changed
2848           hv_class = hypervisor.GetHypervisor(hv_name)
2849           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2850           hv_class.CheckParameterSyntax(hv_params)
2851           _CheckHVParams(self, node_list, hv_name, hv_params)
2852
2853     if self.op.os_hvp:
2854       # no need to check any newly-enabled hypervisors, since the
2855       # defaults have already been checked in the above code-block
2856       for os_name, os_hvp in self.new_os_hvp.items():
2857         for hv_name, hv_params in os_hvp.items():
2858           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2859           # we need to fill in the new os_hvp on top of the actual hv_p
2860           cluster_defaults = self.new_hvparams.get(hv_name, {})
2861           new_osp = objects.FillDict(cluster_defaults, hv_params)
2862           hv_class = hypervisor.GetHypervisor(hv_name)
2863           hv_class.CheckParameterSyntax(new_osp)
2864           _CheckHVParams(self, node_list, hv_name, new_osp)
2865
2866     if self.op.default_iallocator:
2867       alloc_script = utils.FindFile(self.op.default_iallocator,
2868                                     constants.IALLOCATOR_SEARCH_PATH,
2869                                     os.path.isfile)
2870       if alloc_script is None:
2871         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2872                                    " specified" % self.op.default_iallocator,
2873                                    errors.ECODE_INVAL)
2874
2875   def Exec(self, feedback_fn):
2876     """Change the parameters of the cluster.
2877
2878     """
2879     if self.op.vg_name is not None:
2880       new_volume = self.op.vg_name
2881       if not new_volume:
2882         new_volume = None
2883       if new_volume != self.cfg.GetVGName():
2884         self.cfg.SetVGName(new_volume)
2885       else:
2886         feedback_fn("Cluster LVM configuration already in desired"
2887                     " state, not changing")
2888     if self.op.drbd_helper is not None:
2889       new_helper = self.op.drbd_helper
2890       if not new_helper:
2891         new_helper = None
2892       if new_helper != self.cfg.GetDRBDHelper():
2893         self.cfg.SetDRBDHelper(new_helper)
2894       else:
2895         feedback_fn("Cluster DRBD helper already in desired state,"
2896                     " not changing")
2897     if self.op.hvparams:
2898       self.cluster.hvparams = self.new_hvparams
2899     if self.op.os_hvp:
2900       self.cluster.os_hvp = self.new_os_hvp
2901     if self.op.enabled_hypervisors is not None:
2902       self.cluster.hvparams = self.new_hvparams
2903       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2904     if self.op.beparams:
2905       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2906     if self.op.nicparams:
2907       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2908     if self.op.osparams:
2909       self.cluster.osparams = self.new_osp
2910     if self.op.ndparams:
2911       self.cluster.ndparams = self.new_ndparams
2912
2913     if self.op.candidate_pool_size is not None:
2914       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2915       # we need to update the pool size here, otherwise the save will fail
2916       _AdjustCandidatePool(self, [])
2917
2918     if self.op.maintain_node_health is not None:
2919       self.cluster.maintain_node_health = self.op.maintain_node_health
2920
2921     if self.op.prealloc_wipe_disks is not None:
2922       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2923
2924     if self.op.add_uids is not None:
2925       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2926
2927     if self.op.remove_uids is not None:
2928       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2929
2930     if self.op.uid_pool is not None:
2931       self.cluster.uid_pool = self.op.uid_pool
2932
2933     if self.op.default_iallocator is not None:
2934       self.cluster.default_iallocator = self.op.default_iallocator
2935
2936     if self.op.reserved_lvs is not None:
2937       self.cluster.reserved_lvs = self.op.reserved_lvs
2938
2939     def helper_os(aname, mods, desc):
2940       desc += " OS list"
2941       lst = getattr(self.cluster, aname)
2942       for key, val in mods:
2943         if key == constants.DDM_ADD:
2944           if val in lst:
2945             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2946           else:
2947             lst.append(val)
2948         elif key == constants.DDM_REMOVE:
2949           if val in lst:
2950             lst.remove(val)
2951           else:
2952             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2953         else:
2954           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2955
2956     if self.op.hidden_os:
2957       helper_os("hidden_os", self.op.hidden_os, "hidden")
2958
2959     if self.op.blacklisted_os:
2960       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2961
2962     if self.op.master_netdev:
2963       master = self.cfg.GetMasterNode()
2964       feedback_fn("Shutting down master ip on the current netdev (%s)" %
2965                   self.cluster.master_netdev)
2966       result = self.rpc.call_node_stop_master(master, False)
2967       result.Raise("Could not disable the master ip")
2968       feedback_fn("Changing master_netdev from %s to %s" %
2969                   (self.cluster.master_netdev, self.op.master_netdev))
2970       self.cluster.master_netdev = self.op.master_netdev
2971
2972     self.cfg.Update(self.cluster, feedback_fn)
2973
2974     if self.op.master_netdev:
2975       feedback_fn("Starting the master ip on the new master netdev (%s)" %
2976                   self.op.master_netdev)
2977       result = self.rpc.call_node_start_master(master, False, False)
2978       if result.fail_msg:
2979         self.LogWarning("Could not re-enable the master ip on"
2980                         " the master, please restart manually: %s",
2981                         result.fail_msg)
2982
2983
2984 def _UploadHelper(lu, nodes, fname):
2985   """Helper for uploading a file and showing warnings.
2986
2987   """
2988   if os.path.exists(fname):
2989     result = lu.rpc.call_upload_file(nodes, fname)
2990     for to_node, to_result in result.items():
2991       msg = to_result.fail_msg
2992       if msg:
2993         msg = ("Copy of file %s to node %s failed: %s" %
2994                (fname, to_node, msg))
2995         lu.proc.LogWarning(msg)
2996
2997
2998 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
2999   """Distribute additional files which are part of the cluster configuration.
3000
3001   ConfigWriter takes care of distributing the config and ssconf files, but
3002   there are more files which should be distributed to all nodes. This function
3003   makes sure those are copied.
3004
3005   @param lu: calling logical unit
3006   @param additional_nodes: list of nodes not in the config to distribute to
3007   @type additional_vm: boolean
3008   @param additional_vm: whether the additional nodes are vm-capable or not
3009
3010   """
3011   # 1. Gather target nodes
3012   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3013   dist_nodes = lu.cfg.GetOnlineNodeList()
3014   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3015   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3016   if additional_nodes is not None:
3017     dist_nodes.extend(additional_nodes)
3018     if additional_vm:
3019       vm_nodes.extend(additional_nodes)
3020   if myself.name in dist_nodes:
3021     dist_nodes.remove(myself.name)
3022   if myself.name in vm_nodes:
3023     vm_nodes.remove(myself.name)
3024
3025   # 2. Gather files to distribute
3026   dist_files = set([constants.ETC_HOSTS,
3027                     constants.SSH_KNOWN_HOSTS_FILE,
3028                     constants.RAPI_CERT_FILE,
3029                     constants.RAPI_USERS_FILE,
3030                     constants.CONFD_HMAC_KEY,
3031                     constants.CLUSTER_DOMAIN_SECRET_FILE,
3032                    ])
3033
3034   vm_files = set()
3035   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3036   for hv_name in enabled_hypervisors:
3037     hv_class = hypervisor.GetHypervisor(hv_name)
3038     vm_files.update(hv_class.GetAncillaryFiles())
3039
3040   # 3. Perform the files upload
3041   for fname in dist_files:
3042     _UploadHelper(lu, dist_nodes, fname)
3043   for fname in vm_files:
3044     _UploadHelper(lu, vm_nodes, fname)
3045
3046
3047 class LUClusterRedistConf(NoHooksLU):
3048   """Force the redistribution of cluster configuration.
3049
3050   This is a very simple LU.
3051
3052   """
3053   REQ_BGL = False
3054
3055   def ExpandNames(self):
3056     self.needed_locks = {
3057       locking.LEVEL_NODE: locking.ALL_SET,
3058     }
3059     self.share_locks[locking.LEVEL_NODE] = 1
3060
3061   def Exec(self, feedback_fn):
3062     """Redistribute the configuration.
3063
3064     """
3065     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3066     _RedistributeAncillaryFiles(self)
3067
3068
3069 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3070   """Sleep and poll for an instance's disk to sync.
3071
3072   """
3073   if not instance.disks or disks is not None and not disks:
3074     return True
3075
3076   disks = _ExpandCheckDisks(instance, disks)
3077
3078   if not oneshot:
3079     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3080
3081   node = instance.primary_node
3082
3083   for dev in disks:
3084     lu.cfg.SetDiskID(dev, node)
3085
3086   # TODO: Convert to utils.Retry
3087
3088   retries = 0
3089   degr_retries = 10 # in seconds, as we sleep 1 second each time
3090   while True:
3091     max_time = 0
3092     done = True
3093     cumul_degraded = False
3094     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3095     msg = rstats.fail_msg
3096     if msg:
3097       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3098       retries += 1
3099       if retries >= 10:
3100         raise errors.RemoteError("Can't contact node %s for mirror data,"
3101                                  " aborting." % node)
3102       time.sleep(6)
3103       continue
3104     rstats = rstats.payload
3105     retries = 0
3106     for i, mstat in enumerate(rstats):
3107       if mstat is None:
3108         lu.LogWarning("Can't compute data for node %s/%s",
3109                            node, disks[i].iv_name)
3110         continue
3111
3112       cumul_degraded = (cumul_degraded or
3113                         (mstat.is_degraded and mstat.sync_percent is None))
3114       if mstat.sync_percent is not None:
3115         done = False
3116         if mstat.estimated_time is not None:
3117           rem_time = ("%s remaining (estimated)" %
3118                       utils.FormatSeconds(mstat.estimated_time))
3119           max_time = mstat.estimated_time
3120         else:
3121           rem_time = "no time estimate"
3122         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3123                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3124
3125     # if we're done but degraded, let's do a few small retries, to
3126     # make sure we see a stable and not transient situation; therefore
3127     # we force restart of the loop
3128     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3129       logging.info("Degraded disks found, %d retries left", degr_retries)
3130       degr_retries -= 1
3131       time.sleep(1)
3132       continue
3133
3134     if done or oneshot:
3135       break
3136
3137     time.sleep(min(60, max_time))
3138
3139   if done:
3140     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3141   return not cumul_degraded
3142
3143
3144 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3145   """Check that mirrors are not degraded.
3146
3147   The ldisk parameter, if True, will change the test from the
3148   is_degraded attribute (which represents overall non-ok status for
3149   the device(s)) to the ldisk (representing the local storage status).
3150
3151   """
3152   lu.cfg.SetDiskID(dev, node)
3153
3154   result = True
3155
3156   if on_primary or dev.AssembleOnSecondary():
3157     rstats = lu.rpc.call_blockdev_find(node, dev)
3158     msg = rstats.fail_msg
3159     if msg:
3160       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3161       result = False
3162     elif not rstats.payload:
3163       lu.LogWarning("Can't find disk on node %s", node)
3164       result = False
3165     else:
3166       if ldisk:
3167         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3168       else:
3169         result = result and not rstats.payload.is_degraded
3170
3171   if dev.children:
3172     for child in dev.children:
3173       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3174
3175   return result
3176
3177
3178 class LUOobCommand(NoHooksLU):
3179   """Logical unit for OOB handling.
3180
3181   """
3182   REG_BGL = False
3183
3184   def CheckPrereq(self):
3185     """Check prerequisites.
3186
3187     This checks:
3188      - the node exists in the configuration
3189      - OOB is supported
3190
3191     Any errors are signaled by raising errors.OpPrereqError.
3192
3193     """
3194     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3195     node = self.cfg.GetNodeInfo(self.op.node_name)
3196
3197     if node is None:
3198       raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3199
3200     self.oob_program = _SupportsOob(self.cfg, node)
3201
3202     if not self.oob_program:
3203       raise errors.OpPrereqError("OOB is not supported for node %s" %
3204                                  self.op.node_name)
3205
3206     if self.op.command == constants.OOB_POWER_OFF and not node.offline:
3207       raise errors.OpPrereqError(("Cannot power off node %s because it is"
3208                                   " not marked offline") % self.op.node_name)
3209
3210     self.node = node
3211
3212   def ExpandNames(self):
3213     """Gather locks we need.
3214
3215     """
3216     node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3217     self.needed_locks = {
3218       locking.LEVEL_NODE: [node_name],
3219       }
3220
3221   def Exec(self, feedback_fn):
3222     """Execute OOB and return result if we expect any.
3223
3224     """
3225     master_node = self.cfg.GetMasterNode()
3226     node = self.node
3227
3228     logging.info("Executing out-of-band command '%s' using '%s' on %s",
3229                  self.op.command, self.oob_program, self.op.node_name)
3230     result = self.rpc.call_run_oob(master_node, self.oob_program,
3231                                    self.op.command, self.op.node_name,
3232                                    self.op.timeout)
3233
3234     result.Raise("An error occurred on execution of OOB helper")
3235
3236     self._CheckPayload(result)
3237
3238     if self.op.command == constants.OOB_HEALTH:
3239       # For health we should log important events
3240       for item, status in result.payload:
3241         if status in [constants.OOB_STATUS_WARNING,
3242                       constants.OOB_STATUS_CRITICAL]:
3243           logging.warning("On node '%s' item '%s' has status '%s'",
3244                           self.op.node_name, item, status)
3245
3246     if self.op.command == constants.OOB_POWER_ON:
3247       node.powered = True
3248     elif self.op.command == constants.OOB_POWER_OFF:
3249       node.powered = False
3250     elif self.op.command == constants.OOB_POWER_STATUS:
3251       powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3252       if powered != self.node.powered:
3253         logging.warning(("Recorded power state (%s) of node '%s' does not match"
3254                          " actual power state (%s)"), node.powered,
3255                         self.op.node_name, powered)
3256
3257     self.cfg.Update(node, feedback_fn)
3258
3259     return result.payload
3260
3261   def _CheckPayload(self, result):
3262     """Checks if the payload is valid.
3263
3264     @param result: RPC result
3265     @raises errors.OpExecError: If payload is not valid
3266
3267     """
3268     errs = []
3269     if self.op.command == constants.OOB_HEALTH:
3270       if not isinstance(result.payload, list):
3271         errs.append("command 'health' is expected to return a list but got %s" %
3272                     type(result.payload))
3273       for item, status in result.payload:
3274         if status not in constants.OOB_STATUSES:
3275           errs.append("health item '%s' has invalid status '%s'" %
3276                       (item, status))
3277
3278     if self.op.command == constants.OOB_POWER_STATUS:
3279       if not isinstance(result.payload, dict):
3280         errs.append("power-status is expected to return a dict but got %s" %
3281                     type(result.payload))
3282
3283     if self.op.command in [
3284         constants.OOB_POWER_ON,
3285         constants.OOB_POWER_OFF,
3286         constants.OOB_POWER_CYCLE,
3287         ]:
3288       if result.payload is not None:
3289         errs.append("%s is expected to not return payload but got '%s'" %
3290                     (self.op.command, result.payload))
3291
3292     if errs:
3293       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3294                                utils.CommaJoin(errs))
3295
3296
3297
3298 class LUDiagnoseOS(NoHooksLU):
3299   """Logical unit for OS diagnose/query.
3300
3301   """
3302   REQ_BGL = False
3303   _HID = "hidden"
3304   _BLK = "blacklisted"
3305   _VLD = "valid"
3306   _FIELDS_STATIC = utils.FieldSet()
3307   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3308                                    "parameters", "api_versions", _HID, _BLK)
3309
3310   def CheckArguments(self):
3311     if self.op.names:
3312       raise errors.OpPrereqError("Selective OS query not supported",
3313                                  errors.ECODE_INVAL)
3314
3315     _CheckOutputFields(static=self._FIELDS_STATIC,
3316                        dynamic=self._FIELDS_DYNAMIC,
3317                        selected=self.op.output_fields)
3318
3319   def ExpandNames(self):
3320     # Lock all nodes, in shared mode
3321     # Temporary removal of locks, should be reverted later
3322     # TODO: reintroduce locks when they are lighter-weight
3323     self.needed_locks = {}
3324     #self.share_locks[locking.LEVEL_NODE] = 1
3325     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3326
3327   @staticmethod
3328   def _DiagnoseByOS(rlist):
3329     """Remaps a per-node return list into an a per-os per-node dictionary
3330
3331     @param rlist: a map with node names as keys and OS objects as values
3332
3333     @rtype: dict
3334     @return: a dictionary with osnames as keys and as value another
3335         map, with nodes as keys and tuples of (path, status, diagnose,
3336         variants, parameters, api_versions) as values, eg::
3337
3338           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3339                                      (/srv/..., False, "invalid api")],
3340                            "node2": [(/srv/..., True, "", [], [])]}
3341           }
3342
3343     """
3344     all_os = {}
3345     # we build here the list of nodes that didn't fail the RPC (at RPC
3346     # level), so that nodes with a non-responding node daemon don't
3347     # make all OSes invalid
3348     good_nodes = [node_name for node_name in rlist
3349                   if not rlist[node_name].fail_msg]
3350     for node_name, nr in rlist.items():
3351       if nr.fail_msg or not nr.payload:
3352         continue
3353       for (name, path, status, diagnose, variants,
3354            params, api_versions) in nr.payload:
3355         if name not in all_os:
3356           # build a list of nodes for this os containing empty lists
3357           # for each node in node_list
3358           all_os[name] = {}
3359           for nname in good_nodes:
3360             all_os[name][nname] = []
3361         # convert params from [name, help] to (name, help)
3362         params = [tuple(v) for v in params]
3363         all_os[name][node_name].append((path, status, diagnose,
3364                                         variants, params, api_versions))
3365     return all_os
3366
3367   def Exec(self, feedback_fn):
3368     """Compute the list of OSes.
3369
3370     """
3371     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3372     node_data = self.rpc.call_os_diagnose(valid_nodes)
3373     pol = self._DiagnoseByOS(node_data)
3374     output = []
3375     cluster = self.cfg.GetClusterInfo()
3376
3377     for os_name in utils.NiceSort(pol.keys()):
3378       os_data = pol[os_name]
3379       row = []
3380       valid = True
3381       (variants, params, api_versions) = null_state = (set(), set(), set())
3382       for idx, osl in enumerate(os_data.values()):
3383         valid = bool(valid and osl and osl[0][1])
3384         if not valid:
3385           (variants, params, api_versions) = null_state
3386           break
3387         node_variants, node_params, node_api = osl[0][3:6]
3388         if idx == 0: # first entry
3389           variants = set(node_variants)
3390           params = set(node_params)
3391           api_versions = set(node_api)
3392         else: # keep consistency
3393           variants.intersection_update(node_variants)
3394           params.intersection_update(node_params)
3395           api_versions.intersection_update(node_api)
3396
3397       is_hid = os_name in cluster.hidden_os
3398       is_blk = os_name in cluster.blacklisted_os
3399       if ((self._HID not in self.op.output_fields and is_hid) or
3400           (self._BLK not in self.op.output_fields and is_blk) or
3401           (self._VLD not in self.op.output_fields and not valid)):
3402         continue
3403
3404       for field in self.op.output_fields:
3405         if field == "name":
3406           val = os_name
3407         elif field == self._VLD:
3408           val = valid
3409         elif field == "node_status":
3410           # this is just a copy of the dict
3411           val = {}
3412           for node_name, nos_list in os_data.items():
3413             val[node_name] = nos_list
3414         elif field == "variants":
3415           val = utils.NiceSort(list(variants))
3416         elif field == "parameters":
3417           val = list(params)
3418         elif field == "api_versions":
3419           val = list(api_versions)
3420         elif field == self._HID:
3421           val = is_hid
3422         elif field == self._BLK:
3423           val = is_blk
3424         else:
3425           raise errors.ParameterError(field)
3426         row.append(val)
3427       output.append(row)
3428
3429     return output
3430
3431
3432 class LURemoveNode(LogicalUnit):
3433   """Logical unit for removing a node.
3434
3435   """
3436   HPATH = "node-remove"
3437   HTYPE = constants.HTYPE_NODE
3438
3439   def BuildHooksEnv(self):
3440     """Build hooks env.
3441
3442     This doesn't run on the target node in the pre phase as a failed
3443     node would then be impossible to remove.
3444
3445     """
3446     env = {
3447       "OP_TARGET": self.op.node_name,
3448       "NODE_NAME": self.op.node_name,
3449       }
3450     all_nodes = self.cfg.GetNodeList()
3451     try:
3452       all_nodes.remove(self.op.node_name)
3453     except ValueError:
3454       logging.warning("Node %s which is about to be removed not found"
3455                       " in the all nodes list", self.op.node_name)
3456     return env, all_nodes, all_nodes
3457
3458   def CheckPrereq(self):
3459     """Check prerequisites.
3460
3461     This checks:
3462      - the node exists in the configuration
3463      - it does not have primary or secondary instances
3464      - it's not the master
3465
3466     Any errors are signaled by raising errors.OpPrereqError.
3467
3468     """
3469     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3470     node = self.cfg.GetNodeInfo(self.op.node_name)
3471     assert node is not None
3472
3473     instance_list = self.cfg.GetInstanceList()
3474
3475     masternode = self.cfg.GetMasterNode()
3476     if node.name == masternode:
3477       raise errors.OpPrereqError("Node is the master node,"
3478                                  " you need to failover first.",
3479                                  errors.ECODE_INVAL)
3480
3481     for instance_name in instance_list:
3482       instance = self.cfg.GetInstanceInfo(instance_name)
3483       if node.name in instance.all_nodes:
3484         raise errors.OpPrereqError("Instance %s is still running on the node,"
3485                                    " please remove first." % instance_name,
3486                                    errors.ECODE_INVAL)
3487     self.op.node_name = node.name
3488     self.node = node
3489
3490   def Exec(self, feedback_fn):
3491     """Removes the node from the cluster.
3492
3493     """
3494     node = self.node
3495     logging.info("Stopping the node daemon and removing configs from node %s",
3496                  node.name)
3497
3498     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3499
3500     # Promote nodes to master candidate as needed
3501     _AdjustCandidatePool(self, exceptions=[node.name])
3502     self.context.RemoveNode(node.name)
3503
3504     # Run post hooks on the node before it's removed
3505     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3506     try:
3507       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3508     except:
3509       # pylint: disable-msg=W0702
3510       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3511
3512     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3513     msg = result.fail_msg
3514     if msg:
3515       self.LogWarning("Errors encountered on the remote node while leaving"
3516                       " the cluster: %s", msg)
3517
3518     # Remove node from our /etc/hosts
3519     if self.cfg.GetClusterInfo().modify_etc_hosts:
3520       master_node = self.cfg.GetMasterNode()
3521       result = self.rpc.call_etc_hosts_modify(master_node,
3522                                               constants.ETC_HOSTS_REMOVE,
3523                                               node.name, None)
3524       result.Raise("Can't update hosts file with new host data")
3525       _RedistributeAncillaryFiles(self)
3526
3527
3528 class _NodeQuery(_QueryBase):
3529   FIELDS = query.NODE_FIELDS
3530
3531   def ExpandNames(self, lu):
3532     lu.needed_locks = {}
3533     lu.share_locks[locking.LEVEL_NODE] = 1
3534
3535     if self.names:
3536       self.wanted = _GetWantedNodes(lu, self.names)
3537     else:
3538       self.wanted = locking.ALL_SET
3539
3540     self.do_locking = (self.use_locking and
3541                        query.NQ_LIVE in self.requested_data)
3542
3543     if self.do_locking:
3544       # if we don't request only static fields, we need to lock the nodes
3545       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3546
3547   def DeclareLocks(self, lu, level):
3548     pass
3549
3550   def _GetQueryData(self, lu):
3551     """Computes the list of nodes and their attributes.
3552
3553     """
3554     all_info = lu.cfg.GetAllNodesInfo()
3555
3556     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3557
3558     # Gather data as requested
3559     if query.NQ_LIVE in self.requested_data:
3560       node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3561                                         lu.cfg.GetHypervisorType())
3562       live_data = dict((name, nresult.payload)
3563                        for (name, nresult) in node_data.items()
3564                        if not nresult.fail_msg and nresult.payload)
3565     else:
3566       live_data = None
3567
3568     if query.NQ_INST in self.requested_data:
3569       node_to_primary = dict([(name, set()) for name in nodenames])
3570       node_to_secondary = dict([(name, set()) for name in nodenames])
3571
3572       inst_data = lu.cfg.GetAllInstancesInfo()
3573
3574       for inst in inst_data.values():
3575         if inst.primary_node in node_to_primary:
3576           node_to_primary[inst.primary_node].add(inst.name)
3577         for secnode in inst.secondary_nodes:
3578           if secnode in node_to_secondary:
3579             node_to_secondary[secnode].add(inst.name)
3580     else:
3581       node_to_primary = None
3582       node_to_secondary = None
3583
3584     if query.NQ_OOB in self.requested_data:
3585       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3586                          for name, node in all_info.iteritems())
3587     else:
3588       oob_support = None
3589
3590     if query.NQ_GROUP in self.requested_data:
3591       groups = lu.cfg.GetAllNodeGroupsInfo()
3592     else:
3593       groups = {}
3594
3595     return query.NodeQueryData([all_info[name] for name in nodenames],
3596                                live_data, lu.cfg.GetMasterNode(),
3597                                node_to_primary, node_to_secondary, groups,
3598                                oob_support, lu.cfg.GetClusterInfo())
3599
3600
3601 class LUQueryNodes(NoHooksLU):
3602   """Logical unit for querying nodes.
3603
3604   """
3605   # pylint: disable-msg=W0142
3606   REQ_BGL = False
3607
3608   def CheckArguments(self):
3609     self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3610                          self.op.use_locking)
3611
3612   def ExpandNames(self):
3613     self.nq.ExpandNames(self)
3614
3615   def Exec(self, feedback_fn):
3616     return self.nq.OldStyleQuery(self)
3617
3618
3619 class LUQueryNodeVolumes(NoHooksLU):
3620   """Logical unit for getting volumes on node(s).
3621
3622   """
3623   REQ_BGL = False
3624   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3625   _FIELDS_STATIC = utils.FieldSet("node")
3626
3627   def CheckArguments(self):
3628     _CheckOutputFields(static=self._FIELDS_STATIC,
3629                        dynamic=self._FIELDS_DYNAMIC,
3630                        selected=self.op.output_fields)
3631
3632   def ExpandNames(self):
3633     self.needed_locks = {}
3634     self.share_locks[locking.LEVEL_NODE] = 1
3635     if not self.op.nodes:
3636       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3637     else:
3638       self.needed_locks[locking.LEVEL_NODE] = \
3639         _GetWantedNodes(self, self.op.nodes)
3640
3641   def Exec(self, feedback_fn):
3642     """Computes the list of nodes and their attributes.
3643
3644     """
3645     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3646     volumes = self.rpc.call_node_volumes(nodenames)
3647
3648     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3649              in self.cfg.GetInstanceList()]
3650
3651     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3652
3653     output = []
3654     for node in nodenames:
3655       nresult = volumes[node]
3656       if nresult.offline:
3657         continue
3658       msg = nresult.fail_msg
3659       if msg:
3660         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3661         continue
3662
3663       node_vols = nresult.payload[:]
3664       node_vols.sort(key=lambda vol: vol['dev'])
3665
3666       for vol in node_vols:
3667         node_output = []
3668         for field in self.op.output_fields:
3669           if field == "node":
3670             val = node
3671           elif field == "phys":
3672             val = vol['dev']
3673           elif field == "vg":
3674             val = vol['vg']
3675           elif field == "name":
3676             val = vol['name']
3677           elif field == "size":
3678             val = int(float(vol['size']))
3679           elif field == "instance":
3680             for inst in ilist:
3681               if node not in lv_by_node[inst]:
3682                 continue
3683               if vol['name'] in lv_by_node[inst][node]:
3684                 val = inst.name
3685                 break
3686             else:
3687               val = '-'
3688           else:
3689             raise errors.ParameterError(field)
3690           node_output.append(str(val))
3691
3692         output.append(node_output)
3693
3694     return output
3695
3696
3697 class LUQueryNodeStorage(NoHooksLU):
3698   """Logical unit for getting information on storage units on node(s).
3699
3700   """
3701   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3702   REQ_BGL = False
3703
3704   def CheckArguments(self):
3705     _CheckOutputFields(static=self._FIELDS_STATIC,
3706                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3707                        selected=self.op.output_fields)
3708
3709   def ExpandNames(self):
3710     self.needed_locks = {}
3711     self.share_locks[locking.LEVEL_NODE] = 1
3712
3713     if self.op.nodes:
3714       self.needed_locks[locking.LEVEL_NODE] = \
3715         _GetWantedNodes(self, self.op.nodes)
3716     else:
3717       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3718
3719   def Exec(self, feedback_fn):
3720     """Computes the list of nodes and their attributes.
3721
3722     """
3723     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3724
3725     # Always get name to sort by
3726     if constants.SF_NAME in self.op.output_fields:
3727       fields = self.op.output_fields[:]
3728     else:
3729       fields = [constants.SF_NAME] + self.op.output_fields
3730
3731     # Never ask for node or type as it's only known to the LU
3732     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3733       while extra in fields:
3734         fields.remove(extra)
3735
3736     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3737     name_idx = field_idx[constants.SF_NAME]
3738
3739     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3740     data = self.rpc.call_storage_list(self.nodes,
3741                                       self.op.storage_type, st_args,
3742                                       self.op.name, fields)
3743
3744     result = []
3745
3746     for node in utils.NiceSort(self.nodes):
3747       nresult = data[node]
3748       if nresult.offline:
3749         continue
3750
3751       msg = nresult.fail_msg
3752       if msg:
3753         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3754         continue
3755
3756       rows = dict([(row[name_idx], row) for row in nresult.payload])
3757
3758       for name in utils.NiceSort(rows.keys()):
3759         row = rows[name]
3760
3761         out = []
3762
3763         for field in self.op.output_fields:
3764           if field == constants.SF_NODE:
3765             val = node
3766           elif field == constants.SF_TYPE:
3767             val = self.op.storage_type
3768           elif field in field_idx:
3769             val = row[field_idx[field]]
3770           else:
3771             raise errors.ParameterError(field)
3772
3773           out.append(val)
3774
3775         result.append(out)
3776
3777     return result
3778
3779
3780 class _InstanceQuery(_QueryBase):
3781   FIELDS = query.INSTANCE_FIELDS
3782
3783   def ExpandNames(self, lu):
3784     lu.needed_locks = {}
3785     lu.share_locks[locking.LEVEL_INSTANCE] = 1
3786     lu.share_locks[locking.LEVEL_NODE] = 1
3787
3788     if self.names:
3789       self.wanted = _GetWantedInstances(lu, self.names)
3790     else:
3791       self.wanted = locking.ALL_SET
3792
3793     self.do_locking = (self.use_locking and
3794                        query.IQ_LIVE in self.requested_data)
3795     if self.do_locking:
3796       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3797       lu.needed_locks[locking.LEVEL_NODE] = []
3798       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3799
3800   def DeclareLocks(self, lu, level):
3801     if level == locking.LEVEL_NODE and self.do_locking:
3802       lu._LockInstancesNodes() # pylint: disable-msg=W0212
3803
3804   def _GetQueryData(self, lu):
3805     """Computes the list of instances and their attributes.
3806
3807     """
3808     all_info = lu.cfg.GetAllInstancesInfo()
3809
3810     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3811
3812     instance_list = [all_info[name] for name in instance_names]
3813     nodes = frozenset([inst.primary_node for inst in instance_list])
3814     hv_list = list(set([inst.hypervisor for inst in instance_list]))
3815     bad_nodes = []
3816     offline_nodes = []
3817
3818     # Gather data as requested
3819     if query.IQ_LIVE in self.requested_data:
3820       live_data = {}
3821       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3822       for name in nodes:
3823         result = node_data[name]
3824         if result.offline:
3825           # offline nodes will be in both lists
3826           assert result.fail_msg
3827           offline_nodes.append(name)
3828         if result.fail_msg:
3829           bad_nodes.append(name)
3830         elif result.payload:
3831           live_data.update(result.payload)
3832         # else no instance is alive
3833     else:
3834       live_data = {}
3835
3836     if query.IQ_DISKUSAGE in self.requested_data:
3837       disk_usage = dict((inst.name,
3838                          _ComputeDiskSize(inst.disk_template,
3839                                           [{"size": disk.size}
3840                                            for disk in inst.disks]))
3841                         for inst in instance_list)
3842     else:
3843       disk_usage = None
3844
3845     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3846                                    disk_usage, offline_nodes, bad_nodes,
3847                                    live_data)
3848
3849
3850 class LUQuery(NoHooksLU):
3851   """Query for resources/items of a certain kind.
3852
3853   """
3854   # pylint: disable-msg=W0142
3855   REQ_BGL = False
3856
3857   def CheckArguments(self):
3858     qcls = _GetQueryImplementation(self.op.what)
3859     names = qlang.ReadSimpleFilter("name", self.op.filter)
3860
3861     self.impl = qcls(names, self.op.fields, False)
3862
3863   def ExpandNames(self):
3864     self.impl.ExpandNames(self)
3865
3866   def DeclareLocks(self, level):
3867     self.impl.DeclareLocks(self, level)
3868
3869   def Exec(self, feedback_fn):
3870     return self.impl.NewStyleQuery(self)
3871
3872
3873 class LUQueryFields(NoHooksLU):
3874   """Query for resources/items of a certain kind.
3875
3876   """
3877   # pylint: disable-msg=W0142
3878   REQ_BGL = False
3879
3880   def CheckArguments(self):
3881     self.qcls = _GetQueryImplementation(self.op.what)
3882
3883   def ExpandNames(self):
3884     self.needed_locks = {}
3885
3886   def Exec(self, feedback_fn):
3887     return self.qcls.FieldsQuery(self.op.fields)
3888
3889
3890 class LUModifyNodeStorage(NoHooksLU):
3891   """Logical unit for modifying a storage volume on a node.
3892
3893   """
3894   REQ_BGL = False
3895
3896   def CheckArguments(self):
3897     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3898
3899     storage_type = self.op.storage_type
3900
3901     try:
3902       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3903     except KeyError:
3904       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3905                                  " modified" % storage_type,
3906                                  errors.ECODE_INVAL)
3907
3908     diff = set(self.op.changes.keys()) - modifiable
3909     if diff:
3910       raise errors.OpPrereqError("The following fields can not be modified for"
3911                                  " storage units of type '%s': %r" %
3912                                  (storage_type, list(diff)),
3913                                  errors.ECODE_INVAL)
3914
3915   def ExpandNames(self):
3916     self.needed_locks = {
3917       locking.LEVEL_NODE: self.op.node_name,
3918       }
3919
3920   def Exec(self, feedback_fn):
3921     """Computes the list of nodes and their attributes.
3922
3923     """
3924     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3925     result = self.rpc.call_storage_modify(self.op.node_name,
3926                                           self.op.storage_type, st_args,
3927                                           self.op.name, self.op.changes)
3928     result.Raise("Failed to modify storage unit '%s' on %s" %
3929                  (self.op.name, self.op.node_name))
3930
3931
3932 class LUAddNode(LogicalUnit):
3933   """Logical unit for adding node to the cluster.
3934
3935   """
3936   HPATH = "node-add"
3937   HTYPE = constants.HTYPE_NODE
3938   _NFLAGS = ["master_capable", "vm_capable"]
3939
3940   def CheckArguments(self):
3941     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3942     # validate/normalize the node name
3943     self.hostname = netutils.GetHostname(name=self.op.node_name,
3944                                          family=self.primary_ip_family)
3945     self.op.node_name = self.hostname.name
3946     if self.op.readd and self.op.group:
3947       raise errors.OpPrereqError("Cannot pass a node group when a node is"
3948                                  " being readded", errors.ECODE_INVAL)
3949
3950   def BuildHooksEnv(self):
3951     """Build hooks env.
3952
3953     This will run on all nodes before, and on all nodes + the new node after.
3954
3955     """
3956     env = {
3957       "OP_TARGET": self.op.node_name,
3958       "NODE_NAME": self.op.node_name,
3959       "NODE_PIP": self.op.primary_ip,
3960       "NODE_SIP": self.op.secondary_ip,
3961       "MASTER_CAPABLE": str(self.op.master_capable),
3962       "VM_CAPABLE": str(self.op.vm_capable),
3963       }
3964     nodes_0 = self.cfg.GetNodeList()
3965     nodes_1 = nodes_0 + [self.op.node_name, ]
3966     return env, nodes_0, nodes_1
3967
3968   def CheckPrereq(self):
3969     """Check prerequisites.
3970
3971     This checks:
3972      - the new node is not already in the config
3973      - it is resolvable
3974      - its parameters (single/dual homed) matches the cluster
3975
3976     Any errors are signaled by raising errors.OpPrereqError.
3977
3978     """
3979     cfg = self.cfg
3980     hostname = self.hostname
3981     node = hostname.name
3982     primary_ip = self.op.primary_ip = hostname.ip
3983     if self.op.secondary_ip is None:
3984       if self.primary_ip_family == netutils.IP6Address.family:
3985         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3986                                    " IPv4 address must be given as secondary",
3987                                    errors.ECODE_INVAL)
3988       self.op.secondary_ip = primary_ip
3989
3990     secondary_ip = self.op.secondary_ip
3991     if not netutils.IP4Address.IsValid(secondary_ip):
3992       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3993                                  " address" % secondary_ip, errors.ECODE_INVAL)
3994
3995     node_list = cfg.GetNodeList()
3996     if not self.op.readd and node in node_list:
3997       raise errors.OpPrereqError("Node %s is already in the configuration" %
3998                                  node, errors.ECODE_EXISTS)
3999     elif self.op.readd and node not in node_list:
4000       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4001                                  errors.ECODE_NOENT)
4002
4003     self.changed_primary_ip = False
4004
4005     for existing_node_name in node_list:
4006       existing_node = cfg.GetNodeInfo(existing_node_name)
4007
4008       if self.op.readd and node == existing_node_name:
4009         if existing_node.secondary_ip != secondary_ip:
4010           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4011                                      " address configuration as before",
4012                                      errors.ECODE_INVAL)
4013         if existing_node.primary_ip != primary_ip:
4014           self.changed_primary_ip = True
4015
4016         continue
4017
4018       if (existing_node.primary_ip == primary_ip or
4019           existing_node.secondary_ip == primary_ip or
4020           existing_node.primary_ip == secondary_ip or
4021           existing_node.secondary_ip == secondary_ip):
4022         raise errors.OpPrereqError("New node ip address(es) conflict with"
4023                                    " existing node %s" % existing_node.name,
4024                                    errors.ECODE_NOTUNIQUE)
4025
4026     # After this 'if' block, None is no longer a valid value for the
4027     # _capable op attributes
4028     if self.op.readd:
4029       old_node = self.cfg.GetNodeInfo(node)
4030       assert old_node is not None, "Can't retrieve locked node %s" % node
4031       for attr in self._NFLAGS:
4032         if getattr(self.op, attr) is None:
4033           setattr(self.op, attr, getattr(old_node, attr))
4034     else:
4035       for attr in self._NFLAGS:
4036         if getattr(self.op, attr) is None:
4037           setattr(self.op, attr, True)
4038
4039     if self.op.readd and not self.op.vm_capable:
4040       pri, sec = cfg.GetNodeInstances(node)
4041       if pri or sec:
4042         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4043                                    " flag set to false, but it already holds"
4044                                    " instances" % node,
4045                                    errors.ECODE_STATE)
4046
4047     # check that the type of the node (single versus dual homed) is the
4048     # same as for the master
4049     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4050     master_singlehomed = myself.secondary_ip == myself.primary_ip
4051     newbie_singlehomed = secondary_ip == primary_ip
4052     if master_singlehomed != newbie_singlehomed:
4053       if master_singlehomed:
4054         raise errors.OpPrereqError("The master has no secondary ip but the"
4055                                    " new node has one",
4056                                    errors.ECODE_INVAL)
4057       else:
4058         raise errors.OpPrereqError("The master has a secondary ip but the"
4059                                    " new node doesn't have one",
4060                                    errors.ECODE_INVAL)
4061
4062     # checks reachability
4063     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4064       raise errors.OpPrereqError("Node not reachable by ping",
4065                                  errors.ECODE_ENVIRON)
4066
4067     if not newbie_singlehomed:
4068       # check reachability from my secondary ip to newbie's secondary ip
4069       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4070                            source=myself.secondary_ip):
4071         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4072                                    " based ping to node daemon port",
4073                                    errors.ECODE_ENVIRON)
4074
4075     if self.op.readd:
4076       exceptions = [node]
4077     else:
4078       exceptions = []
4079
4080     if self.op.master_capable:
4081       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4082     else:
4083       self.master_candidate = False
4084
4085     if self.op.readd:
4086       self.new_node = old_node
4087     else:
4088       node_group = cfg.LookupNodeGroup(self.op.group)
4089       self.new_node = objects.Node(name=node,
4090                                    primary_ip=primary_ip,
4091                                    secondary_ip=secondary_ip,
4092                                    master_candidate=self.master_candidate,
4093                                    offline=False, drained=False,
4094                                    group=node_group)
4095
4096     if self.op.ndparams:
4097       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4098
4099   def Exec(self, feedback_fn):
4100     """Adds the new node to the cluster.
4101
4102     """
4103     new_node = self.new_node
4104     node = new_node.name
4105
4106     # We adding a new node so we assume it's powered
4107     new_node.powered = True
4108
4109     # for re-adds, reset the offline/drained/master-candidate flags;
4110     # we need to reset here, otherwise offline would prevent RPC calls
4111     # later in the procedure; this also means that if the re-add
4112     # fails, we are left with a non-offlined, broken node
4113     if self.op.readd:
4114       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4115       self.LogInfo("Readding a node, the offline/drained flags were reset")
4116       # if we demote the node, we do cleanup later in the procedure
4117       new_node.master_candidate = self.master_candidate
4118       if self.changed_primary_ip:
4119         new_node.primary_ip = self.op.primary_ip
4120
4121     # copy the master/vm_capable flags
4122     for attr in self._NFLAGS:
4123       setattr(new_node, attr, getattr(self.op, attr))
4124
4125     # notify the user about any possible mc promotion
4126     if new_node.master_candidate:
4127       self.LogInfo("Node will be a master candidate")
4128
4129     if self.op.ndparams:
4130       new_node.ndparams = self.op.ndparams
4131     else:
4132       new_node.ndparams = {}
4133
4134     # check connectivity
4135     result = self.rpc.call_version([node])[node]
4136     result.Raise("Can't get version information from node %s" % node)
4137     if constants.PROTOCOL_VERSION == result.payload:
4138       logging.info("Communication to node %s fine, sw version %s match",
4139                    node, result.payload)
4140     else:
4141       raise errors.OpExecError("Version mismatch master version %s,"
4142                                " node version %s" %
4143                                (constants.PROTOCOL_VERSION, result.payload))
4144
4145     # Add node to our /etc/hosts, and add key to known_hosts
4146     if self.cfg.GetClusterInfo().modify_etc_hosts:
4147       master_node = self.cfg.GetMasterNode()
4148       result = self.rpc.call_etc_hosts_modify(master_node,
4149                                               constants.ETC_HOSTS_ADD,
4150                                               self.hostname.name,
4151                                               self.hostname.ip)
4152       result.Raise("Can't update hosts file with new host data")
4153
4154     if new_node.secondary_ip != new_node.primary_ip:
4155       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4156                                False)
4157
4158     node_verify_list = [self.cfg.GetMasterNode()]
4159     node_verify_param = {
4160       constants.NV_NODELIST: [node],
4161       # TODO: do a node-net-test as well?
4162     }
4163
4164     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4165                                        self.cfg.GetClusterName())
4166     for verifier in node_verify_list:
4167       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4168       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4169       if nl_payload:
4170         for failed in nl_payload:
4171           feedback_fn("ssh/hostname verification failed"
4172                       " (checking from %s): %s" %
4173                       (verifier, nl_payload[failed]))
4174         raise errors.OpExecError("ssh/hostname verification failed.")
4175
4176     if self.op.readd:
4177       _RedistributeAncillaryFiles(self)
4178       self.context.ReaddNode(new_node)
4179       # make sure we redistribute the config
4180       self.cfg.Update(new_node, feedback_fn)
4181       # and make sure the new node will not have old files around
4182       if not new_node.master_candidate:
4183         result = self.rpc.call_node_demote_from_mc(new_node.name)
4184         msg = result.fail_msg
4185         if msg:
4186           self.LogWarning("Node failed to demote itself from master"
4187                           " candidate status: %s" % msg)
4188     else:
4189       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4190                                   additional_vm=self.op.vm_capable)
4191       self.context.AddNode(new_node, self.proc.GetECId())
4192
4193
4194 class LUSetNodeParams(LogicalUnit):
4195   """Modifies the parameters of a node.
4196
4197   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4198       to the node role (as _ROLE_*)
4199   @cvar _R2F: a dictionary from node role to tuples of flags
4200   @cvar _FLAGS: a list of attribute names corresponding to the flags
4201
4202   """
4203   HPATH = "node-modify"
4204   HTYPE = constants.HTYPE_NODE
4205   REQ_BGL = False
4206   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4207   _F2R = {
4208     (True, False, False): _ROLE_CANDIDATE,
4209     (False, True, False): _ROLE_DRAINED,
4210     (False, False, True): _ROLE_OFFLINE,
4211     (False, False, False): _ROLE_REGULAR,
4212     }
4213   _R2F = dict((v, k) for k, v in _F2R.items())
4214   _FLAGS = ["master_candidate", "drained", "offline"]
4215
4216   def CheckArguments(self):
4217     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4218     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4219                 self.op.master_capable, self.op.vm_capable,
4220                 self.op.secondary_ip, self.op.ndparams]
4221     if all_mods.count(None) == len(all_mods):
4222       raise errors.OpPrereqError("Please pass at least one modification",
4223                                  errors.ECODE_INVAL)
4224     if all_mods.count(True) > 1:
4225       raise errors.OpPrereqError("Can't set the node into more than one"
4226                                  " state at the same time",
4227                                  errors.ECODE_INVAL)
4228
4229     # Boolean value that tells us whether we might be demoting from MC
4230     self.might_demote = (self.op.master_candidate == False or
4231                          self.op.offline == True or
4232                          self.op.drained == True or
4233                          self.op.master_capable == False)
4234
4235     if self.op.secondary_ip:
4236       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4237         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4238                                    " address" % self.op.secondary_ip,
4239                                    errors.ECODE_INVAL)
4240
4241     self.lock_all = self.op.auto_promote and self.might_demote
4242     self.lock_instances = self.op.secondary_ip is not None
4243
4244   def ExpandNames(self):
4245     if self.lock_all:
4246       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4247     else:
4248       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4249
4250     if self.lock_instances:
4251       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4252
4253   def DeclareLocks(self, level):
4254     # If we have locked all instances, before waiting to lock nodes, release
4255     # all the ones living on nodes unrelated to the current operation.
4256     if level == locking.LEVEL_NODE and self.lock_instances:
4257       instances_release = []
4258       instances_keep = []
4259       self.affected_instances = []
4260       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4261         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4262           instance = self.context.cfg.GetInstanceInfo(instance_name)
4263           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4264           if i_mirrored and self.op.node_name in instance.all_nodes:
4265             instances_keep.append(instance_name)
4266             self.affected_instances.append(instance)
4267           else:
4268             instances_release.append(instance_name)
4269         if instances_release:
4270           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4271           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4272
4273   def BuildHooksEnv(self):
4274     """Build hooks env.
4275
4276     This runs on the master node.
4277
4278     """
4279     env = {
4280       "OP_TARGET": self.op.node_name,
4281       "MASTER_CANDIDATE": str(self.op.master_candidate),
4282       "OFFLINE": str(self.op.offline),
4283       "DRAINED": str(self.op.drained),
4284       "MASTER_CAPABLE": str(self.op.master_capable),
4285       "VM_CAPABLE": str(self.op.vm_capable),
4286       }
4287     nl = [self.cfg.GetMasterNode(),
4288           self.op.node_name]
4289     return env, nl, nl
4290
4291   def CheckPrereq(self):
4292     """Check prerequisites.
4293
4294     This only checks the instance list against the existing names.
4295
4296     """
4297     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4298
4299     if (self.op.master_candidate is not None or
4300         self.op.drained is not None or
4301         self.op.offline is not None):
4302       # we can't change the master's node flags
4303       if self.op.node_name == self.cfg.GetMasterNode():
4304         raise errors.OpPrereqError("The master role can be changed"
4305                                    " only via master-failover",
4306                                    errors.ECODE_INVAL)
4307
4308     if self.op.master_candidate and not node.master_capable:
4309       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4310                                  " it a master candidate" % node.name,
4311                                  errors.ECODE_STATE)
4312
4313     if self.op.vm_capable == False:
4314       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4315       if ipri or isec:
4316         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4317                                    " the vm_capable flag" % node.name,
4318                                    errors.ECODE_STATE)
4319
4320     if node.master_candidate and self.might_demote and not self.lock_all:
4321       assert not self.op.auto_promote, "auto-promote set but lock_all not"
4322       # check if after removing the current node, we're missing master
4323       # candidates
4324       (mc_remaining, mc_should, _) = \
4325           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4326       if mc_remaining < mc_should:
4327         raise errors.OpPrereqError("Not enough master candidates, please"
4328                                    " pass auto_promote to allow promotion",
4329                                    errors.ECODE_STATE)
4330
4331     self.old_flags = old_flags = (node.master_candidate,
4332                                   node.drained, node.offline)
4333     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4334     self.old_role = old_role = self._F2R[old_flags]
4335
4336     # Check for ineffective changes
4337     for attr in self._FLAGS:
4338       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4339         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4340         setattr(self.op, attr, None)
4341
4342     # Past this point, any flag change to False means a transition
4343     # away from the respective state, as only real changes are kept
4344
4345     # TODO: We might query the real power state if it supports OOB
4346     if _SupportsOob(self.cfg, node):
4347       if self.op.offline is False and not (node.powered or
4348                                            self.op.powered == True):
4349         raise errors.OpPrereqError(("Please power on node %s first before you"
4350                                     " can reset offline state") %
4351                                    self.op.node_name)
4352     elif self.op.powered is not None:
4353       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4354                                   " which does not support out-of-band"
4355                                   " handling") % self.op.node_name)
4356
4357     # If we're being deofflined/drained, we'll MC ourself if needed
4358     if (self.op.drained == False or self.op.offline == False or
4359         (self.op.master_capable and not node.master_capable)):
4360       if _DecideSelfPromotion(self):
4361         self.op.master_candidate = True
4362         self.LogInfo("Auto-promoting node to master candidate")
4363
4364     # If we're no longer master capable, we'll demote ourselves from MC
4365     if self.op.master_capable == False and node.master_candidate:
4366       self.LogInfo("Demoting from master candidate")
4367       self.op.master_candidate = False
4368
4369     # Compute new role
4370     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4371     if self.op.master_candidate:
4372       new_role = self._ROLE_CANDIDATE
4373     elif self.op.drained:
4374       new_role = self._ROLE_DRAINED
4375     elif self.op.offline:
4376       new_role = self._ROLE_OFFLINE
4377     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4378       # False is still in new flags, which means we're un-setting (the
4379       # only) True flag
4380       new_role = self._ROLE_REGULAR
4381     else: # no new flags, nothing, keep old role
4382       new_role = old_role
4383
4384     self.new_role = new_role
4385
4386     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4387       # Trying to transition out of offline status
4388       result = self.rpc.call_version([node.name])[node.name]
4389       if result.fail_msg:
4390         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4391                                    " to report its version: %s" %
4392                                    (node.name, result.fail_msg),
4393                                    errors.ECODE_STATE)
4394       else:
4395         self.LogWarning("Transitioning node from offline to online state"
4396                         " without using re-add. Please make sure the node"
4397                         " is healthy!")
4398
4399     if self.op.secondary_ip:
4400       # Ok even without locking, because this can't be changed by any LU
4401       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4402       master_singlehomed = master.secondary_ip == master.primary_ip
4403       if master_singlehomed and self.op.secondary_ip:
4404         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4405                                    " homed cluster", errors.ECODE_INVAL)
4406
4407       if node.offline:
4408         if self.affected_instances:
4409           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4410                                      " node has instances (%s) configured"
4411                                      " to use it" % self.affected_instances)
4412       else:
4413         # On online nodes, check that no instances are running, and that
4414         # the node has the new ip and we can reach it.
4415         for instance in self.affected_instances:
4416           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4417
4418         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4419         if master.name != node.name:
4420           # check reachability from master secondary ip to new secondary ip
4421           if not netutils.TcpPing(self.op.secondary_ip,
4422                                   constants.DEFAULT_NODED_PORT,
4423                                   source=master.secondary_ip):
4424             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4425                                        " based ping to node daemon port",
4426                                        errors.ECODE_ENVIRON)
4427
4428     if self.op.ndparams:
4429       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4430       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4431       self.new_ndparams = new_ndparams
4432
4433   def Exec(self, feedback_fn):
4434     """Modifies a node.
4435
4436     """
4437     node = self.node
4438     old_role = self.old_role
4439     new_role = self.new_role
4440
4441     result = []
4442
4443     if self.op.ndparams:
4444       node.ndparams = self.new_ndparams
4445
4446     if self.op.powered is not None:
4447       node.powered = self.op.powered
4448
4449     for attr in ["master_capable", "vm_capable"]:
4450       val = getattr(self.op, attr)
4451       if val is not None:
4452         setattr(node, attr, val)
4453         result.append((attr, str(val)))
4454
4455     if new_role != old_role:
4456       # Tell the node to demote itself, if no longer MC and not offline
4457       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4458         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4459         if msg:
4460           self.LogWarning("Node failed to demote itself: %s", msg)
4461
4462       new_flags = self._R2F[new_role]
4463       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4464         if of != nf:
4465           result.append((desc, str(nf)))
4466       (node.master_candidate, node.drained, node.offline) = new_flags
4467
4468       # we locked all nodes, we adjust the CP before updating this node
4469       if self.lock_all:
4470         _AdjustCandidatePool(self, [node.name])
4471
4472     if self.op.secondary_ip:
4473       node.secondary_ip = self.op.secondary_ip
4474       result.append(("secondary_ip", self.op.secondary_ip))
4475
4476     # this will trigger configuration file update, if needed
4477     self.cfg.Update(node, feedback_fn)
4478
4479     # this will trigger job queue propagation or cleanup if the mc
4480     # flag changed
4481     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4482       self.context.ReaddNode(node)
4483
4484     return result
4485
4486
4487 class LUPowercycleNode(NoHooksLU):
4488   """Powercycles a node.
4489
4490   """
4491   REQ_BGL = False
4492
4493   def CheckArguments(self):
4494     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4495     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4496       raise errors.OpPrereqError("The node is the master and the force"
4497                                  " parameter was not set",
4498                                  errors.ECODE_INVAL)
4499
4500   def ExpandNames(self):
4501     """Locking for PowercycleNode.
4502
4503     This is a last-resort option and shouldn't block on other
4504     jobs. Therefore, we grab no locks.
4505
4506     """
4507     self.needed_locks = {}
4508
4509   def Exec(self, feedback_fn):
4510     """Reboots a node.
4511
4512     """
4513     result = self.rpc.call_node_powercycle(self.op.node_name,
4514                                            self.cfg.GetHypervisorType())
4515     result.Raise("Failed to schedule the reboot")
4516     return result.payload
4517
4518
4519 class LUClusterQuery(NoHooksLU):
4520   """Query cluster configuration.
4521
4522   """
4523   REQ_BGL = False
4524
4525   def ExpandNames(self):
4526     self.needed_locks = {}
4527
4528   def Exec(self, feedback_fn):
4529     """Return cluster config.
4530
4531     """
4532     cluster = self.cfg.GetClusterInfo()
4533     os_hvp = {}
4534
4535     # Filter just for enabled hypervisors
4536     for os_name, hv_dict in cluster.os_hvp.items():
4537       os_hvp[os_name] = {}
4538       for hv_name, hv_params in hv_dict.items():
4539         if hv_name in cluster.enabled_hypervisors:
4540           os_hvp[os_name][hv_name] = hv_params
4541
4542     # Convert ip_family to ip_version
4543     primary_ip_version = constants.IP4_VERSION
4544     if cluster.primary_ip_family == netutils.IP6Address.family:
4545       primary_ip_version = constants.IP6_VERSION
4546
4547     result = {
4548       "software_version": constants.RELEASE_VERSION,
4549       "protocol_version": constants.PROTOCOL_VERSION,
4550       "config_version": constants.CONFIG_VERSION,
4551       "os_api_version": max(constants.OS_API_VERSIONS),
4552       "export_version": constants.EXPORT_VERSION,
4553       "architecture": (platform.architecture()[0], platform.machine()),
4554       "name": cluster.cluster_name,
4555       "master": cluster.master_node,
4556       "default_hypervisor": cluster.enabled_hypervisors[0],
4557       "enabled_hypervisors": cluster.enabled_hypervisors,
4558       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4559                         for hypervisor_name in cluster.enabled_hypervisors]),
4560       "os_hvp": os_hvp,
4561       "beparams": cluster.beparams,
4562       "osparams": cluster.osparams,
4563       "nicparams": cluster.nicparams,
4564       "ndparams": cluster.ndparams,
4565       "candidate_pool_size": cluster.candidate_pool_size,
4566       "master_netdev": cluster.master_netdev,
4567       "volume_group_name": cluster.volume_group_name,
4568       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4569       "file_storage_dir": cluster.file_storage_dir,
4570       "maintain_node_health": cluster.maintain_node_health,
4571       "ctime": cluster.ctime,
4572       "mtime": cluster.mtime,
4573       "uuid": cluster.uuid,
4574       "tags": list(cluster.GetTags()),
4575       "uid_pool": cluster.uid_pool,
4576       "default_iallocator": cluster.default_iallocator,
4577       "reserved_lvs": cluster.reserved_lvs,
4578       "primary_ip_version": primary_ip_version,
4579       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4580       }
4581
4582     return result
4583
4584
4585 class LUClusterConfigQuery(NoHooksLU):
4586   """Return configuration values.
4587
4588   """
4589   REQ_BGL = False
4590   _FIELDS_DYNAMIC = utils.FieldSet()
4591   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4592                                   "watcher_pause", "volume_group_name")
4593
4594   def CheckArguments(self):
4595     _CheckOutputFields(static=self._FIELDS_STATIC,
4596                        dynamic=self._FIELDS_DYNAMIC,
4597                        selected=self.op.output_fields)
4598
4599   def ExpandNames(self):
4600     self.needed_locks = {}
4601
4602   def Exec(self, feedback_fn):
4603     """Dump a representation of the cluster config to the standard output.
4604
4605     """
4606     values = []
4607     for field in self.op.output_fields:
4608       if field == "cluster_name":
4609         entry = self.cfg.GetClusterName()
4610       elif field == "master_node":
4611         entry = self.cfg.GetMasterNode()
4612       elif field == "drain_flag":
4613         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4614       elif field == "watcher_pause":
4615         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4616       elif field == "volume_group_name":
4617         entry = self.cfg.GetVGName()
4618       else:
4619         raise errors.ParameterError(field)
4620       values.append(entry)
4621     return values
4622
4623
4624 class LUInstanceActivateDisks(NoHooksLU):
4625   """Bring up an instance's disks.
4626
4627   """
4628   REQ_BGL = False
4629
4630   def ExpandNames(self):
4631     self._ExpandAndLockInstance()
4632     self.needed_locks[locking.LEVEL_NODE] = []
4633     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4634
4635   def DeclareLocks(self, level):
4636     if level == locking.LEVEL_NODE:
4637       self._LockInstancesNodes()
4638
4639   def CheckPrereq(self):
4640     """Check prerequisites.
4641
4642     This checks that the instance is in the cluster.
4643
4644     """
4645     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4646     assert self.instance is not None, \
4647       "Cannot retrieve locked instance %s" % self.op.instance_name
4648     _CheckNodeOnline(self, self.instance.primary_node)
4649
4650   def Exec(self, feedback_fn):
4651     """Activate the disks.
4652
4653     """
4654     disks_ok, disks_info = \
4655               _AssembleInstanceDisks(self, self.instance,
4656                                      ignore_size=self.op.ignore_size)
4657     if not disks_ok:
4658       raise errors.OpExecError("Cannot activate block devices")
4659
4660     return disks_info
4661
4662
4663 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4664                            ignore_size=False):
4665   """Prepare the block devices for an instance.
4666
4667   This sets up the block devices on all nodes.
4668
4669   @type lu: L{LogicalUnit}
4670   @param lu: the logical unit on whose behalf we execute
4671   @type instance: L{objects.Instance}
4672   @param instance: the instance for whose disks we assemble
4673   @type disks: list of L{objects.Disk} or None
4674   @param disks: which disks to assemble (or all, if None)
4675   @type ignore_secondaries: boolean
4676   @param ignore_secondaries: if true, errors on secondary nodes
4677       won't result in an error return from the function
4678   @type ignore_size: boolean
4679   @param ignore_size: if true, the current known size of the disk
4680       will not be used during the disk activation, useful for cases
4681       when the size is wrong
4682   @return: False if the operation failed, otherwise a list of
4683       (host, instance_visible_name, node_visible_name)
4684       with the mapping from node devices to instance devices
4685
4686   """
4687   device_info = []
4688   disks_ok = True
4689   iname = instance.name
4690   disks = _ExpandCheckDisks(instance, disks)
4691
4692   # With the two passes mechanism we try to reduce the window of
4693   # opportunity for the race condition of switching DRBD to primary
4694   # before handshaking occured, but we do not eliminate it
4695
4696   # The proper fix would be to wait (with some limits) until the
4697   # connection has been made and drbd transitions from WFConnection
4698   # into any other network-connected state (Connected, SyncTarget,
4699   # SyncSource, etc.)
4700
4701   # 1st pass, assemble on all nodes in secondary mode
4702   for inst_disk in disks:
4703     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4704       if ignore_size:
4705         node_disk = node_disk.Copy()
4706         node_disk.UnsetSize()
4707       lu.cfg.SetDiskID(node_disk, node)
4708       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4709       msg = result.fail_msg
4710       if msg:
4711         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4712                            " (is_primary=False, pass=1): %s",
4713                            inst_disk.iv_name, node, msg)
4714         if not ignore_secondaries:
4715           disks_ok = False
4716
4717   # FIXME: race condition on drbd migration to primary
4718
4719   # 2nd pass, do only the primary node
4720   for inst_disk in disks:
4721     dev_path = None
4722
4723     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4724       if node != instance.primary_node:
4725         continue
4726       if ignore_size:
4727         node_disk = node_disk.Copy()
4728         node_disk.UnsetSize()
4729       lu.cfg.SetDiskID(node_disk, node)
4730       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4731       msg = result.fail_msg
4732       if msg:
4733         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4734                            " (is_primary=True, pass=2): %s",
4735                            inst_disk.iv_name, node, msg)
4736         disks_ok = False
4737       else:
4738         dev_path = result.payload
4739
4740     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4741
4742   # leave the disks configured for the primary node
4743   # this is a workaround that would be fixed better by
4744   # improving the logical/physical id handling
4745   for disk in disks:
4746     lu.cfg.SetDiskID(disk, instance.primary_node)
4747
4748   return disks_ok, device_info
4749
4750
4751 def _StartInstanceDisks(lu, instance, force):
4752   """Start the disks of an instance.
4753
4754   """
4755   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4756                                            ignore_secondaries=force)
4757   if not disks_ok:
4758     _ShutdownInstanceDisks(lu, instance)
4759     if force is not None and not force:
4760       lu.proc.LogWarning("", hint="If the message above refers to a"
4761                          " secondary node,"
4762                          " you can retry the operation using '--force'.")
4763     raise errors.OpExecError("Disk consistency error")
4764
4765
4766 class LUInstanceDeactivateDisks(NoHooksLU):
4767   """Shutdown an instance's disks.
4768
4769   """
4770   REQ_BGL = False
4771
4772   def ExpandNames(self):
4773     self._ExpandAndLockInstance()
4774     self.needed_locks[locking.LEVEL_NODE] = []
4775     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4776
4777   def DeclareLocks(self, level):
4778     if level == locking.LEVEL_NODE:
4779       self._LockInstancesNodes()
4780
4781   def CheckPrereq(self):
4782     """Check prerequisites.
4783
4784     This checks that the instance is in the cluster.
4785
4786     """
4787     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4788     assert self.instance is not None, \
4789       "Cannot retrieve locked instance %s" % self.op.instance_name
4790
4791   def Exec(self, feedback_fn):
4792     """Deactivate the disks
4793
4794     """
4795     instance = self.instance
4796     _SafeShutdownInstanceDisks(self, instance)
4797
4798
4799 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4800   """Shutdown block devices of an instance.
4801
4802   This function checks if an instance is running, before calling
4803   _ShutdownInstanceDisks.
4804
4805   """
4806   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4807   _ShutdownInstanceDisks(lu, instance, disks=disks)
4808
4809
4810 def _ExpandCheckDisks(instance, disks):
4811   """Return the instance disks selected by the disks list
4812
4813   @type disks: list of L{objects.Disk} or None
4814   @param disks: selected disks
4815   @rtype: list of L{objects.Disk}
4816   @return: selected instance disks to act on
4817
4818   """
4819   if disks is None:
4820     return instance.disks
4821   else:
4822     if not set(disks).issubset(instance.disks):
4823       raise errors.ProgrammerError("Can only act on disks belonging to the"
4824                                    " target instance")
4825     return disks
4826
4827
4828 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4829   """Shutdown block devices of an instance.
4830
4831   This does the shutdown on all nodes of the instance.
4832
4833   If the ignore_primary is false, errors on the primary node are
4834   ignored.
4835
4836   """
4837   all_result = True
4838   disks = _ExpandCheckDisks(instance, disks)
4839
4840   for disk in disks:
4841     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4842       lu.cfg.SetDiskID(top_disk, node)
4843       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4844       msg = result.fail_msg
4845       if msg:
4846         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4847                       disk.iv_name, node, msg)
4848         if ((node == instance.primary_node and not ignore_primary) or
4849             (node != instance.primary_node and not result.offline)):
4850           all_result = False
4851   return all_result
4852
4853
4854 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4855   """Checks if a node has enough free memory.
4856
4857   This function check if a given node has the needed amount of free
4858   memory. In case the node has less memory or we cannot get the
4859   information from the node, this function raise an OpPrereqError
4860   exception.
4861
4862   @type lu: C{LogicalUnit}
4863   @param lu: a logical unit from which we get configuration data
4864   @type node: C{str}
4865   @param node: the node to check
4866   @type reason: C{str}
4867   @param reason: string to use in the error message
4868   @type requested: C{int}
4869   @param requested: the amount of memory in MiB to check for
4870   @type hypervisor_name: C{str}
4871   @param hypervisor_name: the hypervisor to ask for memory stats
4872   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4873       we cannot check the node
4874
4875   """
4876   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4877   nodeinfo[node].Raise("Can't get data from node %s" % node,
4878                        prereq=True, ecode=errors.ECODE_ENVIRON)
4879   free_mem = nodeinfo[node].payload.get('memory_free', None)
4880   if not isinstance(free_mem, int):
4881     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4882                                " was '%s'" % (node, free_mem),
4883                                errors.ECODE_ENVIRON)
4884   if requested > free_mem:
4885     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4886                                " needed %s MiB, available %s MiB" %
4887                                (node, reason, requested, free_mem),
4888                                errors.ECODE_NORES)
4889
4890
4891 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4892   """Checks if nodes have enough free disk space in the all VGs.
4893
4894   This function check if all given nodes have the needed amount of
4895   free disk. In case any node has less disk or we cannot get the
4896   information from the node, this function raise an OpPrereqError
4897   exception.
4898
4899   @type lu: C{LogicalUnit}
4900   @param lu: a logical unit from which we get configuration data
4901   @type nodenames: C{list}
4902   @param nodenames: the list of node names to check
4903   @type req_sizes: C{dict}
4904   @param req_sizes: the hash of vg and corresponding amount of disk in
4905       MiB to check for
4906   @raise errors.OpPrereqError: if the node doesn't have enough disk,
4907       or we cannot check the node
4908
4909   """
4910   for vg, req_size in req_sizes.items():
4911     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4912
4913
4914 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4915   """Checks if nodes have enough free disk space in the specified VG.
4916
4917   This function check if all given nodes have the needed amount of
4918   free disk. In case any node has less disk or we cannot get the
4919   information from the node, this function raise an OpPrereqError
4920   exception.
4921
4922   @type lu: C{LogicalUnit}
4923   @param lu: a logical unit from which we get configuration data
4924   @type nodenames: C{list}
4925   @param nodenames: the list of node names to check
4926   @type vg: C{str}
4927   @param vg: the volume group to check
4928   @type requested: C{int}
4929   @param requested: the amount of disk in MiB to check for
4930   @raise errors.OpPrereqError: if the node doesn't have enough disk,
4931       or we cannot check the node
4932
4933   """
4934   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
4935   for node in nodenames:
4936     info = nodeinfo[node]
4937     info.Raise("Cannot get current information from node %s" % node,
4938                prereq=True, ecode=errors.ECODE_ENVIRON)
4939     vg_free = info.payload.get("vg_free", None)
4940     if not isinstance(vg_free, int):
4941       raise errors.OpPrereqError("Can't compute free disk space on node"
4942                                  " %s for vg %s, result was '%s'" %
4943                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
4944     if requested > vg_free:
4945       raise errors.OpPrereqError("Not enough disk space on target node %s"
4946                                  " vg %s: required %d MiB, available %d MiB" %
4947                                  (node, vg, requested, vg_free),
4948                                  errors.ECODE_NORES)
4949
4950
4951 class LUStartupInstance(LogicalUnit):
4952   """Starts an instance.
4953
4954   """
4955   HPATH = "instance-start"
4956   HTYPE = constants.HTYPE_INSTANCE
4957   REQ_BGL = False
4958
4959   def CheckArguments(self):
4960     # extra beparams
4961     if self.op.beparams:
4962       # fill the beparams dict
4963       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4964
4965   def ExpandNames(self):
4966     self._ExpandAndLockInstance()
4967
4968   def BuildHooksEnv(self):
4969     """Build hooks env.
4970
4971     This runs on master, primary and secondary nodes of the instance.
4972
4973     """
4974     env = {
4975       "FORCE": self.op.force,
4976       }
4977     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4978     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4979     return env, nl, nl
4980
4981   def CheckPrereq(self):
4982     """Check prerequisites.
4983
4984     This checks that the instance is in the cluster.
4985
4986     """
4987     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4988     assert self.instance is not None, \
4989       "Cannot retrieve locked instance %s" % self.op.instance_name
4990
4991     # extra hvparams
4992     if self.op.hvparams:
4993       # check hypervisor parameter syntax (locally)
4994       cluster = self.cfg.GetClusterInfo()
4995       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4996       filled_hvp = cluster.FillHV(instance)
4997       filled_hvp.update(self.op.hvparams)
4998       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4999       hv_type.CheckParameterSyntax(filled_hvp)
5000       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5001
5002     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5003
5004     if self.primary_offline and self.op.ignore_offline_nodes:
5005       self.proc.LogWarning("Ignoring offline primary node")
5006
5007       if self.op.hvparams or self.op.beparams:
5008         self.proc.LogWarning("Overridden parameters are ignored")
5009     else:
5010       _CheckNodeOnline(self, instance.primary_node)
5011
5012       bep = self.cfg.GetClusterInfo().FillBE(instance)
5013
5014       # check bridges existence
5015       _CheckInstanceBridgesExist(self, instance)
5016
5017       remote_info = self.rpc.call_instance_info(instance.primary_node,
5018                                                 instance.name,
5019                                                 instance.hypervisor)
5020       remote_info.Raise("Error checking node %s" % instance.primary_node,
5021                         prereq=True, ecode=errors.ECODE_ENVIRON)
5022       if not remote_info.payload: # not running already
5023         _CheckNodeFreeMemory(self, instance.primary_node,
5024                              "starting instance %s" % instance.name,
5025                              bep[constants.BE_MEMORY], instance.hypervisor)
5026
5027   def Exec(self, feedback_fn):
5028     """Start the instance.
5029
5030     """
5031     instance = self.instance
5032     force = self.op.force
5033
5034     self.cfg.MarkInstanceUp(instance.name)
5035
5036     if self.primary_offline:
5037       assert self.op.ignore_offline_nodes
5038       self.proc.LogInfo("Primary node offline, marked instance as started")
5039     else:
5040       node_current = instance.primary_node
5041
5042       _StartInstanceDisks(self, instance, force)
5043
5044       result = self.rpc.call_instance_start(node_current, instance,
5045                                             self.op.hvparams, self.op.beparams)
5046       msg = result.fail_msg
5047       if msg:
5048         _ShutdownInstanceDisks(self, instance)
5049         raise errors.OpExecError("Could not start instance: %s" % msg)
5050
5051
5052 class LURebootInstance(LogicalUnit):
5053   """Reboot an instance.
5054
5055   """
5056   HPATH = "instance-reboot"
5057   HTYPE = constants.HTYPE_INSTANCE
5058   REQ_BGL = False
5059
5060   def ExpandNames(self):
5061     self._ExpandAndLockInstance()
5062
5063   def BuildHooksEnv(self):
5064     """Build hooks env.
5065
5066     This runs on master, primary and secondary nodes of the instance.
5067
5068     """
5069     env = {
5070       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5071       "REBOOT_TYPE": self.op.reboot_type,
5072       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5073       }
5074     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5075     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5076     return env, nl, nl
5077
5078   def CheckPrereq(self):
5079     """Check prerequisites.
5080
5081     This checks that the instance is in the cluster.
5082
5083     """
5084     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5085     assert self.instance is not None, \
5086       "Cannot retrieve locked instance %s" % self.op.instance_name
5087
5088     _CheckNodeOnline(self, instance.primary_node)
5089
5090     # check bridges existence
5091     _CheckInstanceBridgesExist(self, instance)
5092
5093   def Exec(self, feedback_fn):
5094     """Reboot the instance.
5095
5096     """
5097     instance = self.instance
5098     ignore_secondaries = self.op.ignore_secondaries
5099     reboot_type = self.op.reboot_type
5100
5101     node_current = instance.primary_node
5102
5103     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5104                        constants.INSTANCE_REBOOT_HARD]:
5105       for disk in instance.disks:
5106         self.cfg.SetDiskID(disk, node_current)
5107       result = self.rpc.call_instance_reboot(node_current, instance,
5108                                              reboot_type,
5109                                              self.op.shutdown_timeout)
5110       result.Raise("Could not reboot instance")
5111     else:
5112       result = self.rpc.call_instance_shutdown(node_current, instance,
5113                                                self.op.shutdown_timeout)
5114       result.Raise("Could not shutdown instance for full reboot")
5115       _ShutdownInstanceDisks(self, instance)
5116       _StartInstanceDisks(self, instance, ignore_secondaries)
5117       result = self.rpc.call_instance_start(node_current, instance, None, None)
5118       msg = result.fail_msg
5119       if msg:
5120         _ShutdownInstanceDisks(self, instance)
5121         raise errors.OpExecError("Could not start instance for"
5122                                  " full reboot: %s" % msg)
5123
5124     self.cfg.MarkInstanceUp(instance.name)
5125
5126
5127 class LUShutdownInstance(LogicalUnit):
5128   """Shutdown an instance.
5129
5130   """
5131   HPATH = "instance-stop"
5132   HTYPE = constants.HTYPE_INSTANCE
5133   REQ_BGL = False
5134
5135   def ExpandNames(self):
5136     self._ExpandAndLockInstance()
5137
5138   def BuildHooksEnv(self):
5139     """Build hooks env.
5140
5141     This runs on master, primary and secondary nodes of the instance.
5142
5143     """
5144     env = _BuildInstanceHookEnvByObject(self, self.instance)
5145     env["TIMEOUT"] = self.op.timeout
5146     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5147     return env, nl, nl
5148
5149   def CheckPrereq(self):
5150     """Check prerequisites.
5151
5152     This checks that the instance is in the cluster.
5153
5154     """
5155     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5156     assert self.instance is not None, \
5157       "Cannot retrieve locked instance %s" % self.op.instance_name
5158
5159     self.primary_offline = \
5160       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5161
5162     if self.primary_offline and self.op.ignore_offline_nodes:
5163       self.proc.LogWarning("Ignoring offline primary node")
5164     else:
5165       _CheckNodeOnline(self, self.instance.primary_node)
5166
5167   def Exec(self, feedback_fn):
5168     """Shutdown the instance.
5169
5170     """
5171     instance = self.instance
5172     node_current = instance.primary_node
5173     timeout = self.op.timeout
5174
5175     self.cfg.MarkInstanceDown(instance.name)
5176
5177     if self.primary_offline:
5178       assert self.op.ignore_offline_nodes
5179       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5180     else:
5181       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5182       msg = result.fail_msg
5183       if msg:
5184         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5185
5186       _ShutdownInstanceDisks(self, instance)
5187
5188
5189 class LUReinstallInstance(LogicalUnit):
5190   """Reinstall an instance.
5191
5192   """
5193   HPATH = "instance-reinstall"
5194   HTYPE = constants.HTYPE_INSTANCE
5195   REQ_BGL = False
5196
5197   def ExpandNames(self):
5198     self._ExpandAndLockInstance()
5199
5200   def BuildHooksEnv(self):
5201     """Build hooks env.
5202
5203     This runs on master, primary and secondary nodes of the instance.
5204
5205     """
5206     env = _BuildInstanceHookEnvByObject(self, self.instance)
5207     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5208     return env, nl, nl
5209
5210   def CheckPrereq(self):
5211     """Check prerequisites.
5212
5213     This checks that the instance is in the cluster and is not running.
5214
5215     """
5216     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5217     assert instance is not None, \
5218       "Cannot retrieve locked instance %s" % self.op.instance_name
5219     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5220                      " offline, cannot reinstall")
5221     for node in instance.secondary_nodes:
5222       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5223                        " cannot reinstall")
5224
5225     if instance.disk_template == constants.DT_DISKLESS:
5226       raise errors.OpPrereqError("Instance '%s' has no disks" %
5227                                  self.op.instance_name,
5228                                  errors.ECODE_INVAL)
5229     _CheckInstanceDown(self, instance, "cannot reinstall")
5230
5231     if self.op.os_type is not None:
5232       # OS verification
5233       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5234       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5235       instance_os = self.op.os_type
5236     else:
5237       instance_os = instance.os
5238
5239     nodelist = list(instance.all_nodes)
5240
5241     if self.op.osparams:
5242       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5243       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5244       self.os_inst = i_osdict # the new dict (without defaults)
5245     else:
5246       self.os_inst = None
5247
5248     self.instance = instance
5249
5250   def Exec(self, feedback_fn):
5251     """Reinstall the instance.
5252
5253     """
5254     inst = self.instance
5255
5256     if self.op.os_type is not None:
5257       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5258       inst.os = self.op.os_type
5259       # Write to configuration
5260       self.cfg.Update(inst, feedback_fn)
5261
5262     _StartInstanceDisks(self, inst, None)
5263     try:
5264       feedback_fn("Running the instance OS create scripts...")
5265       # FIXME: pass debug option from opcode to backend
5266       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5267                                              self.op.debug_level,
5268                                              osparams=self.os_inst)
5269       result.Raise("Could not install OS for instance %s on node %s" %
5270                    (inst.name, inst.primary_node))
5271     finally:
5272       _ShutdownInstanceDisks(self, inst)
5273
5274
5275 class LURecreateInstanceDisks(LogicalUnit):
5276   """Recreate an instance's missing disks.
5277
5278   """
5279   HPATH = "instance-recreate-disks"
5280   HTYPE = constants.HTYPE_INSTANCE
5281   REQ_BGL = False
5282
5283   def ExpandNames(self):
5284     self._ExpandAndLockInstance()
5285
5286   def BuildHooksEnv(self):
5287     """Build hooks env.
5288
5289     This runs on master, primary and secondary nodes of the instance.
5290
5291     """
5292     env = _BuildInstanceHookEnvByObject(self, self.instance)
5293     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5294     return env, nl, nl
5295
5296   def CheckPrereq(self):
5297     """Check prerequisites.
5298
5299     This checks that the instance is in the cluster and is not running.
5300
5301     """
5302     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5303     assert instance is not None, \
5304       "Cannot retrieve locked instance %s" % self.op.instance_name
5305     _CheckNodeOnline(self, instance.primary_node)
5306
5307     if instance.disk_template == constants.DT_DISKLESS:
5308       raise errors.OpPrereqError("Instance '%s' has no disks" %
5309                                  self.op.instance_name, errors.ECODE_INVAL)
5310     _CheckInstanceDown(self, instance, "cannot recreate disks")
5311
5312     if not self.op.disks:
5313       self.op.disks = range(len(instance.disks))
5314     else:
5315       for idx in self.op.disks:
5316         if idx >= len(instance.disks):
5317           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5318                                      errors.ECODE_INVAL)
5319
5320     self.instance = instance
5321
5322   def Exec(self, feedback_fn):
5323     """Recreate the disks.
5324
5325     """
5326     to_skip = []
5327     for idx, _ in enumerate(self.instance.disks):
5328       if idx not in self.op.disks: # disk idx has not been passed in
5329         to_skip.append(idx)
5330         continue
5331
5332     _CreateDisks(self, self.instance, to_skip=to_skip)
5333
5334
5335 class LURenameInstance(LogicalUnit):
5336   """Rename an instance.
5337
5338   """
5339   HPATH = "instance-rename"
5340   HTYPE = constants.HTYPE_INSTANCE
5341
5342   def CheckArguments(self):
5343     """Check arguments.
5344
5345     """
5346     if self.op.ip_check and not self.op.name_check:
5347       # TODO: make the ip check more flexible and not depend on the name check
5348       raise errors.OpPrereqError("Cannot do ip check without a name check",
5349                                  errors.ECODE_INVAL)
5350
5351   def BuildHooksEnv(self):
5352     """Build hooks env.
5353
5354     This runs on master, primary and secondary nodes of the instance.
5355
5356     """
5357     env = _BuildInstanceHookEnvByObject(self, self.instance)
5358     env["INSTANCE_NEW_NAME"] = self.op.new_name
5359     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5360     return env, nl, nl
5361
5362   def CheckPrereq(self):
5363     """Check prerequisites.
5364
5365     This checks that the instance is in the cluster and is not running.
5366
5367     """
5368     self.op.instance_name = _ExpandInstanceName(self.cfg,
5369                                                 self.op.instance_name)
5370     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5371     assert instance is not None
5372     _CheckNodeOnline(self, instance.primary_node)
5373     _CheckInstanceDown(self, instance, "cannot rename")
5374     self.instance = instance
5375
5376     new_name = self.op.new_name
5377     if self.op.name_check:
5378       hostname = netutils.GetHostname(name=new_name)
5379       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5380                    hostname.name)
5381       new_name = self.op.new_name = hostname.name
5382       if (self.op.ip_check and
5383           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5384         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5385                                    (hostname.ip, new_name),
5386                                    errors.ECODE_NOTUNIQUE)
5387
5388     instance_list = self.cfg.GetInstanceList()
5389     if new_name in instance_list and new_name != instance.name:
5390       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5391                                  new_name, errors.ECODE_EXISTS)
5392
5393   def Exec(self, feedback_fn):
5394     """Rename the instance.
5395
5396     """
5397     inst = self.instance
5398     old_name = inst.name
5399
5400     rename_file_storage = False
5401     if (inst.disk_template == constants.DT_FILE and
5402         self.op.new_name != inst.name):
5403       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5404       rename_file_storage = True
5405
5406     self.cfg.RenameInstance(inst.name, self.op.new_name)
5407     # Change the instance lock. This is definitely safe while we hold the BGL
5408     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5409     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5410
5411     # re-read the instance from the configuration after rename
5412     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5413
5414     if rename_file_storage:
5415       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5416       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5417                                                      old_file_storage_dir,
5418                                                      new_file_storage_dir)
5419       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5420                    " (but the instance has been renamed in Ganeti)" %
5421                    (inst.primary_node, old_file_storage_dir,
5422                     new_file_storage_dir))
5423
5424     _StartInstanceDisks(self, inst, None)
5425     try:
5426       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5427                                                  old_name, self.op.debug_level)
5428       msg = result.fail_msg
5429       if msg:
5430         msg = ("Could not run OS rename script for instance %s on node %s"
5431                " (but the instance has been renamed in Ganeti): %s" %
5432                (inst.name, inst.primary_node, msg))
5433         self.proc.LogWarning(msg)
5434     finally:
5435       _ShutdownInstanceDisks(self, inst)
5436
5437     return inst.name
5438
5439
5440 class LURemoveInstance(LogicalUnit):
5441   """Remove an instance.
5442
5443   """
5444   HPATH = "instance-remove"
5445   HTYPE = constants.HTYPE_INSTANCE
5446   REQ_BGL = False
5447
5448   def ExpandNames(self):
5449     self._ExpandAndLockInstance()
5450     self.needed_locks[locking.LEVEL_NODE] = []
5451     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5452
5453   def DeclareLocks(self, level):
5454     if level == locking.LEVEL_NODE:
5455       self._LockInstancesNodes()
5456
5457   def BuildHooksEnv(self):
5458     """Build hooks env.
5459
5460     This runs on master, primary and secondary nodes of the instance.
5461
5462     """
5463     env = _BuildInstanceHookEnvByObject(self, self.instance)
5464     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5465     nl = [self.cfg.GetMasterNode()]
5466     nl_post = list(self.instance.all_nodes) + nl
5467     return env, nl, nl_post
5468
5469   def CheckPrereq(self):
5470     """Check prerequisites.
5471
5472     This checks that the instance is in the cluster.
5473
5474     """
5475     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5476     assert self.instance is not None, \
5477       "Cannot retrieve locked instance %s" % self.op.instance_name
5478
5479   def Exec(self, feedback_fn):
5480     """Remove the instance.
5481
5482     """
5483     instance = self.instance
5484     logging.info("Shutting down instance %s on node %s",
5485                  instance.name, instance.primary_node)
5486
5487     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5488                                              self.op.shutdown_timeout)
5489     msg = result.fail_msg
5490     if msg:
5491       if self.op.ignore_failures:
5492         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5493       else:
5494         raise errors.OpExecError("Could not shutdown instance %s on"
5495                                  " node %s: %s" %
5496                                  (instance.name, instance.primary_node, msg))
5497
5498     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5499
5500
5501 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5502   """Utility function to remove an instance.
5503
5504   """
5505   logging.info("Removing block devices for instance %s", instance.name)
5506
5507   if not _RemoveDisks(lu, instance):
5508     if not ignore_failures:
5509       raise errors.OpExecError("Can't remove instance's disks")
5510     feedback_fn("Warning: can't remove instance's disks")
5511
5512   logging.info("Removing instance %s out of cluster config", instance.name)
5513
5514   lu.cfg.RemoveInstance(instance.name)
5515
5516   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5517     "Instance lock removal conflict"
5518
5519   # Remove lock for the instance
5520   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5521
5522
5523 class LUQueryInstances(NoHooksLU):
5524   """Logical unit for querying instances.
5525
5526   """
5527   # pylint: disable-msg=W0142
5528   REQ_BGL = False
5529
5530   def CheckArguments(self):
5531     self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5532                              self.op.use_locking)
5533
5534   def ExpandNames(self):
5535     self.iq.ExpandNames(self)
5536
5537   def DeclareLocks(self, level):
5538     self.iq.DeclareLocks(self, level)
5539
5540   def Exec(self, feedback_fn):
5541     return self.iq.OldStyleQuery(self)
5542
5543
5544 class LUInstanceFailover(LogicalUnit):
5545   """Failover an instance.
5546
5547   """
5548   HPATH = "instance-failover"
5549   HTYPE = constants.HTYPE_INSTANCE
5550   REQ_BGL = False
5551
5552   def ExpandNames(self):
5553     self._ExpandAndLockInstance()
5554     self.needed_locks[locking.LEVEL_NODE] = []
5555     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5556
5557   def DeclareLocks(self, level):
5558     if level == locking.LEVEL_NODE:
5559       self._LockInstancesNodes()
5560
5561   def BuildHooksEnv(self):
5562     """Build hooks env.
5563
5564     This runs on master, primary and secondary nodes of the instance.
5565
5566     """
5567     instance = self.instance
5568     source_node = instance.primary_node
5569     target_node = instance.secondary_nodes[0]
5570     env = {
5571       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5572       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5573       "OLD_PRIMARY": source_node,
5574       "OLD_SECONDARY": target_node,
5575       "NEW_PRIMARY": target_node,
5576       "NEW_SECONDARY": source_node,
5577       }
5578     env.update(_BuildInstanceHookEnvByObject(self, instance))
5579     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5580     nl_post = list(nl)
5581     nl_post.append(source_node)
5582     return env, nl, nl_post
5583
5584   def CheckPrereq(self):
5585     """Check prerequisites.
5586
5587     This checks that the instance is in the cluster.
5588
5589     """
5590     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5591     assert self.instance is not None, \
5592       "Cannot retrieve locked instance %s" % self.op.instance_name
5593
5594     bep = self.cfg.GetClusterInfo().FillBE(instance)
5595     if instance.disk_template not in constants.DTS_NET_MIRROR:
5596       raise errors.OpPrereqError("Instance's disk layout is not"
5597                                  " network mirrored, cannot failover.",
5598                                  errors.ECODE_STATE)
5599
5600     secondary_nodes = instance.secondary_nodes
5601     if not secondary_nodes:
5602       raise errors.ProgrammerError("no secondary node but using "
5603                                    "a mirrored disk template")
5604
5605     target_node = secondary_nodes[0]
5606     _CheckNodeOnline(self, target_node)
5607     _CheckNodeNotDrained(self, target_node)
5608     if instance.admin_up:
5609       # check memory requirements on the secondary node
5610       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5611                            instance.name, bep[constants.BE_MEMORY],
5612                            instance.hypervisor)
5613     else:
5614       self.LogInfo("Not checking memory on the secondary node as"
5615                    " instance will not be started")
5616
5617     # check bridge existance
5618     _CheckInstanceBridgesExist(self, instance, node=target_node)
5619
5620   def Exec(self, feedback_fn):
5621     """Failover an instance.
5622
5623     The failover is done by shutting it down on its present node and
5624     starting it on the secondary.
5625
5626     """
5627     instance = self.instance
5628     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5629
5630     source_node = instance.primary_node
5631     target_node = instance.secondary_nodes[0]
5632
5633     if instance.admin_up:
5634       feedback_fn("* checking disk consistency between source and target")
5635       for dev in instance.disks:
5636         # for drbd, these are drbd over lvm
5637         if not _CheckDiskConsistency(self, dev, target_node, False):
5638           if not self.op.ignore_consistency:
5639             raise errors.OpExecError("Disk %s is degraded on target node,"
5640                                      " aborting failover." % dev.iv_name)
5641     else:
5642       feedback_fn("* not checking disk consistency as instance is not running")
5643
5644     feedback_fn("* shutting down instance on source node")
5645     logging.info("Shutting down instance %s on node %s",
5646                  instance.name, source_node)
5647
5648     result = self.rpc.call_instance_shutdown(source_node, instance,
5649                                              self.op.shutdown_timeout)
5650     msg = result.fail_msg
5651     if msg:
5652       if self.op.ignore_consistency or primary_node.offline:
5653         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5654                              " Proceeding anyway. Please make sure node"
5655                              " %s is down. Error details: %s",
5656                              instance.name, source_node, source_node, msg)
5657       else:
5658         raise errors.OpExecError("Could not shutdown instance %s on"
5659                                  " node %s: %s" %
5660                                  (instance.name, source_node, msg))
5661
5662     feedback_fn("* deactivating the instance's disks on source node")
5663     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5664       raise errors.OpExecError("Can't shut down the instance's disks.")
5665
5666     instance.primary_node = target_node
5667     # distribute new instance config to the other nodes
5668     self.cfg.Update(instance, feedback_fn)
5669
5670     # Only start the instance if it's marked as up
5671     if instance.admin_up:
5672       feedback_fn("* activating the instance's disks on target node")
5673       logging.info("Starting instance %s on node %s",
5674                    instance.name, target_node)
5675
5676       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5677                                            ignore_secondaries=True)
5678       if not disks_ok:
5679         _ShutdownInstanceDisks(self, instance)
5680         raise errors.OpExecError("Can't activate the instance's disks")
5681
5682       feedback_fn("* starting the instance on the target node")
5683       result = self.rpc.call_instance_start(target_node, instance, None, None)
5684       msg = result.fail_msg
5685       if msg:
5686         _ShutdownInstanceDisks(self, instance)
5687         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5688                                  (instance.name, target_node, msg))
5689
5690
5691 class LUInstanceMigrate(LogicalUnit):
5692   """Migrate an instance.
5693
5694   This is migration without shutting down, compared to the failover,
5695   which is done with shutdown.
5696
5697   """
5698   HPATH = "instance-migrate"
5699   HTYPE = constants.HTYPE_INSTANCE
5700   REQ_BGL = False
5701
5702   def ExpandNames(self):
5703     self._ExpandAndLockInstance()
5704
5705     self.needed_locks[locking.LEVEL_NODE] = []
5706     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5707
5708     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5709                                        self.op.cleanup)
5710     self.tasklets = [self._migrater]
5711
5712   def DeclareLocks(self, level):
5713     if level == locking.LEVEL_NODE:
5714       self._LockInstancesNodes()
5715
5716   def BuildHooksEnv(self):
5717     """Build hooks env.
5718
5719     This runs on master, primary and secondary nodes of the instance.
5720
5721     """
5722     instance = self._migrater.instance
5723     source_node = instance.primary_node
5724     target_node = instance.secondary_nodes[0]
5725     env = _BuildInstanceHookEnvByObject(self, instance)
5726     env["MIGRATE_LIVE"] = self._migrater.live
5727     env["MIGRATE_CLEANUP"] = self.op.cleanup
5728     env.update({
5729         "OLD_PRIMARY": source_node,
5730         "OLD_SECONDARY": target_node,
5731         "NEW_PRIMARY": target_node,
5732         "NEW_SECONDARY": source_node,
5733         })
5734     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5735     nl_post = list(nl)
5736     nl_post.append(source_node)
5737     return env, nl, nl_post
5738
5739
5740 class LUInstanceMove(LogicalUnit):
5741   """Move an instance by data-copying.
5742
5743   """
5744   HPATH = "instance-move"
5745   HTYPE = constants.HTYPE_INSTANCE
5746   REQ_BGL = False
5747
5748   def ExpandNames(self):
5749     self._ExpandAndLockInstance()
5750     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5751     self.op.target_node = target_node
5752     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5753     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5754
5755   def DeclareLocks(self, level):
5756     if level == locking.LEVEL_NODE:
5757       self._LockInstancesNodes(primary_only=True)
5758
5759   def BuildHooksEnv(self):
5760     """Build hooks env.
5761
5762     This runs on master, primary and secondary nodes of the instance.
5763
5764     """
5765     env = {
5766       "TARGET_NODE": self.op.target_node,
5767       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5768       }
5769     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5770     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5771                                        self.op.target_node]
5772     return env, nl, nl
5773
5774   def CheckPrereq(self):
5775     """Check prerequisites.
5776
5777     This checks that the instance is in the cluster.
5778
5779     """
5780     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5781     assert self.instance is not None, \
5782       "Cannot retrieve locked instance %s" % self.op.instance_name
5783
5784     node = self.cfg.GetNodeInfo(self.op.target_node)
5785     assert node is not None, \
5786       "Cannot retrieve locked node %s" % self.op.target_node
5787
5788     self.target_node = target_node = node.name
5789
5790     if target_node == instance.primary_node:
5791       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5792                                  (instance.name, target_node),
5793                                  errors.ECODE_STATE)
5794
5795     bep = self.cfg.GetClusterInfo().FillBE(instance)
5796
5797     for idx, dsk in enumerate(instance.disks):
5798       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5799         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5800                                    " cannot copy" % idx, errors.ECODE_STATE)
5801
5802     _CheckNodeOnline(self, target_node)
5803     _CheckNodeNotDrained(self, target_node)
5804     _CheckNodeVmCapable(self, target_node)
5805
5806     if instance.admin_up:
5807       # check memory requirements on the secondary node
5808       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5809                            instance.name, bep[constants.BE_MEMORY],
5810                            instance.hypervisor)
5811     else:
5812       self.LogInfo("Not checking memory on the secondary node as"
5813                    " instance will not be started")
5814
5815     # check bridge existance
5816     _CheckInstanceBridgesExist(self, instance, node=target_node)
5817
5818   def Exec(self, feedback_fn):
5819     """Move an instance.
5820
5821     The move is done by shutting it down on its present node, copying
5822     the data over (slow) and starting it on the new node.
5823
5824     """
5825     instance = self.instance
5826
5827     source_node = instance.primary_node
5828     target_node = self.target_node
5829
5830     self.LogInfo("Shutting down instance %s on source node %s",
5831                  instance.name, source_node)
5832
5833     result = self.rpc.call_instance_shutdown(source_node, instance,
5834                                              self.op.shutdown_timeout)
5835     msg = result.fail_msg
5836     if msg:
5837       if self.op.ignore_consistency:
5838         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5839                              " Proceeding anyway. Please make sure node"
5840                              " %s is down. Error details: %s",
5841                              instance.name, source_node, source_node, msg)
5842       else:
5843         raise errors.OpExecError("Could not shutdown instance %s on"
5844                                  " node %s: %s" %
5845                                  (instance.name, source_node, msg))
5846
5847     # create the target disks
5848     try:
5849       _CreateDisks(self, instance, target_node=target_node)
5850     except errors.OpExecError:
5851       self.LogWarning("Device creation failed, reverting...")
5852       try:
5853         _RemoveDisks(self, instance, target_node=target_node)
5854       finally:
5855         self.cfg.ReleaseDRBDMinors(instance.name)
5856         raise
5857
5858     cluster_name = self.cfg.GetClusterInfo().cluster_name
5859
5860     errs = []
5861     # activate, get path, copy the data over
5862     for idx, disk in enumerate(instance.disks):
5863       self.LogInfo("Copying data for disk %d", idx)
5864       result = self.rpc.call_blockdev_assemble(target_node, disk,
5865                                                instance.name, True)
5866       if result.fail_msg:
5867         self.LogWarning("Can't assemble newly created disk %d: %s",
5868                         idx, result.fail_msg)
5869         errs.append(result.fail_msg)
5870         break
5871       dev_path = result.payload
5872       result = self.rpc.call_blockdev_export(source_node, disk,
5873                                              target_node, dev_path,
5874                                              cluster_name)
5875       if result.fail_msg:
5876         self.LogWarning("Can't copy data over for disk %d: %s",
5877                         idx, result.fail_msg)
5878         errs.append(result.fail_msg)
5879         break
5880
5881     if errs:
5882       self.LogWarning("Some disks failed to copy, aborting")
5883       try:
5884         _RemoveDisks(self, instance, target_node=target_node)
5885       finally:
5886         self.cfg.ReleaseDRBDMinors(instance.name)
5887         raise errors.OpExecError("Errors during disk copy: %s" %
5888                                  (",".join(errs),))
5889
5890     instance.primary_node = target_node
5891     self.cfg.Update(instance, feedback_fn)
5892
5893     self.LogInfo("Removing the disks on the original node")
5894     _RemoveDisks(self, instance, target_node=source_node)
5895
5896     # Only start the instance if it's marked as up
5897     if instance.admin_up:
5898       self.LogInfo("Starting instance %s on node %s",
5899                    instance.name, target_node)
5900
5901       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5902                                            ignore_secondaries=True)
5903       if not disks_ok:
5904         _ShutdownInstanceDisks(self, instance)
5905         raise errors.OpExecError("Can't activate the instance's disks")
5906
5907       result = self.rpc.call_instance_start(target_node, instance, None, None)
5908       msg = result.fail_msg
5909       if msg:
5910         _ShutdownInstanceDisks(self, instance)
5911         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5912                                  (instance.name, target_node, msg))
5913
5914
5915 class LUMigrateNode(LogicalUnit):
5916   """Migrate all instances from a node.
5917
5918   """
5919   HPATH = "node-migrate"
5920   HTYPE = constants.HTYPE_NODE
5921   REQ_BGL = False
5922
5923   def ExpandNames(self):
5924     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5925
5926     self.needed_locks = {
5927       locking.LEVEL_NODE: [self.op.node_name],
5928       }
5929
5930     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5931
5932     # Create tasklets for migrating instances for all instances on this node
5933     names = []
5934     tasklets = []
5935
5936     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5937       logging.debug("Migrating instance %s", inst.name)
5938       names.append(inst.name)
5939
5940       tasklets.append(TLMigrateInstance(self, inst.name, False))
5941
5942     self.tasklets = tasklets
5943
5944     # Declare instance locks
5945     self.needed_locks[locking.LEVEL_INSTANCE] = names
5946
5947   def DeclareLocks(self, level):
5948     if level == locking.LEVEL_NODE:
5949       self._LockInstancesNodes()
5950
5951   def BuildHooksEnv(self):
5952     """Build hooks env.
5953
5954     This runs on the master, the primary and all the secondaries.
5955
5956     """
5957     env = {
5958       "NODE_NAME": self.op.node_name,
5959       }
5960
5961     nl = [self.cfg.GetMasterNode()]
5962
5963     return (env, nl, nl)
5964
5965
5966 class TLMigrateInstance(Tasklet):
5967   """Tasklet class for instance migration.
5968
5969   @type live: boolean
5970   @ivar live: whether the migration will be done live or non-live;
5971       this variable is initalized only after CheckPrereq has run
5972
5973   """
5974   def __init__(self, lu, instance_name, cleanup):
5975     """Initializes this class.
5976
5977     """
5978     Tasklet.__init__(self, lu)
5979
5980     # Parameters
5981     self.instance_name = instance_name
5982     self.cleanup = cleanup
5983     self.live = False # will be overridden later
5984
5985   def CheckPrereq(self):
5986     """Check prerequisites.
5987
5988     This checks that the instance is in the cluster.
5989
5990     """
5991     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5992     instance = self.cfg.GetInstanceInfo(instance_name)
5993     assert instance is not None
5994
5995     if instance.disk_template != constants.DT_DRBD8:
5996       raise errors.OpPrereqError("Instance's disk layout is not"
5997                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5998
5999     secondary_nodes = instance.secondary_nodes
6000     if not secondary_nodes:
6001       raise errors.ConfigurationError("No secondary node but using"
6002                                       " drbd8 disk template")
6003
6004     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6005
6006     target_node = secondary_nodes[0]
6007     # check memory requirements on the secondary node
6008     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6009                          instance.name, i_be[constants.BE_MEMORY],
6010                          instance.hypervisor)
6011
6012     # check bridge existance
6013     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6014
6015     if not self.cleanup:
6016       _CheckNodeNotDrained(self.lu, target_node)
6017       result = self.rpc.call_instance_migratable(instance.primary_node,
6018                                                  instance)
6019       result.Raise("Can't migrate, please use failover",
6020                    prereq=True, ecode=errors.ECODE_STATE)
6021
6022     self.instance = instance
6023
6024     if self.lu.op.live is not None and self.lu.op.mode is not None:
6025       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6026                                  " parameters are accepted",
6027                                  errors.ECODE_INVAL)
6028     if self.lu.op.live is not None:
6029       if self.lu.op.live:
6030         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6031       else:
6032         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6033       # reset the 'live' parameter to None so that repeated
6034       # invocations of CheckPrereq do not raise an exception
6035       self.lu.op.live = None
6036     elif self.lu.op.mode is None:
6037       # read the default value from the hypervisor
6038       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6039       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6040
6041     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6042
6043   def _WaitUntilSync(self):
6044     """Poll with custom rpc for disk sync.
6045
6046     This uses our own step-based rpc call.
6047
6048     """
6049     self.feedback_fn("* wait until resync is done")
6050     all_done = False
6051     while not all_done:
6052       all_done = True
6053       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6054                                             self.nodes_ip,
6055                                             self.instance.disks)
6056       min_percent = 100
6057       for node, nres in result.items():
6058         nres.Raise("Cannot resync disks on node %s" % node)
6059         node_done, node_percent = nres.payload
6060         all_done = all_done and node_done
6061         if node_percent is not None:
6062           min_percent = min(min_percent, node_percent)
6063       if not all_done:
6064         if min_percent < 100:
6065           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6066         time.sleep(2)
6067
6068   def _EnsureSecondary(self, node):
6069     """Demote a node to secondary.
6070
6071     """
6072     self.feedback_fn("* switching node %s to secondary mode" % node)
6073
6074     for dev in self.instance.disks:
6075       self.cfg.SetDiskID(dev, node)
6076
6077     result = self.rpc.call_blockdev_close(node, self.instance.name,
6078                                           self.instance.disks)
6079     result.Raise("Cannot change disk to secondary on node %s" % node)
6080
6081   def _GoStandalone(self):
6082     """Disconnect from the network.
6083
6084     """
6085     self.feedback_fn("* changing into standalone mode")
6086     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6087                                                self.instance.disks)
6088     for node, nres in result.items():
6089       nres.Raise("Cannot disconnect disks node %s" % node)
6090
6091   def _GoReconnect(self, multimaster):
6092     """Reconnect to the network.
6093
6094     """
6095     if multimaster:
6096       msg = "dual-master"
6097     else:
6098       msg = "single-master"
6099     self.feedback_fn("* changing disks into %s mode" % msg)
6100     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6101                                            self.instance.disks,
6102                                            self.instance.name, multimaster)
6103     for node, nres in result.items():
6104       nres.Raise("Cannot change disks config on node %s" % node)
6105
6106   def _ExecCleanup(self):
6107     """Try to cleanup after a failed migration.
6108
6109     The cleanup is done by:
6110       - check that the instance is running only on one node
6111         (and update the config if needed)
6112       - change disks on its secondary node to secondary
6113       - wait until disks are fully synchronized
6114       - disconnect from the network
6115       - change disks into single-master mode
6116       - wait again until disks are fully synchronized
6117
6118     """
6119     instance = self.instance
6120     target_node = self.target_node
6121     source_node = self.source_node
6122
6123     # check running on only one node
6124     self.feedback_fn("* checking where the instance actually runs"
6125                      " (if this hangs, the hypervisor might be in"
6126                      " a bad state)")
6127     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6128     for node, result in ins_l.items():
6129       result.Raise("Can't contact node %s" % node)
6130
6131     runningon_source = instance.name in ins_l[source_node].payload
6132     runningon_target = instance.name in ins_l[target_node].payload
6133
6134     if runningon_source and runningon_target:
6135       raise errors.OpExecError("Instance seems to be running on two nodes,"
6136                                " or the hypervisor is confused. You will have"
6137                                " to ensure manually that it runs only on one"
6138                                " and restart this operation.")
6139
6140     if not (runningon_source or runningon_target):
6141       raise errors.OpExecError("Instance does not seem to be running at all."
6142                                " In this case, it's safer to repair by"
6143                                " running 'gnt-instance stop' to ensure disk"
6144                                " shutdown, and then restarting it.")
6145
6146     if runningon_target:
6147       # the migration has actually succeeded, we need to update the config
6148       self.feedback_fn("* instance running on secondary node (%s),"
6149                        " updating config" % target_node)
6150       instance.primary_node = target_node
6151       self.cfg.Update(instance, self.feedback_fn)
6152       demoted_node = source_node
6153     else:
6154       self.feedback_fn("* instance confirmed to be running on its"
6155                        " primary node (%s)" % source_node)
6156       demoted_node = target_node
6157
6158     self._EnsureSecondary(demoted_node)
6159     try:
6160       self._WaitUntilSync()
6161     except errors.OpExecError:
6162       # we ignore here errors, since if the device is standalone, it
6163       # won't be able to sync
6164       pass
6165     self._GoStandalone()
6166     self._GoReconnect(False)
6167     self._WaitUntilSync()
6168
6169     self.feedback_fn("* done")
6170
6171   def _RevertDiskStatus(self):
6172     """Try to revert the disk status after a failed migration.
6173
6174     """
6175     target_node = self.target_node
6176     try:
6177       self._EnsureSecondary(target_node)
6178       self._GoStandalone()
6179       self._GoReconnect(False)
6180       self._WaitUntilSync()
6181     except errors.OpExecError, err:
6182       self.lu.LogWarning("Migration failed and I can't reconnect the"
6183                          " drives: error '%s'\n"
6184                          "Please look and recover the instance status" %
6185                          str(err))
6186
6187   def _AbortMigration(self):
6188     """Call the hypervisor code to abort a started migration.
6189
6190     """
6191     instance = self.instance
6192     target_node = self.target_node
6193     migration_info = self.migration_info
6194
6195     abort_result = self.rpc.call_finalize_migration(target_node,
6196                                                     instance,
6197                                                     migration_info,
6198                                                     False)
6199     abort_msg = abort_result.fail_msg
6200     if abort_msg:
6201       logging.error("Aborting migration failed on target node %s: %s",
6202                     target_node, abort_msg)
6203       # Don't raise an exception here, as we stil have to try to revert the
6204       # disk status, even if this step failed.
6205
6206   def _ExecMigration(self):
6207     """Migrate an instance.
6208
6209     The migrate is done by:
6210       - change the disks into dual-master mode
6211       - wait until disks are fully synchronized again
6212       - migrate the instance
6213       - change disks on the new secondary node (the old primary) to secondary
6214       - wait until disks are fully synchronized
6215       - change disks into single-master mode
6216
6217     """
6218     instance = self.instance
6219     target_node = self.target_node
6220     source_node = self.source_node
6221
6222     self.feedback_fn("* checking disk consistency between source and target")
6223     for dev in instance.disks:
6224       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6225         raise errors.OpExecError("Disk %s is degraded or not fully"
6226                                  " synchronized on target node,"
6227                                  " aborting migrate." % dev.iv_name)
6228
6229     # First get the migration information from the remote node
6230     result = self.rpc.call_migration_info(source_node, instance)
6231     msg = result.fail_msg
6232     if msg:
6233       log_err = ("Failed fetching source migration information from %s: %s" %
6234                  (source_node, msg))
6235       logging.error(log_err)
6236       raise errors.OpExecError(log_err)
6237
6238     self.migration_info = migration_info = result.payload
6239
6240     # Then switch the disks to master/master mode
6241     self._EnsureSecondary(target_node)
6242     self._GoStandalone()
6243     self._GoReconnect(True)
6244     self._WaitUntilSync()
6245
6246     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6247     result = self.rpc.call_accept_instance(target_node,
6248                                            instance,
6249                                            migration_info,
6250                                            self.nodes_ip[target_node])
6251
6252     msg = result.fail_msg
6253     if msg:
6254       logging.error("Instance pre-migration failed, trying to revert"
6255                     " disk status: %s", msg)
6256       self.feedback_fn("Pre-migration failed, aborting")
6257       self._AbortMigration()
6258       self._RevertDiskStatus()
6259       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6260                                (instance.name, msg))
6261
6262     self.feedback_fn("* migrating instance to %s" % target_node)
6263     time.sleep(10)
6264     result = self.rpc.call_instance_migrate(source_node, instance,
6265                                             self.nodes_ip[target_node],
6266                                             self.live)
6267     msg = result.fail_msg
6268     if msg:
6269       logging.error("Instance migration failed, trying to revert"
6270                     " disk status: %s", msg)
6271       self.feedback_fn("Migration failed, aborting")
6272       self._AbortMigration()
6273       self._RevertDiskStatus()
6274       raise errors.OpExecError("Could not migrate instance %s: %s" %
6275                                (instance.name, msg))
6276     time.sleep(10)
6277
6278     instance.primary_node = target_node
6279     # distribute new instance config to the other nodes
6280     self.cfg.Update(instance, self.feedback_fn)
6281
6282     result = self.rpc.call_finalize_migration(target_node,
6283                                               instance,
6284                                               migration_info,
6285                                               True)
6286     msg = result.fail_msg
6287     if msg:
6288       logging.error("Instance migration succeeded, but finalization failed:"
6289                     " %s", msg)
6290       raise errors.OpExecError("Could not finalize instance migration: %s" %
6291                                msg)
6292
6293     self._EnsureSecondary(source_node)
6294     self._WaitUntilSync()
6295     self._GoStandalone()
6296     self._GoReconnect(False)
6297     self._WaitUntilSync()
6298
6299     self.feedback_fn("* done")
6300
6301   def Exec(self, feedback_fn):
6302     """Perform the migration.
6303
6304     """
6305     feedback_fn("Migrating instance %s" % self.instance.name)
6306
6307     self.feedback_fn = feedback_fn
6308
6309     self.source_node = self.instance.primary_node
6310     self.target_node = self.instance.secondary_nodes[0]
6311     self.all_nodes = [self.source_node, self.target_node]
6312     self.nodes_ip = {
6313       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6314       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6315       }
6316
6317     if self.cleanup:
6318       return self._ExecCleanup()
6319     else:
6320       return self._ExecMigration()
6321
6322
6323 def _CreateBlockDev(lu, node, instance, device, force_create,
6324                     info, force_open):
6325   """Create a tree of block devices on a given node.
6326
6327   If this device type has to be created on secondaries, create it and
6328   all its children.
6329
6330   If not, just recurse to children keeping the same 'force' value.
6331
6332   @param lu: the lu on whose behalf we execute
6333   @param node: the node on which to create the device
6334   @type instance: L{objects.Instance}
6335   @param instance: the instance which owns the device
6336   @type device: L{objects.Disk}
6337   @param device: the device to create
6338   @type force_create: boolean
6339   @param force_create: whether to force creation of this device; this
6340       will be change to True whenever we find a device which has
6341       CreateOnSecondary() attribute
6342   @param info: the extra 'metadata' we should attach to the device
6343       (this will be represented as a LVM tag)
6344   @type force_open: boolean
6345   @param force_open: this parameter will be passes to the
6346       L{backend.BlockdevCreate} function where it specifies
6347       whether we run on primary or not, and it affects both
6348       the child assembly and the device own Open() execution
6349
6350   """
6351   if device.CreateOnSecondary():
6352     force_create = True
6353
6354   if device.children:
6355     for child in device.children:
6356       _CreateBlockDev(lu, node, instance, child, force_create,
6357                       info, force_open)
6358
6359   if not force_create:
6360     return
6361
6362   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6363
6364
6365 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6366   """Create a single block device on a given node.
6367
6368   This will not recurse over children of the device, so they must be
6369   created in advance.
6370
6371   @param lu: the lu on whose behalf we execute
6372   @param node: the node on which to create the device
6373   @type instance: L{objects.Instance}
6374   @param instance: the instance which owns the device
6375   @type device: L{objects.Disk}
6376   @param device: the device to create
6377   @param info: the extra 'metadata' we should attach to the device
6378       (this will be represented as a LVM tag)
6379   @type force_open: boolean
6380   @param force_open: this parameter will be passes to the
6381       L{backend.BlockdevCreate} function where it specifies
6382       whether we run on primary or not, and it affects both
6383       the child assembly and the device own Open() execution
6384
6385   """
6386   lu.cfg.SetDiskID(device, node)
6387   result = lu.rpc.call_blockdev_create(node, device, device.size,
6388                                        instance.name, force_open, info)
6389   result.Raise("Can't create block device %s on"
6390                " node %s for instance %s" % (device, node, instance.name))
6391   if device.physical_id is None:
6392     device.physical_id = result.payload
6393
6394
6395 def _GenerateUniqueNames(lu, exts):
6396   """Generate a suitable LV name.
6397
6398   This will generate a logical volume name for the given instance.
6399
6400   """
6401   results = []
6402   for val in exts:
6403     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6404     results.append("%s%s" % (new_id, val))
6405   return results
6406
6407
6408 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6409                          p_minor, s_minor):
6410   """Generate a drbd8 device complete with its children.
6411
6412   """
6413   port = lu.cfg.AllocatePort()
6414   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6415   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6416                           logical_id=(vgname, names[0]))
6417   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6418                           logical_id=(vgname, names[1]))
6419   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6420                           logical_id=(primary, secondary, port,
6421                                       p_minor, s_minor,
6422                                       shared_secret),
6423                           children=[dev_data, dev_meta],
6424                           iv_name=iv_name)
6425   return drbd_dev
6426
6427
6428 def _GenerateDiskTemplate(lu, template_name,
6429                           instance_name, primary_node,
6430                           secondary_nodes, disk_info,
6431                           file_storage_dir, file_driver,
6432                           base_index, feedback_fn):
6433   """Generate the entire disk layout for a given template type.
6434
6435   """
6436   #TODO: compute space requirements
6437
6438   vgname = lu.cfg.GetVGName()
6439   disk_count = len(disk_info)
6440   disks = []
6441   if template_name == constants.DT_DISKLESS:
6442     pass
6443   elif template_name == constants.DT_PLAIN:
6444     if len(secondary_nodes) != 0:
6445       raise errors.ProgrammerError("Wrong template configuration")
6446
6447     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6448                                       for i in range(disk_count)])
6449     for idx, disk in enumerate(disk_info):
6450       disk_index = idx + base_index
6451       vg = disk.get("vg", vgname)
6452       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6453       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6454                               logical_id=(vg, names[idx]),
6455                               iv_name="disk/%d" % disk_index,
6456                               mode=disk["mode"])
6457       disks.append(disk_dev)
6458   elif template_name == constants.DT_DRBD8:
6459     if len(secondary_nodes) != 1:
6460       raise errors.ProgrammerError("Wrong template configuration")
6461     remote_node = secondary_nodes[0]
6462     minors = lu.cfg.AllocateDRBDMinor(
6463       [primary_node, remote_node] * len(disk_info), instance_name)
6464
6465     names = []
6466     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6467                                                for i in range(disk_count)]):
6468       names.append(lv_prefix + "_data")
6469       names.append(lv_prefix + "_meta")
6470     for idx, disk in enumerate(disk_info):
6471       disk_index = idx + base_index
6472       vg = disk.get("vg", vgname)
6473       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6474                                       disk["size"], vg, names[idx*2:idx*2+2],
6475                                       "disk/%d" % disk_index,
6476                                       minors[idx*2], minors[idx*2+1])
6477       disk_dev.mode = disk["mode"]
6478       disks.append(disk_dev)
6479   elif template_name == constants.DT_FILE:
6480     if len(secondary_nodes) != 0:
6481       raise errors.ProgrammerError("Wrong template configuration")
6482
6483     opcodes.RequireFileStorage()
6484
6485     for idx, disk in enumerate(disk_info):
6486       disk_index = idx + base_index
6487       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6488                               iv_name="disk/%d" % disk_index,
6489                               logical_id=(file_driver,
6490                                           "%s/disk%d" % (file_storage_dir,
6491                                                          disk_index)),
6492                               mode=disk["mode"])
6493       disks.append(disk_dev)
6494   else:
6495     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6496   return disks
6497
6498
6499 def _GetInstanceInfoText(instance):
6500   """Compute that text that should be added to the disk's metadata.
6501
6502   """
6503   return "originstname+%s" % instance.name
6504
6505
6506 def _CalcEta(time_taken, written, total_size):
6507   """Calculates the ETA based on size written and total size.
6508
6509   @param time_taken: The time taken so far
6510   @param written: amount written so far
6511   @param total_size: The total size of data to be written
6512   @return: The remaining time in seconds
6513
6514   """
6515   avg_time = time_taken / float(written)
6516   return (total_size - written) * avg_time
6517
6518
6519 def _WipeDisks(lu, instance):
6520   """Wipes instance disks.
6521
6522   @type lu: L{LogicalUnit}
6523   @param lu: the logical unit on whose behalf we execute
6524   @type instance: L{objects.Instance}
6525   @param instance: the instance whose disks we should create
6526   @return: the success of the wipe
6527
6528   """
6529   node = instance.primary_node
6530   logging.info("Pause sync of instance %s disks", instance.name)
6531   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6532
6533   for idx, success in enumerate(result.payload):
6534     if not success:
6535       logging.warn("pause-sync of instance %s for disks %d failed",
6536                    instance.name, idx)
6537
6538   try:
6539     for idx, device in enumerate(instance.disks):
6540       lu.LogInfo("* Wiping disk %d", idx)
6541       logging.info("Wiping disk %d for instance %s", idx, instance.name)
6542
6543       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6544       # MAX_WIPE_CHUNK at max
6545       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6546                             constants.MIN_WIPE_CHUNK_PERCENT)
6547
6548       offset = 0
6549       size = device.size
6550       last_output = 0
6551       start_time = time.time()
6552
6553       while offset < size:
6554         wipe_size = min(wipe_chunk_size, size - offset)
6555         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6556         result.Raise("Could not wipe disk %d at offset %d for size %d" %
6557                      (idx, offset, wipe_size))
6558         now = time.time()
6559         offset += wipe_size
6560         if now - last_output >= 60:
6561           eta = _CalcEta(now - start_time, offset, size)
6562           lu.LogInfo(" - done: %.1f%% ETA: %s" %
6563                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
6564           last_output = now
6565   finally:
6566     logging.info("Resume sync of instance %s disks", instance.name)
6567
6568     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6569
6570     for idx, success in enumerate(result.payload):
6571       if not success:
6572         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6573                       " look at the status and troubleshoot the issue.", idx)
6574         logging.warn("resume-sync of instance %s for disks %d failed",
6575                      instance.name, idx)
6576
6577
6578 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6579   """Create all disks for an instance.
6580
6581   This abstracts away some work from AddInstance.
6582
6583   @type lu: L{LogicalUnit}
6584   @param lu: the logical unit on whose behalf we execute
6585   @type instance: L{objects.Instance}
6586   @param instance: the instance whose disks we should create
6587   @type to_skip: list
6588   @param to_skip: list of indices to skip
6589   @type target_node: string
6590   @param target_node: if passed, overrides the target node for creation
6591   @rtype: boolean
6592   @return: the success of the creation
6593
6594   """
6595   info = _GetInstanceInfoText(instance)
6596   if target_node is None:
6597     pnode = instance.primary_node
6598     all_nodes = instance.all_nodes
6599   else:
6600     pnode = target_node
6601     all_nodes = [pnode]
6602
6603   if instance.disk_template == constants.DT_FILE:
6604     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6605     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6606
6607     result.Raise("Failed to create directory '%s' on"
6608                  " node %s" % (file_storage_dir, pnode))
6609
6610   # Note: this needs to be kept in sync with adding of disks in
6611   # LUSetInstanceParams
6612   for idx, device in enumerate(instance.disks):
6613     if to_skip and idx in to_skip:
6614       continue
6615     logging.info("Creating volume %s for instance %s",
6616                  device.iv_name, instance.name)
6617     #HARDCODE
6618     for node in all_nodes:
6619       f_create = node == pnode
6620       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6621
6622
6623 def _RemoveDisks(lu, instance, target_node=None):
6624   """Remove all disks for an instance.
6625
6626   This abstracts away some work from `AddInstance()` and
6627   `RemoveInstance()`. Note that in case some of the devices couldn't
6628   be removed, the removal will continue with the other ones (compare
6629   with `_CreateDisks()`).
6630
6631   @type lu: L{LogicalUnit}
6632   @param lu: the logical unit on whose behalf we execute
6633   @type instance: L{objects.Instance}
6634   @param instance: the instance whose disks we should remove
6635   @type target_node: string
6636   @param target_node: used to override the node on which to remove the disks
6637   @rtype: boolean
6638   @return: the success of the removal
6639
6640   """
6641   logging.info("Removing block devices for instance %s", instance.name)
6642
6643   all_result = True
6644   for device in instance.disks:
6645     if target_node:
6646       edata = [(target_node, device)]
6647     else:
6648       edata = device.ComputeNodeTree(instance.primary_node)
6649     for node, disk in edata:
6650       lu.cfg.SetDiskID(disk, node)
6651       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6652       if msg:
6653         lu.LogWarning("Could not remove block device %s on node %s,"
6654                       " continuing anyway: %s", device.iv_name, node, msg)
6655         all_result = False
6656
6657   if instance.disk_template == constants.DT_FILE:
6658     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6659     if target_node:
6660       tgt = target_node
6661     else:
6662       tgt = instance.primary_node
6663     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6664     if result.fail_msg:
6665       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6666                     file_storage_dir, instance.primary_node, result.fail_msg)
6667       all_result = False
6668
6669   return all_result
6670
6671
6672 def _ComputeDiskSizePerVG(disk_template, disks):
6673   """Compute disk size requirements in the volume group
6674
6675   """
6676   def _compute(disks, payload):
6677     """Universal algorithm
6678
6679     """
6680     vgs = {}
6681     for disk in disks:
6682       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6683
6684     return vgs
6685
6686   # Required free disk space as a function of disk and swap space
6687   req_size_dict = {
6688     constants.DT_DISKLESS: {},
6689     constants.DT_PLAIN: _compute(disks, 0),
6690     # 128 MB are added for drbd metadata for each disk
6691     constants.DT_DRBD8: _compute(disks, 128),
6692     constants.DT_FILE: {},
6693   }
6694
6695   if disk_template not in req_size_dict:
6696     raise errors.ProgrammerError("Disk template '%s' size requirement"
6697                                  " is unknown" %  disk_template)
6698
6699   return req_size_dict[disk_template]
6700
6701
6702 def _ComputeDiskSize(disk_template, disks):
6703   """Compute disk size requirements in the volume group
6704
6705   """
6706   # Required free disk space as a function of disk and swap space
6707   req_size_dict = {
6708     constants.DT_DISKLESS: None,
6709     constants.DT_PLAIN: sum(d["size"] for d in disks),
6710     # 128 MB are added for drbd metadata for each disk
6711     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6712     constants.DT_FILE: None,
6713   }
6714
6715   if disk_template not in req_size_dict:
6716     raise errors.ProgrammerError("Disk template '%s' size requirement"
6717                                  " is unknown" %  disk_template)
6718
6719   return req_size_dict[disk_template]
6720
6721
6722 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6723   """Hypervisor parameter validation.
6724
6725   This function abstract the hypervisor parameter validation to be
6726   used in both instance create and instance modify.
6727
6728   @type lu: L{LogicalUnit}
6729   @param lu: the logical unit for which we check
6730   @type nodenames: list
6731   @param nodenames: the list of nodes on which we should check
6732   @type hvname: string
6733   @param hvname: the name of the hypervisor we should use
6734   @type hvparams: dict
6735   @param hvparams: the parameters which we need to check
6736   @raise errors.OpPrereqError: if the parameters are not valid
6737
6738   """
6739   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6740                                                   hvname,
6741                                                   hvparams)
6742   for node in nodenames:
6743     info = hvinfo[node]
6744     if info.offline:
6745       continue
6746     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6747
6748
6749 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6750   """OS parameters validation.
6751
6752   @type lu: L{LogicalUnit}
6753   @param lu: the logical unit for which we check
6754   @type required: boolean
6755   @param required: whether the validation should fail if the OS is not
6756       found
6757   @type nodenames: list
6758   @param nodenames: the list of nodes on which we should check
6759   @type osname: string
6760   @param osname: the name of the hypervisor we should use
6761   @type osparams: dict
6762   @param osparams: the parameters which we need to check
6763   @raise errors.OpPrereqError: if the parameters are not valid
6764
6765   """
6766   result = lu.rpc.call_os_validate(required, nodenames, osname,
6767                                    [constants.OS_VALIDATE_PARAMETERS],
6768                                    osparams)
6769   for node, nres in result.items():
6770     # we don't check for offline cases since this should be run only
6771     # against the master node and/or an instance's nodes
6772     nres.Raise("OS Parameters validation failed on node %s" % node)
6773     if not nres.payload:
6774       lu.LogInfo("OS %s not found on node %s, validation skipped",
6775                  osname, node)
6776
6777
6778 class LUInstanceCreate(LogicalUnit):
6779   """Create an instance.
6780
6781   """
6782   HPATH = "instance-add"
6783   HTYPE = constants.HTYPE_INSTANCE
6784   REQ_BGL = False
6785
6786   def CheckArguments(self):
6787     """Check arguments.
6788
6789     """
6790     # do not require name_check to ease forward/backward compatibility
6791     # for tools
6792     if self.op.no_install and self.op.start:
6793       self.LogInfo("No-installation mode selected, disabling startup")
6794       self.op.start = False
6795     # validate/normalize the instance name
6796     self.op.instance_name = \
6797       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6798
6799     if self.op.ip_check and not self.op.name_check:
6800       # TODO: make the ip check more flexible and not depend on the name check
6801       raise errors.OpPrereqError("Cannot do ip check without a name check",
6802                                  errors.ECODE_INVAL)
6803
6804     # check nics' parameter names
6805     for nic in self.op.nics:
6806       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6807
6808     # check disks. parameter names and consistent adopt/no-adopt strategy
6809     has_adopt = has_no_adopt = False
6810     for disk in self.op.disks:
6811       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6812       if "adopt" in disk:
6813         has_adopt = True
6814       else:
6815         has_no_adopt = True
6816     if has_adopt and has_no_adopt:
6817       raise errors.OpPrereqError("Either all disks are adopted or none is",
6818                                  errors.ECODE_INVAL)
6819     if has_adopt:
6820       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6821         raise errors.OpPrereqError("Disk adoption is not supported for the"
6822                                    " '%s' disk template" %
6823                                    self.op.disk_template,
6824                                    errors.ECODE_INVAL)
6825       if self.op.iallocator is not None:
6826         raise errors.OpPrereqError("Disk adoption not allowed with an"
6827                                    " iallocator script", errors.ECODE_INVAL)
6828       if self.op.mode == constants.INSTANCE_IMPORT:
6829         raise errors.OpPrereqError("Disk adoption not allowed for"
6830                                    " instance import", errors.ECODE_INVAL)
6831
6832     self.adopt_disks = has_adopt
6833
6834     # instance name verification
6835     if self.op.name_check:
6836       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6837       self.op.instance_name = self.hostname1.name
6838       # used in CheckPrereq for ip ping check
6839       self.check_ip = self.hostname1.ip
6840     else:
6841       self.check_ip = None
6842
6843     # file storage checks
6844     if (self.op.file_driver and
6845         not self.op.file_driver in constants.FILE_DRIVER):
6846       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6847                                  self.op.file_driver, errors.ECODE_INVAL)
6848
6849     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6850       raise errors.OpPrereqError("File storage directory path not absolute",
6851                                  errors.ECODE_INVAL)
6852
6853     ### Node/iallocator related checks
6854     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6855
6856     if self.op.pnode is not None:
6857       if self.op.disk_template in constants.DTS_NET_MIRROR:
6858         if self.op.snode is None:
6859           raise errors.OpPrereqError("The networked disk templates need"
6860                                      " a mirror node", errors.ECODE_INVAL)
6861       elif self.op.snode:
6862         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6863                         " template")
6864         self.op.snode = None
6865
6866     self._cds = _GetClusterDomainSecret()
6867
6868     if self.op.mode == constants.INSTANCE_IMPORT:
6869       # On import force_variant must be True, because if we forced it at
6870       # initial install, our only chance when importing it back is that it
6871       # works again!
6872       self.op.force_variant = True
6873
6874       if self.op.no_install:
6875         self.LogInfo("No-installation mode has no effect during import")
6876
6877     elif self.op.mode == constants.INSTANCE_CREATE:
6878       if self.op.os_type is None:
6879         raise errors.OpPrereqError("No guest OS specified",
6880                                    errors.ECODE_INVAL)
6881       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6882         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6883                                    " installation" % self.op.os_type,
6884                                    errors.ECODE_STATE)
6885       if self.op.disk_template is None:
6886         raise errors.OpPrereqError("No disk template specified",
6887                                    errors.ECODE_INVAL)
6888
6889     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6890       # Check handshake to ensure both clusters have the same domain secret
6891       src_handshake = self.op.source_handshake
6892       if not src_handshake:
6893         raise errors.OpPrereqError("Missing source handshake",
6894                                    errors.ECODE_INVAL)
6895
6896       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6897                                                            src_handshake)
6898       if errmsg:
6899         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6900                                    errors.ECODE_INVAL)
6901
6902       # Load and check source CA
6903       self.source_x509_ca_pem = self.op.source_x509_ca
6904       if not self.source_x509_ca_pem:
6905         raise errors.OpPrereqError("Missing source X509 CA",
6906                                    errors.ECODE_INVAL)
6907
6908       try:
6909         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6910                                                     self._cds)
6911       except OpenSSL.crypto.Error, err:
6912         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6913                                    (err, ), errors.ECODE_INVAL)
6914
6915       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6916       if errcode is not None:
6917         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6918                                    errors.ECODE_INVAL)
6919
6920       self.source_x509_ca = cert
6921
6922       src_instance_name = self.op.source_instance_name
6923       if not src_instance_name:
6924         raise errors.OpPrereqError("Missing source instance name",
6925                                    errors.ECODE_INVAL)
6926
6927       self.source_instance_name = \
6928           netutils.GetHostname(name=src_instance_name).name
6929
6930     else:
6931       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6932                                  self.op.mode, errors.ECODE_INVAL)
6933
6934   def ExpandNames(self):
6935     """ExpandNames for CreateInstance.
6936
6937     Figure out the right locks for instance creation.
6938
6939     """
6940     self.needed_locks = {}
6941
6942     instance_name = self.op.instance_name
6943     # this is just a preventive check, but someone might still add this
6944     # instance in the meantime, and creation will fail at lock-add time
6945     if instance_name in self.cfg.GetInstanceList():
6946       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6947                                  instance_name, errors.ECODE_EXISTS)
6948
6949     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6950
6951     if self.op.iallocator:
6952       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6953     else:
6954       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6955       nodelist = [self.op.pnode]
6956       if self.op.snode is not None:
6957         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6958         nodelist.append(self.op.snode)
6959       self.needed_locks[locking.LEVEL_NODE] = nodelist
6960
6961     # in case of import lock the source node too
6962     if self.op.mode == constants.INSTANCE_IMPORT:
6963       src_node = self.op.src_node
6964       src_path = self.op.src_path
6965
6966       if src_path is None:
6967         self.op.src_path = src_path = self.op.instance_name
6968
6969       if src_node is None:
6970         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6971         self.op.src_node = None
6972         if os.path.isabs(src_path):
6973           raise errors.OpPrereqError("Importing an instance from an absolute"
6974                                      " path requires a source node option.",
6975                                      errors.ECODE_INVAL)
6976       else:
6977         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6978         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6979           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6980         if not os.path.isabs(src_path):
6981           self.op.src_path = src_path = \
6982             utils.PathJoin(constants.EXPORT_DIR, src_path)
6983
6984   def _RunAllocator(self):
6985     """Run the allocator based on input opcode.
6986
6987     """
6988     nics = [n.ToDict() for n in self.nics]
6989     ial = IAllocator(self.cfg, self.rpc,
6990                      mode=constants.IALLOCATOR_MODE_ALLOC,
6991                      name=self.op.instance_name,
6992                      disk_template=self.op.disk_template,
6993                      tags=[],
6994                      os=self.op.os_type,
6995                      vcpus=self.be_full[constants.BE_VCPUS],
6996                      mem_size=self.be_full[constants.BE_MEMORY],
6997                      disks=self.disks,
6998                      nics=nics,
6999                      hypervisor=self.op.hypervisor,
7000                      )
7001
7002     ial.Run(self.op.iallocator)
7003
7004     if not ial.success:
7005       raise errors.OpPrereqError("Can't compute nodes using"
7006                                  " iallocator '%s': %s" %
7007                                  (self.op.iallocator, ial.info),
7008                                  errors.ECODE_NORES)
7009     if len(ial.result) != ial.required_nodes:
7010       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7011                                  " of nodes (%s), required %s" %
7012                                  (self.op.iallocator, len(ial.result),
7013                                   ial.required_nodes), errors.ECODE_FAULT)
7014     self.op.pnode = ial.result[0]
7015     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7016                  self.op.instance_name, self.op.iallocator,
7017                  utils.CommaJoin(ial.result))
7018     if ial.required_nodes == 2:
7019       self.op.snode = ial.result[1]
7020
7021   def BuildHooksEnv(self):
7022     """Build hooks env.
7023
7024     This runs on master, primary and secondary nodes of the instance.
7025
7026     """
7027     env = {
7028       "ADD_MODE": self.op.mode,
7029       }
7030     if self.op.mode == constants.INSTANCE_IMPORT:
7031       env["SRC_NODE"] = self.op.src_node
7032       env["SRC_PATH"] = self.op.src_path
7033       env["SRC_IMAGES"] = self.src_images
7034
7035     env.update(_BuildInstanceHookEnv(
7036       name=self.op.instance_name,
7037       primary_node=self.op.pnode,
7038       secondary_nodes=self.secondaries,
7039       status=self.op.start,
7040       os_type=self.op.os_type,
7041       memory=self.be_full[constants.BE_MEMORY],
7042       vcpus=self.be_full[constants.BE_VCPUS],
7043       nics=_NICListToTuple(self, self.nics),
7044       disk_template=self.op.disk_template,
7045       disks=[(d["size"], d["mode"]) for d in self.disks],
7046       bep=self.be_full,
7047       hvp=self.hv_full,
7048       hypervisor_name=self.op.hypervisor,
7049     ))
7050
7051     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7052           self.secondaries)
7053     return env, nl, nl
7054
7055   def _ReadExportInfo(self):
7056     """Reads the export information from disk.
7057
7058     It will override the opcode source node and path with the actual
7059     information, if these two were not specified before.
7060
7061     @return: the export information
7062
7063     """
7064     assert self.op.mode == constants.INSTANCE_IMPORT
7065
7066     src_node = self.op.src_node
7067     src_path = self.op.src_path
7068
7069     if src_node is None:
7070       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7071       exp_list = self.rpc.call_export_list(locked_nodes)
7072       found = False
7073       for node in exp_list:
7074         if exp_list[node].fail_msg:
7075           continue
7076         if src_path in exp_list[node].payload:
7077           found = True
7078           self.op.src_node = src_node = node
7079           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7080                                                        src_path)
7081           break
7082       if not found:
7083         raise errors.OpPrereqError("No export found for relative path %s" %
7084                                     src_path, errors.ECODE_INVAL)
7085
7086     _CheckNodeOnline(self, src_node)
7087     result = self.rpc.call_export_info(src_node, src_path)
7088     result.Raise("No export or invalid export found in dir %s" % src_path)
7089
7090     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7091     if not export_info.has_section(constants.INISECT_EXP):
7092       raise errors.ProgrammerError("Corrupted export config",
7093                                    errors.ECODE_ENVIRON)
7094
7095     ei_version = export_info.get(constants.INISECT_EXP, "version")
7096     if (int(ei_version) != constants.EXPORT_VERSION):
7097       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7098                                  (ei_version, constants.EXPORT_VERSION),
7099                                  errors.ECODE_ENVIRON)
7100     return export_info
7101
7102   def _ReadExportParams(self, einfo):
7103     """Use export parameters as defaults.
7104
7105     In case the opcode doesn't specify (as in override) some instance
7106     parameters, then try to use them from the export information, if
7107     that declares them.
7108
7109     """
7110     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7111
7112     if self.op.disk_template is None:
7113       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7114         self.op.disk_template = einfo.get(constants.INISECT_INS,
7115                                           "disk_template")
7116       else:
7117         raise errors.OpPrereqError("No disk template specified and the export"
7118                                    " is missing the disk_template information",
7119                                    errors.ECODE_INVAL)
7120
7121     if not self.op.disks:
7122       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7123         disks = []
7124         # TODO: import the disk iv_name too
7125         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7126           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7127           disks.append({"size": disk_sz})
7128         self.op.disks = disks
7129       else:
7130         raise errors.OpPrereqError("No disk info specified and the export"
7131                                    " is missing the disk information",
7132                                    errors.ECODE_INVAL)
7133
7134     if (not self.op.nics and
7135         einfo.has_option(constants.INISECT_INS, "nic_count")):
7136       nics = []
7137       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7138         ndict = {}
7139         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7140           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7141           ndict[name] = v
7142         nics.append(ndict)
7143       self.op.nics = nics
7144
7145     if (self.op.hypervisor is None and
7146         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7147       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7148     if einfo.has_section(constants.INISECT_HYP):
7149       # use the export parameters but do not override the ones
7150       # specified by the user
7151       for name, value in einfo.items(constants.INISECT_HYP):
7152         if name not in self.op.hvparams:
7153           self.op.hvparams[name] = value
7154
7155     if einfo.has_section(constants.INISECT_BEP):
7156       # use the parameters, without overriding
7157       for name, value in einfo.items(constants.INISECT_BEP):
7158         if name not in self.op.beparams:
7159           self.op.beparams[name] = value
7160     else:
7161       # try to read the parameters old style, from the main section
7162       for name in constants.BES_PARAMETERS:
7163         if (name not in self.op.beparams and
7164             einfo.has_option(constants.INISECT_INS, name)):
7165           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7166
7167     if einfo.has_section(constants.INISECT_OSP):
7168       # use the parameters, without overriding
7169       for name, value in einfo.items(constants.INISECT_OSP):
7170         if name not in self.op.osparams:
7171           self.op.osparams[name] = value
7172
7173   def _RevertToDefaults(self, cluster):
7174     """Revert the instance parameters to the default values.
7175
7176     """
7177     # hvparams
7178     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7179     for name in self.op.hvparams.keys():
7180       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7181         del self.op.hvparams[name]
7182     # beparams
7183     be_defs = cluster.SimpleFillBE({})
7184     for name in self.op.beparams.keys():
7185       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7186         del self.op.beparams[name]
7187     # nic params
7188     nic_defs = cluster.SimpleFillNIC({})
7189     for nic in self.op.nics:
7190       for name in constants.NICS_PARAMETERS:
7191         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7192           del nic[name]
7193     # osparams
7194     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7195     for name in self.op.osparams.keys():
7196       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7197         del self.op.osparams[name]
7198
7199   def CheckPrereq(self):
7200     """Check prerequisites.
7201
7202     """
7203     if self.op.mode == constants.INSTANCE_IMPORT:
7204       export_info = self._ReadExportInfo()
7205       self._ReadExportParams(export_info)
7206
7207     if (not self.cfg.GetVGName() and
7208         self.op.disk_template not in constants.DTS_NOT_LVM):
7209       raise errors.OpPrereqError("Cluster does not support lvm-based"
7210                                  " instances", errors.ECODE_STATE)
7211
7212     if self.op.hypervisor is None:
7213       self.op.hypervisor = self.cfg.GetHypervisorType()
7214
7215     cluster = self.cfg.GetClusterInfo()
7216     enabled_hvs = cluster.enabled_hypervisors
7217     if self.op.hypervisor not in enabled_hvs:
7218       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7219                                  " cluster (%s)" % (self.op.hypervisor,
7220                                   ",".join(enabled_hvs)),
7221                                  errors.ECODE_STATE)
7222
7223     # check hypervisor parameter syntax (locally)
7224     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7225     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7226                                       self.op.hvparams)
7227     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7228     hv_type.CheckParameterSyntax(filled_hvp)
7229     self.hv_full = filled_hvp
7230     # check that we don't specify global parameters on an instance
7231     _CheckGlobalHvParams(self.op.hvparams)
7232
7233     # fill and remember the beparams dict
7234     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7235     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7236
7237     # build os parameters
7238     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7239
7240     # now that hvp/bep are in final format, let's reset to defaults,
7241     # if told to do so
7242     if self.op.identify_defaults:
7243       self._RevertToDefaults(cluster)
7244
7245     # NIC buildup
7246     self.nics = []
7247     for idx, nic in enumerate(self.op.nics):
7248       nic_mode_req = nic.get("mode", None)
7249       nic_mode = nic_mode_req
7250       if nic_mode is None:
7251         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7252
7253       # in routed mode, for the first nic, the default ip is 'auto'
7254       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7255         default_ip_mode = constants.VALUE_AUTO
7256       else:
7257         default_ip_mode = constants.VALUE_NONE
7258
7259       # ip validity checks
7260       ip = nic.get("ip", default_ip_mode)
7261       if ip is None or ip.lower() == constants.VALUE_NONE:
7262         nic_ip = None
7263       elif ip.lower() == constants.VALUE_AUTO:
7264         if not self.op.name_check:
7265           raise errors.OpPrereqError("IP address set to auto but name checks"
7266                                      " have been skipped",
7267                                      errors.ECODE_INVAL)
7268         nic_ip = self.hostname1.ip
7269       else:
7270         if not netutils.IPAddress.IsValid(ip):
7271           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7272                                      errors.ECODE_INVAL)
7273         nic_ip = ip
7274
7275       # TODO: check the ip address for uniqueness
7276       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7277         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7278                                    errors.ECODE_INVAL)
7279
7280       # MAC address verification
7281       mac = nic.get("mac", constants.VALUE_AUTO)
7282       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7283         mac = utils.NormalizeAndValidateMac(mac)
7284
7285         try:
7286           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7287         except errors.ReservationError:
7288           raise errors.OpPrereqError("MAC address %s already in use"
7289                                      " in cluster" % mac,
7290                                      errors.ECODE_NOTUNIQUE)
7291
7292       # bridge verification
7293       bridge = nic.get("bridge", None)
7294       link = nic.get("link", None)
7295       if bridge and link:
7296         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7297                                    " at the same time", errors.ECODE_INVAL)
7298       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7299         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7300                                    errors.ECODE_INVAL)
7301       elif bridge:
7302         link = bridge
7303
7304       nicparams = {}
7305       if nic_mode_req:
7306         nicparams[constants.NIC_MODE] = nic_mode_req
7307       if link:
7308         nicparams[constants.NIC_LINK] = link
7309
7310       check_params = cluster.SimpleFillNIC(nicparams)
7311       objects.NIC.CheckParameterSyntax(check_params)
7312       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7313
7314     # disk checks/pre-build
7315     self.disks = []
7316     for disk in self.op.disks:
7317       mode = disk.get("mode", constants.DISK_RDWR)
7318       if mode not in constants.DISK_ACCESS_SET:
7319         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7320                                    mode, errors.ECODE_INVAL)
7321       size = disk.get("size", None)
7322       if size is None:
7323         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7324       try:
7325         size = int(size)
7326       except (TypeError, ValueError):
7327         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7328                                    errors.ECODE_INVAL)
7329       vg = disk.get("vg", self.cfg.GetVGName())
7330       new_disk = {"size": size, "mode": mode, "vg": vg}
7331       if "adopt" in disk:
7332         new_disk["adopt"] = disk["adopt"]
7333       self.disks.append(new_disk)
7334
7335     if self.op.mode == constants.INSTANCE_IMPORT:
7336
7337       # Check that the new instance doesn't have less disks than the export
7338       instance_disks = len(self.disks)
7339       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7340       if instance_disks < export_disks:
7341         raise errors.OpPrereqError("Not enough disks to import."
7342                                    " (instance: %d, export: %d)" %
7343                                    (instance_disks, export_disks),
7344                                    errors.ECODE_INVAL)
7345
7346       disk_images = []
7347       for idx in range(export_disks):
7348         option = 'disk%d_dump' % idx
7349         if export_info.has_option(constants.INISECT_INS, option):
7350           # FIXME: are the old os-es, disk sizes, etc. useful?
7351           export_name = export_info.get(constants.INISECT_INS, option)
7352           image = utils.PathJoin(self.op.src_path, export_name)
7353           disk_images.append(image)
7354         else:
7355           disk_images.append(False)
7356
7357       self.src_images = disk_images
7358
7359       old_name = export_info.get(constants.INISECT_INS, 'name')
7360       try:
7361         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7362       except (TypeError, ValueError), err:
7363         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7364                                    " an integer: %s" % str(err),
7365                                    errors.ECODE_STATE)
7366       if self.op.instance_name == old_name:
7367         for idx, nic in enumerate(self.nics):
7368           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7369             nic_mac_ini = 'nic%d_mac' % idx
7370             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7371
7372     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7373
7374     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7375     if self.op.ip_check:
7376       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7377         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7378                                    (self.check_ip, self.op.instance_name),
7379                                    errors.ECODE_NOTUNIQUE)
7380
7381     #### mac address generation
7382     # By generating here the mac address both the allocator and the hooks get
7383     # the real final mac address rather than the 'auto' or 'generate' value.
7384     # There is a race condition between the generation and the instance object
7385     # creation, which means that we know the mac is valid now, but we're not
7386     # sure it will be when we actually add the instance. If things go bad
7387     # adding the instance will abort because of a duplicate mac, and the
7388     # creation job will fail.
7389     for nic in self.nics:
7390       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7391         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7392
7393     #### allocator run
7394
7395     if self.op.iallocator is not None:
7396       self._RunAllocator()
7397
7398     #### node related checks
7399
7400     # check primary node
7401     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7402     assert self.pnode is not None, \
7403       "Cannot retrieve locked node %s" % self.op.pnode
7404     if pnode.offline:
7405       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7406                                  pnode.name, errors.ECODE_STATE)
7407     if pnode.drained:
7408       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7409                                  pnode.name, errors.ECODE_STATE)
7410     if not pnode.vm_capable:
7411       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7412                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7413
7414     self.secondaries = []
7415
7416     # mirror node verification
7417     if self.op.disk_template in constants.DTS_NET_MIRROR:
7418       if self.op.snode == pnode.name:
7419         raise errors.OpPrereqError("The secondary node cannot be the"
7420                                    " primary node.", errors.ECODE_INVAL)
7421       _CheckNodeOnline(self, self.op.snode)
7422       _CheckNodeNotDrained(self, self.op.snode)
7423       _CheckNodeVmCapable(self, self.op.snode)
7424       self.secondaries.append(self.op.snode)
7425
7426     nodenames = [pnode.name] + self.secondaries
7427
7428     if not self.adopt_disks:
7429       # Check lv size requirements, if not adopting
7430       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7431       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7432
7433     else: # instead, we must check the adoption data
7434       all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7435       if len(all_lvs) != len(self.disks):
7436         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7437                                    errors.ECODE_INVAL)
7438       for lv_name in all_lvs:
7439         try:
7440           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7441           # to ReserveLV uses the same syntax
7442           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7443         except errors.ReservationError:
7444           raise errors.OpPrereqError("LV named %s used by another instance" %
7445                                      lv_name, errors.ECODE_NOTUNIQUE)
7446
7447       vg_names = self.rpc.call_vg_list([pnode.name])
7448       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7449
7450       node_lvs = self.rpc.call_lv_list([pnode.name],
7451                                        vg_names[pnode.name].payload.keys()
7452                                       )[pnode.name]
7453       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7454       node_lvs = node_lvs.payload
7455
7456       delta = all_lvs.difference(node_lvs.keys())
7457       if delta:
7458         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7459                                    utils.CommaJoin(delta),
7460                                    errors.ECODE_INVAL)
7461       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7462       if online_lvs:
7463         raise errors.OpPrereqError("Online logical volumes found, cannot"
7464                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7465                                    errors.ECODE_STATE)
7466       # update the size of disk based on what is found
7467       for dsk in self.disks:
7468         dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7469
7470     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7471
7472     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7473     # check OS parameters (remotely)
7474     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7475
7476     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7477
7478     # memory check on primary node
7479     if self.op.start:
7480       _CheckNodeFreeMemory(self, self.pnode.name,
7481                            "creating instance %s" % self.op.instance_name,
7482                            self.be_full[constants.BE_MEMORY],
7483                            self.op.hypervisor)
7484
7485     self.dry_run_result = list(nodenames)
7486
7487   def Exec(self, feedback_fn):
7488     """Create and add the instance to the cluster.
7489
7490     """
7491     instance = self.op.instance_name
7492     pnode_name = self.pnode.name
7493
7494     ht_kind = self.op.hypervisor
7495     if ht_kind in constants.HTS_REQ_PORT:
7496       network_port = self.cfg.AllocatePort()
7497     else:
7498       network_port = None
7499
7500     if constants.ENABLE_FILE_STORAGE:
7501       # this is needed because os.path.join does not accept None arguments
7502       if self.op.file_storage_dir is None:
7503         string_file_storage_dir = ""
7504       else:
7505         string_file_storage_dir = self.op.file_storage_dir
7506
7507       # build the full file storage dir path
7508       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7509                                         string_file_storage_dir, instance)
7510     else:
7511       file_storage_dir = ""
7512
7513     disks = _GenerateDiskTemplate(self,
7514                                   self.op.disk_template,
7515                                   instance, pnode_name,
7516                                   self.secondaries,
7517                                   self.disks,
7518                                   file_storage_dir,
7519                                   self.op.file_driver,
7520                                   0,
7521                                   feedback_fn)
7522
7523     iobj = objects.Instance(name=instance, os=self.op.os_type,
7524                             primary_node=pnode_name,
7525                             nics=self.nics, disks=disks,
7526                             disk_template=self.op.disk_template,
7527                             admin_up=False,
7528                             network_port=network_port,
7529                             beparams=self.op.beparams,
7530                             hvparams=self.op.hvparams,
7531                             hypervisor=self.op.hypervisor,
7532                             osparams=self.op.osparams,
7533                             )
7534
7535     if self.adopt_disks:
7536       # rename LVs to the newly-generated names; we need to construct
7537       # 'fake' LV disks with the old data, plus the new unique_id
7538       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7539       rename_to = []
7540       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7541         rename_to.append(t_dsk.logical_id)
7542         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7543         self.cfg.SetDiskID(t_dsk, pnode_name)
7544       result = self.rpc.call_blockdev_rename(pnode_name,
7545                                              zip(tmp_disks, rename_to))
7546       result.Raise("Failed to rename adoped LVs")
7547     else:
7548       feedback_fn("* creating instance disks...")
7549       try:
7550         _CreateDisks(self, iobj)
7551       except errors.OpExecError:
7552         self.LogWarning("Device creation failed, reverting...")
7553         try:
7554           _RemoveDisks(self, iobj)
7555         finally:
7556           self.cfg.ReleaseDRBDMinors(instance)
7557           raise
7558
7559       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7560         feedback_fn("* wiping instance disks...")
7561         try:
7562           _WipeDisks(self, iobj)
7563         except errors.OpExecError:
7564           self.LogWarning("Device wiping failed, reverting...")
7565           try:
7566             _RemoveDisks(self, iobj)
7567           finally:
7568             self.cfg.ReleaseDRBDMinors(instance)
7569             raise
7570
7571     feedback_fn("adding instance %s to cluster config" % instance)
7572
7573     self.cfg.AddInstance(iobj, self.proc.GetECId())
7574
7575     # Declare that we don't want to remove the instance lock anymore, as we've
7576     # added the instance to the config
7577     del self.remove_locks[locking.LEVEL_INSTANCE]
7578     # Unlock all the nodes
7579     if self.op.mode == constants.INSTANCE_IMPORT:
7580       nodes_keep = [self.op.src_node]
7581       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7582                        if node != self.op.src_node]
7583       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7584       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7585     else:
7586       self.context.glm.release(locking.LEVEL_NODE)
7587       del self.acquired_locks[locking.LEVEL_NODE]
7588
7589     if self.op.wait_for_sync:
7590       disk_abort = not _WaitForSync(self, iobj)
7591     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7592       # make sure the disks are not degraded (still sync-ing is ok)
7593       time.sleep(15)
7594       feedback_fn("* checking mirrors status")
7595       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7596     else:
7597       disk_abort = False
7598
7599     if disk_abort:
7600       _RemoveDisks(self, iobj)
7601       self.cfg.RemoveInstance(iobj.name)
7602       # Make sure the instance lock gets removed
7603       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7604       raise errors.OpExecError("There are some degraded disks for"
7605                                " this instance")
7606
7607     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7608       if self.op.mode == constants.INSTANCE_CREATE:
7609         if not self.op.no_install:
7610           feedback_fn("* running the instance OS create scripts...")
7611           # FIXME: pass debug option from opcode to backend
7612           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7613                                                  self.op.debug_level)
7614           result.Raise("Could not add os for instance %s"
7615                        " on node %s" % (instance, pnode_name))
7616
7617       elif self.op.mode == constants.INSTANCE_IMPORT:
7618         feedback_fn("* running the instance OS import scripts...")
7619
7620         transfers = []
7621
7622         for idx, image in enumerate(self.src_images):
7623           if not image:
7624             continue
7625
7626           # FIXME: pass debug option from opcode to backend
7627           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7628                                              constants.IEIO_FILE, (image, ),
7629                                              constants.IEIO_SCRIPT,
7630                                              (iobj.disks[idx], idx),
7631                                              None)
7632           transfers.append(dt)
7633
7634         import_result = \
7635           masterd.instance.TransferInstanceData(self, feedback_fn,
7636                                                 self.op.src_node, pnode_name,
7637                                                 self.pnode.secondary_ip,
7638                                                 iobj, transfers)
7639         if not compat.all(import_result):
7640           self.LogWarning("Some disks for instance %s on node %s were not"
7641                           " imported successfully" % (instance, pnode_name))
7642
7643       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7644         feedback_fn("* preparing remote import...")
7645         # The source cluster will stop the instance before attempting to make a
7646         # connection. In some cases stopping an instance can take a long time,
7647         # hence the shutdown timeout is added to the connection timeout.
7648         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7649                            self.op.source_shutdown_timeout)
7650         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7651
7652         assert iobj.primary_node == self.pnode.name
7653         disk_results = \
7654           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7655                                         self.source_x509_ca,
7656                                         self._cds, timeouts)
7657         if not compat.all(disk_results):
7658           # TODO: Should the instance still be started, even if some disks
7659           # failed to import (valid for local imports, too)?
7660           self.LogWarning("Some disks for instance %s on node %s were not"
7661                           " imported successfully" % (instance, pnode_name))
7662
7663         # Run rename script on newly imported instance
7664         assert iobj.name == instance
7665         feedback_fn("Running rename script for %s" % instance)
7666         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7667                                                    self.source_instance_name,
7668                                                    self.op.debug_level)
7669         if result.fail_msg:
7670           self.LogWarning("Failed to run rename script for %s on node"
7671                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7672
7673       else:
7674         # also checked in the prereq part
7675         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7676                                      % self.op.mode)
7677
7678     if self.op.start:
7679       iobj.admin_up = True
7680       self.cfg.Update(iobj, feedback_fn)
7681       logging.info("Starting instance %s on node %s", instance, pnode_name)
7682       feedback_fn("* starting instance...")
7683       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7684       result.Raise("Could not start instance")
7685
7686     return list(iobj.all_nodes)
7687
7688
7689 class LUInstanceConsole(NoHooksLU):
7690   """Connect to an instance's console.
7691
7692   This is somewhat special in that it returns the command line that
7693   you need to run on the master node in order to connect to the
7694   console.
7695
7696   """
7697   REQ_BGL = False
7698
7699   def ExpandNames(self):
7700     self._ExpandAndLockInstance()
7701
7702   def CheckPrereq(self):
7703     """Check prerequisites.
7704
7705     This checks that the instance is in the cluster.
7706
7707     """
7708     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7709     assert self.instance is not None, \
7710       "Cannot retrieve locked instance %s" % self.op.instance_name
7711     _CheckNodeOnline(self, self.instance.primary_node)
7712
7713   def Exec(self, feedback_fn):
7714     """Connect to the console of an instance
7715
7716     """
7717     instance = self.instance
7718     node = instance.primary_node
7719
7720     node_insts = self.rpc.call_instance_list([node],
7721                                              [instance.hypervisor])[node]
7722     node_insts.Raise("Can't get node information from %s" % node)
7723
7724     if instance.name not in node_insts.payload:
7725       if instance.admin_up:
7726         state = "ERROR_down"
7727       else:
7728         state = "ADMIN_down"
7729       raise errors.OpExecError("Instance %s is not running (state %s)" %
7730                                (instance.name, state))
7731
7732     logging.debug("Connecting to console of %s on %s", instance.name, node)
7733
7734     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7735     cluster = self.cfg.GetClusterInfo()
7736     # beparams and hvparams are passed separately, to avoid editing the
7737     # instance and then saving the defaults in the instance itself.
7738     hvparams = cluster.FillHV(instance)
7739     beparams = cluster.FillBE(instance)
7740     console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7741
7742     assert console.instance == instance.name
7743     assert console.Validate()
7744
7745     return console.ToDict()
7746
7747
7748 class LUReplaceDisks(LogicalUnit):
7749   """Replace the disks of an instance.
7750
7751   """
7752   HPATH = "mirrors-replace"
7753   HTYPE = constants.HTYPE_INSTANCE
7754   REQ_BGL = False
7755
7756   def CheckArguments(self):
7757     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7758                                   self.op.iallocator)
7759
7760   def ExpandNames(self):
7761     self._ExpandAndLockInstance()
7762
7763     if self.op.iallocator is not None:
7764       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7765
7766     elif self.op.remote_node is not None:
7767       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7768       self.op.remote_node = remote_node
7769
7770       # Warning: do not remove the locking of the new secondary here
7771       # unless DRBD8.AddChildren is changed to work in parallel;
7772       # currently it doesn't since parallel invocations of
7773       # FindUnusedMinor will conflict
7774       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7775       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7776
7777     else:
7778       self.needed_locks[locking.LEVEL_NODE] = []
7779       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7780
7781     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7782                                    self.op.iallocator, self.op.remote_node,
7783                                    self.op.disks, False, self.op.early_release)
7784
7785     self.tasklets = [self.replacer]
7786
7787   def DeclareLocks(self, level):
7788     # If we're not already locking all nodes in the set we have to declare the
7789     # instance's primary/secondary nodes.
7790     if (level == locking.LEVEL_NODE and
7791         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7792       self._LockInstancesNodes()
7793
7794   def BuildHooksEnv(self):
7795     """Build hooks env.
7796
7797     This runs on the master, the primary and all the secondaries.
7798
7799     """
7800     instance = self.replacer.instance
7801     env = {
7802       "MODE": self.op.mode,
7803       "NEW_SECONDARY": self.op.remote_node,
7804       "OLD_SECONDARY": instance.secondary_nodes[0],
7805       }
7806     env.update(_BuildInstanceHookEnvByObject(self, instance))
7807     nl = [
7808       self.cfg.GetMasterNode(),
7809       instance.primary_node,
7810       ]
7811     if self.op.remote_node is not None:
7812       nl.append(self.op.remote_node)
7813     return env, nl, nl
7814
7815
7816 class TLReplaceDisks(Tasklet):
7817   """Replaces disks for an instance.
7818
7819   Note: Locking is not within the scope of this class.
7820
7821   """
7822   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7823                disks, delay_iallocator, early_release):
7824     """Initializes this class.
7825
7826     """
7827     Tasklet.__init__(self, lu)
7828
7829     # Parameters
7830     self.instance_name = instance_name
7831     self.mode = mode
7832     self.iallocator_name = iallocator_name
7833     self.remote_node = remote_node
7834     self.disks = disks
7835     self.delay_iallocator = delay_iallocator
7836     self.early_release = early_release
7837
7838     # Runtime data
7839     self.instance = None
7840     self.new_node = None
7841     self.target_node = None
7842     self.other_node = None
7843     self.remote_node_info = None
7844     self.node_secondary_ip = None
7845
7846   @staticmethod
7847   def CheckArguments(mode, remote_node, iallocator):
7848     """Helper function for users of this class.
7849
7850     """
7851     # check for valid parameter combination
7852     if mode == constants.REPLACE_DISK_CHG:
7853       if remote_node is None and iallocator is None:
7854         raise errors.OpPrereqError("When changing the secondary either an"
7855                                    " iallocator script must be used or the"
7856                                    " new node given", errors.ECODE_INVAL)
7857
7858       if remote_node is not None and iallocator is not None:
7859         raise errors.OpPrereqError("Give either the iallocator or the new"
7860                                    " secondary, not both", errors.ECODE_INVAL)
7861
7862     elif remote_node is not None or iallocator is not None:
7863       # Not replacing the secondary
7864       raise errors.OpPrereqError("The iallocator and new node options can"
7865                                  " only be used when changing the"
7866                                  " secondary node", errors.ECODE_INVAL)
7867
7868   @staticmethod
7869   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7870     """Compute a new secondary node using an IAllocator.
7871
7872     """
7873     ial = IAllocator(lu.cfg, lu.rpc,
7874                      mode=constants.IALLOCATOR_MODE_RELOC,
7875                      name=instance_name,
7876                      relocate_from=relocate_from)
7877
7878     ial.Run(iallocator_name)
7879
7880     if not ial.success:
7881       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7882                                  " %s" % (iallocator_name, ial.info),
7883                                  errors.ECODE_NORES)
7884
7885     if len(ial.result) != ial.required_nodes:
7886       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7887                                  " of nodes (%s), required %s" %
7888                                  (iallocator_name,
7889                                   len(ial.result), ial.required_nodes),
7890                                  errors.ECODE_FAULT)
7891
7892     remote_node_name = ial.result[0]
7893
7894     lu.LogInfo("Selected new secondary for instance '%s': %s",
7895                instance_name, remote_node_name)
7896
7897     return remote_node_name
7898
7899   def _FindFaultyDisks(self, node_name):
7900     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7901                                     node_name, True)
7902
7903   def CheckPrereq(self):
7904     """Check prerequisites.
7905
7906     This checks that the instance is in the cluster.
7907
7908     """
7909     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7910     assert instance is not None, \
7911       "Cannot retrieve locked instance %s" % self.instance_name
7912
7913     if instance.disk_template != constants.DT_DRBD8:
7914       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7915                                  " instances", errors.ECODE_INVAL)
7916
7917     if len(instance.secondary_nodes) != 1:
7918       raise errors.OpPrereqError("The instance has a strange layout,"
7919                                  " expected one secondary but found %d" %
7920                                  len(instance.secondary_nodes),
7921                                  errors.ECODE_FAULT)
7922
7923     if not self.delay_iallocator:
7924       self._CheckPrereq2()
7925
7926   def _CheckPrereq2(self):
7927     """Check prerequisites, second part.
7928
7929     This function should always be part of CheckPrereq. It was separated and is
7930     now called from Exec because during node evacuation iallocator was only
7931     called with an unmodified cluster model, not taking planned changes into
7932     account.
7933
7934     """
7935     instance = self.instance
7936     secondary_node = instance.secondary_nodes[0]
7937
7938     if self.iallocator_name is None:
7939       remote_node = self.remote_node
7940     else:
7941       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7942                                        instance.name, instance.secondary_nodes)
7943
7944     if remote_node is not None:
7945       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7946       assert self.remote_node_info is not None, \
7947         "Cannot retrieve locked node %s" % remote_node
7948     else:
7949       self.remote_node_info = None
7950
7951     if remote_node == self.instance.primary_node:
7952       raise errors.OpPrereqError("The specified node is the primary node of"
7953                                  " the instance.", errors.ECODE_INVAL)
7954
7955     if remote_node == secondary_node:
7956       raise errors.OpPrereqError("The specified node is already the"
7957                                  " secondary node of the instance.",
7958                                  errors.ECODE_INVAL)
7959
7960     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7961                                     constants.REPLACE_DISK_CHG):
7962       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7963                                  errors.ECODE_INVAL)
7964
7965     if self.mode == constants.REPLACE_DISK_AUTO:
7966       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7967       faulty_secondary = self._FindFaultyDisks(secondary_node)
7968
7969       if faulty_primary and faulty_secondary:
7970         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7971                                    " one node and can not be repaired"
7972                                    " automatically" % self.instance_name,
7973                                    errors.ECODE_STATE)
7974
7975       if faulty_primary:
7976         self.disks = faulty_primary
7977         self.target_node = instance.primary_node
7978         self.other_node = secondary_node
7979         check_nodes = [self.target_node, self.other_node]
7980       elif faulty_secondary:
7981         self.disks = faulty_secondary
7982         self.target_node = secondary_node
7983         self.other_node = instance.primary_node
7984         check_nodes = [self.target_node, self.other_node]
7985       else:
7986         self.disks = []
7987         check_nodes = []
7988
7989     else:
7990       # Non-automatic modes
7991       if self.mode == constants.REPLACE_DISK_PRI:
7992         self.target_node = instance.primary_node
7993         self.other_node = secondary_node
7994         check_nodes = [self.target_node, self.other_node]
7995
7996       elif self.mode == constants.REPLACE_DISK_SEC:
7997         self.target_node = secondary_node
7998         self.other_node = instance.primary_node
7999         check_nodes = [self.target_node, self.other_node]
8000
8001       elif self.mode == constants.REPLACE_DISK_CHG:
8002         self.new_node = remote_node
8003         self.other_node = instance.primary_node
8004         self.target_node = secondary_node
8005         check_nodes = [self.new_node, self.other_node]
8006
8007         _CheckNodeNotDrained(self.lu, remote_node)
8008         _CheckNodeVmCapable(self.lu, remote_node)
8009
8010         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8011         assert old_node_info is not None
8012         if old_node_info.offline and not self.early_release:
8013           # doesn't make sense to delay the release
8014           self.early_release = True
8015           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8016                           " early-release mode", secondary_node)
8017
8018       else:
8019         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8020                                      self.mode)
8021
8022       # If not specified all disks should be replaced
8023       if not self.disks:
8024         self.disks = range(len(self.instance.disks))
8025
8026     for node in check_nodes:
8027       _CheckNodeOnline(self.lu, node)
8028
8029     # Check whether disks are valid
8030     for disk_idx in self.disks:
8031       instance.FindDisk(disk_idx)
8032
8033     # Get secondary node IP addresses
8034     node_2nd_ip = {}
8035
8036     for node_name in [self.target_node, self.other_node, self.new_node]:
8037       if node_name is not None:
8038         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8039
8040     self.node_secondary_ip = node_2nd_ip
8041
8042   def Exec(self, feedback_fn):
8043     """Execute disk replacement.
8044
8045     This dispatches the disk replacement to the appropriate handler.
8046
8047     """
8048     if self.delay_iallocator:
8049       self._CheckPrereq2()
8050
8051     if not self.disks:
8052       feedback_fn("No disks need replacement")
8053       return
8054
8055     feedback_fn("Replacing disk(s) %s for %s" %
8056                 (utils.CommaJoin(self.disks), self.instance.name))
8057
8058     activate_disks = (not self.instance.admin_up)
8059
8060     # Activate the instance disks if we're replacing them on a down instance
8061     if activate_disks:
8062       _StartInstanceDisks(self.lu, self.instance, True)
8063
8064     try:
8065       # Should we replace the secondary node?
8066       if self.new_node is not None:
8067         fn = self._ExecDrbd8Secondary
8068       else:
8069         fn = self._ExecDrbd8DiskOnly
8070
8071       return fn(feedback_fn)
8072
8073     finally:
8074       # Deactivate the instance disks if we're replacing them on a
8075       # down instance
8076       if activate_disks:
8077         _SafeShutdownInstanceDisks(self.lu, self.instance)
8078
8079   def _CheckVolumeGroup(self, nodes):
8080     self.lu.LogInfo("Checking volume groups")
8081
8082     vgname = self.cfg.GetVGName()
8083
8084     # Make sure volume group exists on all involved nodes
8085     results = self.rpc.call_vg_list(nodes)
8086     if not results:
8087       raise errors.OpExecError("Can't list volume groups on the nodes")
8088
8089     for node in nodes:
8090       res = results[node]
8091       res.Raise("Error checking node %s" % node)
8092       if vgname not in res.payload:
8093         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8094                                  (vgname, node))
8095
8096   def _CheckDisksExistence(self, nodes):
8097     # Check disk existence
8098     for idx, dev in enumerate(self.instance.disks):
8099       if idx not in self.disks:
8100         continue
8101
8102       for node in nodes:
8103         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8104         self.cfg.SetDiskID(dev, node)
8105
8106         result = self.rpc.call_blockdev_find(node, dev)
8107
8108         msg = result.fail_msg
8109         if msg or not result.payload:
8110           if not msg:
8111             msg = "disk not found"
8112           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8113                                    (idx, node, msg))
8114
8115   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8116     for idx, dev in enumerate(self.instance.disks):
8117       if idx not in self.disks:
8118         continue
8119
8120       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8121                       (idx, node_name))
8122
8123       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8124                                    ldisk=ldisk):
8125         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8126                                  " replace disks for instance %s" %
8127                                  (node_name, self.instance.name))
8128
8129   def _CreateNewStorage(self, node_name):
8130     vgname = self.cfg.GetVGName()
8131     iv_names = {}
8132
8133     for idx, dev in enumerate(self.instance.disks):
8134       if idx not in self.disks:
8135         continue
8136
8137       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8138
8139       self.cfg.SetDiskID(dev, node_name)
8140
8141       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8142       names = _GenerateUniqueNames(self.lu, lv_names)
8143
8144       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8145                              logical_id=(vgname, names[0]))
8146       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8147                              logical_id=(vgname, names[1]))
8148
8149       new_lvs = [lv_data, lv_meta]
8150       old_lvs = dev.children
8151       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8152
8153       # we pass force_create=True to force the LVM creation
8154       for new_lv in new_lvs:
8155         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8156                         _GetInstanceInfoText(self.instance), False)
8157
8158     return iv_names
8159
8160   def _CheckDevices(self, node_name, iv_names):
8161     for name, (dev, _, _) in iv_names.iteritems():
8162       self.cfg.SetDiskID(dev, node_name)
8163
8164       result = self.rpc.call_blockdev_find(node_name, dev)
8165
8166       msg = result.fail_msg
8167       if msg or not result.payload:
8168         if not msg:
8169           msg = "disk not found"
8170         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8171                                  (name, msg))
8172
8173       if result.payload.is_degraded:
8174         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8175
8176   def _RemoveOldStorage(self, node_name, iv_names):
8177     for name, (_, old_lvs, _) in iv_names.iteritems():
8178       self.lu.LogInfo("Remove logical volumes for %s" % name)
8179
8180       for lv in old_lvs:
8181         self.cfg.SetDiskID(lv, node_name)
8182
8183         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8184         if msg:
8185           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8186                              hint="remove unused LVs manually")
8187
8188   def _ReleaseNodeLock(self, node_name):
8189     """Releases the lock for a given node."""
8190     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8191
8192   def _ExecDrbd8DiskOnly(self, feedback_fn):
8193     """Replace a disk on the primary or secondary for DRBD 8.
8194
8195     The algorithm for replace is quite complicated:
8196
8197       1. for each disk to be replaced:
8198
8199         1. create new LVs on the target node with unique names
8200         1. detach old LVs from the drbd device
8201         1. rename old LVs to name_replaced.<time_t>
8202         1. rename new LVs to old LVs
8203         1. attach the new LVs (with the old names now) to the drbd device
8204
8205       1. wait for sync across all devices
8206
8207       1. for each modified disk:
8208
8209         1. remove old LVs (which have the name name_replaces.<time_t>)
8210
8211     Failures are not very well handled.
8212
8213     """
8214     steps_total = 6
8215
8216     # Step: check device activation
8217     self.lu.LogStep(1, steps_total, "Check device existence")
8218     self._CheckDisksExistence([self.other_node, self.target_node])
8219     self._CheckVolumeGroup([self.target_node, self.other_node])
8220
8221     # Step: check other node consistency
8222     self.lu.LogStep(2, steps_total, "Check peer consistency")
8223     self._CheckDisksConsistency(self.other_node,
8224                                 self.other_node == self.instance.primary_node,
8225                                 False)
8226
8227     # Step: create new storage
8228     self.lu.LogStep(3, steps_total, "Allocate new storage")
8229     iv_names = self._CreateNewStorage(self.target_node)
8230
8231     # Step: for each lv, detach+rename*2+attach
8232     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8233     for dev, old_lvs, new_lvs in iv_names.itervalues():
8234       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8235
8236       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8237                                                      old_lvs)
8238       result.Raise("Can't detach drbd from local storage on node"
8239                    " %s for device %s" % (self.target_node, dev.iv_name))
8240       #dev.children = []
8241       #cfg.Update(instance)
8242
8243       # ok, we created the new LVs, so now we know we have the needed
8244       # storage; as such, we proceed on the target node to rename
8245       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8246       # using the assumption that logical_id == physical_id (which in
8247       # turn is the unique_id on that node)
8248
8249       # FIXME(iustin): use a better name for the replaced LVs
8250       temp_suffix = int(time.time())
8251       ren_fn = lambda d, suff: (d.physical_id[0],
8252                                 d.physical_id[1] + "_replaced-%s" % suff)
8253
8254       # Build the rename list based on what LVs exist on the node
8255       rename_old_to_new = []
8256       for to_ren in old_lvs:
8257         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8258         if not result.fail_msg and result.payload:
8259           # device exists
8260           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8261
8262       self.lu.LogInfo("Renaming the old LVs on the target node")
8263       result = self.rpc.call_blockdev_rename(self.target_node,
8264                                              rename_old_to_new)
8265       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8266
8267       # Now we rename the new LVs to the old LVs
8268       self.lu.LogInfo("Renaming the new LVs on the target node")
8269       rename_new_to_old = [(new, old.physical_id)
8270                            for old, new in zip(old_lvs, new_lvs)]
8271       result = self.rpc.call_blockdev_rename(self.target_node,
8272                                              rename_new_to_old)
8273       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8274
8275       for old, new in zip(old_lvs, new_lvs):
8276         new.logical_id = old.logical_id
8277         self.cfg.SetDiskID(new, self.target_node)
8278
8279       for disk in old_lvs:
8280         disk.logical_id = ren_fn(disk, temp_suffix)
8281         self.cfg.SetDiskID(disk, self.target_node)
8282
8283       # Now that the new lvs have the old name, we can add them to the device
8284       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8285       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8286                                                   new_lvs)
8287       msg = result.fail_msg
8288       if msg:
8289         for new_lv in new_lvs:
8290           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8291                                                new_lv).fail_msg
8292           if msg2:
8293             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8294                                hint=("cleanup manually the unused logical"
8295                                      "volumes"))
8296         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8297
8298       dev.children = new_lvs
8299
8300       self.cfg.Update(self.instance, feedback_fn)
8301
8302     cstep = 5
8303     if self.early_release:
8304       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8305       cstep += 1
8306       self._RemoveOldStorage(self.target_node, iv_names)
8307       # WARNING: we release both node locks here, do not do other RPCs
8308       # than WaitForSync to the primary node
8309       self._ReleaseNodeLock([self.target_node, self.other_node])
8310
8311     # Wait for sync
8312     # This can fail as the old devices are degraded and _WaitForSync
8313     # does a combined result over all disks, so we don't check its return value
8314     self.lu.LogStep(cstep, steps_total, "Sync devices")
8315     cstep += 1
8316     _WaitForSync(self.lu, self.instance)
8317
8318     # Check all devices manually
8319     self._CheckDevices(self.instance.primary_node, iv_names)
8320
8321     # Step: remove old storage
8322     if not self.early_release:
8323       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8324       cstep += 1
8325       self._RemoveOldStorage(self.target_node, iv_names)
8326
8327   def _ExecDrbd8Secondary(self, feedback_fn):
8328     """Replace the secondary node for DRBD 8.
8329
8330     The algorithm for replace is quite complicated:
8331       - for all disks of the instance:
8332         - create new LVs on the new node with same names
8333         - shutdown the drbd device on the old secondary
8334         - disconnect the drbd network on the primary
8335         - create the drbd device on the new secondary
8336         - network attach the drbd on the primary, using an artifice:
8337           the drbd code for Attach() will connect to the network if it
8338           finds a device which is connected to the good local disks but
8339           not network enabled
8340       - wait for sync across all devices
8341       - remove all disks from the old secondary
8342
8343     Failures are not very well handled.
8344
8345     """
8346     steps_total = 6
8347
8348     # Step: check device activation
8349     self.lu.LogStep(1, steps_total, "Check device existence")
8350     self._CheckDisksExistence([self.instance.primary_node])
8351     self._CheckVolumeGroup([self.instance.primary_node])
8352
8353     # Step: check other node consistency
8354     self.lu.LogStep(2, steps_total, "Check peer consistency")
8355     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8356
8357     # Step: create new storage
8358     self.lu.LogStep(3, steps_total, "Allocate new storage")
8359     for idx, dev in enumerate(self.instance.disks):
8360       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8361                       (self.new_node, idx))
8362       # we pass force_create=True to force LVM creation
8363       for new_lv in dev.children:
8364         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8365                         _GetInstanceInfoText(self.instance), False)
8366
8367     # Step 4: dbrd minors and drbd setups changes
8368     # after this, we must manually remove the drbd minors on both the
8369     # error and the success paths
8370     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8371     minors = self.cfg.AllocateDRBDMinor([self.new_node
8372                                          for dev in self.instance.disks],
8373                                         self.instance.name)
8374     logging.debug("Allocated minors %r", minors)
8375
8376     iv_names = {}
8377     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8378       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8379                       (self.new_node, idx))
8380       # create new devices on new_node; note that we create two IDs:
8381       # one without port, so the drbd will be activated without
8382       # networking information on the new node at this stage, and one
8383       # with network, for the latter activation in step 4
8384       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8385       if self.instance.primary_node == o_node1:
8386         p_minor = o_minor1
8387       else:
8388         assert self.instance.primary_node == o_node2, "Three-node instance?"
8389         p_minor = o_minor2
8390
8391       new_alone_id = (self.instance.primary_node, self.new_node, None,
8392                       p_minor, new_minor, o_secret)
8393       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8394                     p_minor, new_minor, o_secret)
8395
8396       iv_names[idx] = (dev, dev.children, new_net_id)
8397       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8398                     new_net_id)
8399       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8400                               logical_id=new_alone_id,
8401                               children=dev.children,
8402                               size=dev.size)
8403       try:
8404         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8405                               _GetInstanceInfoText(self.instance), False)
8406       except errors.GenericError:
8407         self.cfg.ReleaseDRBDMinors(self.instance.name)
8408         raise
8409
8410     # We have new devices, shutdown the drbd on the old secondary
8411     for idx, dev in enumerate(self.instance.disks):
8412       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8413       self.cfg.SetDiskID(dev, self.target_node)
8414       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8415       if msg:
8416         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8417                            "node: %s" % (idx, msg),
8418                            hint=("Please cleanup this device manually as"
8419                                  " soon as possible"))
8420
8421     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8422     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8423                                                self.node_secondary_ip,
8424                                                self.instance.disks)\
8425                                               [self.instance.primary_node]
8426
8427     msg = result.fail_msg
8428     if msg:
8429       # detaches didn't succeed (unlikely)
8430       self.cfg.ReleaseDRBDMinors(self.instance.name)
8431       raise errors.OpExecError("Can't detach the disks from the network on"
8432                                " old node: %s" % (msg,))
8433
8434     # if we managed to detach at least one, we update all the disks of
8435     # the instance to point to the new secondary
8436     self.lu.LogInfo("Updating instance configuration")
8437     for dev, _, new_logical_id in iv_names.itervalues():
8438       dev.logical_id = new_logical_id
8439       self.cfg.SetDiskID(dev, self.instance.primary_node)
8440
8441     self.cfg.Update(self.instance, feedback_fn)
8442
8443     # and now perform the drbd attach
8444     self.lu.LogInfo("Attaching primary drbds to new secondary"
8445                     " (standalone => connected)")
8446     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8447                                             self.new_node],
8448                                            self.node_secondary_ip,
8449                                            self.instance.disks,
8450                                            self.instance.name,
8451                                            False)
8452     for to_node, to_result in result.items():
8453       msg = to_result.fail_msg
8454       if msg:
8455         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8456                            to_node, msg,
8457                            hint=("please do a gnt-instance info to see the"
8458                                  " status of disks"))
8459     cstep = 5
8460     if self.early_release:
8461       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8462       cstep += 1
8463       self._RemoveOldStorage(self.target_node, iv_names)
8464       # WARNING: we release all node locks here, do not do other RPCs
8465       # than WaitForSync to the primary node
8466       self._ReleaseNodeLock([self.instance.primary_node,
8467                              self.target_node,
8468                              self.new_node])
8469
8470     # Wait for sync
8471     # This can fail as the old devices are degraded and _WaitForSync
8472     # does a combined result over all disks, so we don't check its return value
8473     self.lu.LogStep(cstep, steps_total, "Sync devices")
8474     cstep += 1
8475     _WaitForSync(self.lu, self.instance)
8476
8477     # Check all devices manually
8478     self._CheckDevices(self.instance.primary_node, iv_names)
8479
8480     # Step: remove old storage
8481     if not self.early_release:
8482       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8483       self._RemoveOldStorage(self.target_node, iv_names)
8484
8485
8486 class LURepairNodeStorage(NoHooksLU):
8487   """Repairs the volume group on a node.
8488
8489   """
8490   REQ_BGL = False
8491
8492   def CheckArguments(self):
8493     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8494
8495     storage_type = self.op.storage_type
8496
8497     if (constants.SO_FIX_CONSISTENCY not in
8498         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8499       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8500                                  " repaired" % storage_type,
8501                                  errors.ECODE_INVAL)
8502
8503   def ExpandNames(self):
8504     self.needed_locks = {
8505       locking.LEVEL_NODE: [self.op.node_name],
8506       }
8507
8508   def _CheckFaultyDisks(self, instance, node_name):
8509     """Ensure faulty disks abort the opcode or at least warn."""
8510     try:
8511       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8512                                   node_name, True):
8513         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8514                                    " node '%s'" % (instance.name, node_name),
8515                                    errors.ECODE_STATE)
8516     except errors.OpPrereqError, err:
8517       if self.op.ignore_consistency:
8518         self.proc.LogWarning(str(err.args[0]))
8519       else:
8520         raise
8521
8522   def CheckPrereq(self):
8523     """Check prerequisites.
8524
8525     """
8526     # Check whether any instance on this node has faulty disks
8527     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8528       if not inst.admin_up:
8529         continue
8530       check_nodes = set(inst.all_nodes)
8531       check_nodes.discard(self.op.node_name)
8532       for inst_node_name in check_nodes:
8533         self._CheckFaultyDisks(inst, inst_node_name)
8534
8535   def Exec(self, feedback_fn):
8536     feedback_fn("Repairing storage unit '%s' on %s ..." %
8537                 (self.op.name, self.op.node_name))
8538
8539     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8540     result = self.rpc.call_storage_execute(self.op.node_name,
8541                                            self.op.storage_type, st_args,
8542                                            self.op.name,
8543                                            constants.SO_FIX_CONSISTENCY)
8544     result.Raise("Failed to repair storage unit '%s' on %s" %
8545                  (self.op.name, self.op.node_name))
8546
8547
8548 class LUNodeEvacuationStrategy(NoHooksLU):
8549   """Computes the node evacuation strategy.
8550
8551   """
8552   REQ_BGL = False
8553
8554   def CheckArguments(self):
8555     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8556
8557   def ExpandNames(self):
8558     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8559     self.needed_locks = locks = {}
8560     if self.op.remote_node is None:
8561       locks[locking.LEVEL_NODE] = locking.ALL_SET
8562     else:
8563       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8564       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8565
8566   def Exec(self, feedback_fn):
8567     if self.op.remote_node is not None:
8568       instances = []
8569       for node in self.op.nodes:
8570         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8571       result = []
8572       for i in instances:
8573         if i.primary_node == self.op.remote_node:
8574           raise errors.OpPrereqError("Node %s is the primary node of"
8575                                      " instance %s, cannot use it as"
8576                                      " secondary" %
8577                                      (self.op.remote_node, i.name),
8578                                      errors.ECODE_INVAL)
8579         result.append([i.name, self.op.remote_node])
8580     else:
8581       ial = IAllocator(self.cfg, self.rpc,
8582                        mode=constants.IALLOCATOR_MODE_MEVAC,
8583                        evac_nodes=self.op.nodes)
8584       ial.Run(self.op.iallocator, validate=True)
8585       if not ial.success:
8586         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8587                                  errors.ECODE_NORES)
8588       result = ial.result
8589     return result
8590
8591
8592 class LUInstanceGrowDisk(LogicalUnit):
8593   """Grow a disk of an instance.
8594
8595   """
8596   HPATH = "disk-grow"
8597   HTYPE = constants.HTYPE_INSTANCE
8598   REQ_BGL = False
8599
8600   def ExpandNames(self):
8601     self._ExpandAndLockInstance()
8602     self.needed_locks[locking.LEVEL_NODE] = []
8603     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8604
8605   def DeclareLocks(self, level):
8606     if level == locking.LEVEL_NODE:
8607       self._LockInstancesNodes()
8608
8609   def BuildHooksEnv(self):
8610     """Build hooks env.
8611
8612     This runs on the master, the primary and all the secondaries.
8613
8614     """
8615     env = {
8616       "DISK": self.op.disk,
8617       "AMOUNT": self.op.amount,
8618       }
8619     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8620     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8621     return env, nl, nl
8622
8623   def CheckPrereq(self):
8624     """Check prerequisites.
8625
8626     This checks that the instance is in the cluster.
8627
8628     """
8629     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8630     assert instance is not None, \
8631       "Cannot retrieve locked instance %s" % self.op.instance_name
8632     nodenames = list(instance.all_nodes)
8633     for node in nodenames:
8634       _CheckNodeOnline(self, node)
8635
8636     self.instance = instance
8637
8638     if instance.disk_template not in constants.DTS_GROWABLE:
8639       raise errors.OpPrereqError("Instance's disk layout does not support"
8640                                  " growing.", errors.ECODE_INVAL)
8641
8642     self.disk = instance.FindDisk(self.op.disk)
8643
8644     if instance.disk_template != constants.DT_FILE:
8645       # TODO: check the free disk space for file, when that feature
8646       # will be supported
8647       _CheckNodesFreeDiskPerVG(self, nodenames,
8648                                self.disk.ComputeGrowth(self.op.amount))
8649
8650   def Exec(self, feedback_fn):
8651     """Execute disk grow.
8652
8653     """
8654     instance = self.instance
8655     disk = self.disk
8656
8657     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8658     if not disks_ok:
8659       raise errors.OpExecError("Cannot activate block device to grow")
8660
8661     for node in instance.all_nodes:
8662       self.cfg.SetDiskID(disk, node)
8663       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8664       result.Raise("Grow request failed to node %s" % node)
8665
8666       # TODO: Rewrite code to work properly
8667       # DRBD goes into sync mode for a short amount of time after executing the
8668       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8669       # calling "resize" in sync mode fails. Sleeping for a short amount of
8670       # time is a work-around.
8671       time.sleep(5)
8672
8673     disk.RecordGrow(self.op.amount)
8674     self.cfg.Update(instance, feedback_fn)
8675     if self.op.wait_for_sync:
8676       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8677       if disk_abort:
8678         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8679                              " status.\nPlease check the instance.")
8680       if not instance.admin_up:
8681         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8682     elif not instance.admin_up:
8683       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8684                            " not supposed to be running because no wait for"
8685                            " sync mode was requested.")
8686
8687
8688 class LUQueryInstanceData(NoHooksLU):
8689   """Query runtime instance data.
8690
8691   """
8692   REQ_BGL = False
8693
8694   def ExpandNames(self):
8695     self.needed_locks = {}
8696     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8697
8698     if self.op.instances:
8699       self.wanted_names = []
8700       for name in self.op.instances:
8701         full_name = _ExpandInstanceName(self.cfg, name)
8702         self.wanted_names.append(full_name)
8703       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8704     else:
8705       self.wanted_names = None
8706       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8707
8708     self.needed_locks[locking.LEVEL_NODE] = []
8709     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8710
8711   def DeclareLocks(self, level):
8712     if level == locking.LEVEL_NODE:
8713       self._LockInstancesNodes()
8714
8715   def CheckPrereq(self):
8716     """Check prerequisites.
8717
8718     This only checks the optional instance list against the existing names.
8719
8720     """
8721     if self.wanted_names is None:
8722       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8723
8724     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8725                              in self.wanted_names]
8726
8727   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8728     """Returns the status of a block device
8729
8730     """
8731     if self.op.static or not node:
8732       return None
8733
8734     self.cfg.SetDiskID(dev, node)
8735
8736     result = self.rpc.call_blockdev_find(node, dev)
8737     if result.offline:
8738       return None
8739
8740     result.Raise("Can't compute disk status for %s" % instance_name)
8741
8742     status = result.payload
8743     if status is None:
8744       return None
8745
8746     return (status.dev_path, status.major, status.minor,
8747             status.sync_percent, status.estimated_time,
8748             status.is_degraded, status.ldisk_status)
8749
8750   def _ComputeDiskStatus(self, instance, snode, dev):
8751     """Compute block device status.
8752
8753     """
8754     if dev.dev_type in constants.LDS_DRBD:
8755       # we change the snode then (otherwise we use the one passed in)
8756       if dev.logical_id[0] == instance.primary_node:
8757         snode = dev.logical_id[1]
8758       else:
8759         snode = dev.logical_id[0]
8760
8761     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8762                                               instance.name, dev)
8763     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8764
8765     if dev.children:
8766       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8767                       for child in dev.children]
8768     else:
8769       dev_children = []
8770
8771     data = {
8772       "iv_name": dev.iv_name,
8773       "dev_type": dev.dev_type,
8774       "logical_id": dev.logical_id,
8775       "physical_id": dev.physical_id,
8776       "pstatus": dev_pstatus,
8777       "sstatus": dev_sstatus,
8778       "children": dev_children,
8779       "mode": dev.mode,
8780       "size": dev.size,
8781       }
8782
8783     return data
8784
8785   def Exec(self, feedback_fn):
8786     """Gather and return data"""
8787     result = {}
8788
8789     cluster = self.cfg.GetClusterInfo()
8790
8791     for instance in self.wanted_instances:
8792       if not self.op.static:
8793         remote_info = self.rpc.call_instance_info(instance.primary_node,
8794                                                   instance.name,
8795                                                   instance.hypervisor)
8796         remote_info.Raise("Error checking node %s" % instance.primary_node)
8797         remote_info = remote_info.payload
8798         if remote_info and "state" in remote_info:
8799           remote_state = "up"
8800         else:
8801           remote_state = "down"
8802       else:
8803         remote_state = None
8804       if instance.admin_up:
8805         config_state = "up"
8806       else:
8807         config_state = "down"
8808
8809       disks = [self._ComputeDiskStatus(instance, None, device)
8810                for device in instance.disks]
8811
8812       idict = {
8813         "name": instance.name,
8814         "config_state": config_state,
8815         "run_state": remote_state,
8816         "pnode": instance.primary_node,
8817         "snodes": instance.secondary_nodes,
8818         "os": instance.os,
8819         # this happens to be the same format used for hooks
8820         "nics": _NICListToTuple(self, instance.nics),
8821         "disk_template": instance.disk_template,
8822         "disks": disks,
8823         "hypervisor": instance.hypervisor,
8824         "network_port": instance.network_port,
8825         "hv_instance": instance.hvparams,
8826         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8827         "be_instance": instance.beparams,
8828         "be_actual": cluster.FillBE(instance),
8829         "os_instance": instance.osparams,
8830         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8831         "serial_no": instance.serial_no,
8832         "mtime": instance.mtime,
8833         "ctime": instance.ctime,
8834         "uuid": instance.uuid,
8835         }
8836
8837       result[instance.name] = idict
8838
8839     return result
8840
8841
8842 class LUSetInstanceParams(LogicalUnit):
8843   """Modifies an instances's parameters.
8844
8845   """
8846   HPATH = "instance-modify"
8847   HTYPE = constants.HTYPE_INSTANCE
8848   REQ_BGL = False
8849
8850   def CheckArguments(self):
8851     if not (self.op.nics or self.op.disks or self.op.disk_template or
8852             self.op.hvparams or self.op.beparams or self.op.os_name):
8853       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8854
8855     if self.op.hvparams:
8856       _CheckGlobalHvParams(self.op.hvparams)
8857
8858     # Disk validation
8859     disk_addremove = 0
8860     for disk_op, disk_dict in self.op.disks:
8861       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8862       if disk_op == constants.DDM_REMOVE:
8863         disk_addremove += 1
8864         continue
8865       elif disk_op == constants.DDM_ADD:
8866         disk_addremove += 1
8867       else:
8868         if not isinstance(disk_op, int):
8869           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8870         if not isinstance(disk_dict, dict):
8871           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8872           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8873
8874       if disk_op == constants.DDM_ADD:
8875         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8876         if mode not in constants.DISK_ACCESS_SET:
8877           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8878                                      errors.ECODE_INVAL)
8879         size = disk_dict.get('size', None)
8880         if size is None:
8881           raise errors.OpPrereqError("Required disk parameter size missing",
8882                                      errors.ECODE_INVAL)
8883         try:
8884           size = int(size)
8885         except (TypeError, ValueError), err:
8886           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8887                                      str(err), errors.ECODE_INVAL)
8888         disk_dict['size'] = size
8889       else:
8890         # modification of disk
8891         if 'size' in disk_dict:
8892           raise errors.OpPrereqError("Disk size change not possible, use"
8893                                      " grow-disk", errors.ECODE_INVAL)
8894
8895     if disk_addremove > 1:
8896       raise errors.OpPrereqError("Only one disk add or remove operation"
8897                                  " supported at a time", errors.ECODE_INVAL)
8898
8899     if self.op.disks and self.op.disk_template is not None:
8900       raise errors.OpPrereqError("Disk template conversion and other disk"
8901                                  " changes not supported at the same time",
8902                                  errors.ECODE_INVAL)
8903
8904     if (self.op.disk_template and
8905         self.op.disk_template in constants.DTS_NET_MIRROR and
8906         self.op.remote_node is None):
8907       raise errors.OpPrereqError("Changing the disk template to a mirrored"
8908                                  " one requires specifying a secondary node",
8909                                  errors.ECODE_INVAL)
8910
8911     # NIC validation
8912     nic_addremove = 0
8913     for nic_op, nic_dict in self.op.nics:
8914       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8915       if nic_op == constants.DDM_REMOVE:
8916         nic_addremove += 1
8917         continue
8918       elif nic_op == constants.DDM_ADD:
8919         nic_addremove += 1
8920       else:
8921         if not isinstance(nic_op, int):
8922           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8923         if not isinstance(nic_dict, dict):
8924           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8925           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8926
8927       # nic_dict should be a dict
8928       nic_ip = nic_dict.get('ip', None)
8929       if nic_ip is not None:
8930         if nic_ip.lower() == constants.VALUE_NONE:
8931           nic_dict['ip'] = None
8932         else:
8933           if not netutils.IPAddress.IsValid(nic_ip):
8934             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8935                                        errors.ECODE_INVAL)
8936
8937       nic_bridge = nic_dict.get('bridge', None)
8938       nic_link = nic_dict.get('link', None)
8939       if nic_bridge and nic_link:
8940         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8941                                    " at the same time", errors.ECODE_INVAL)
8942       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8943         nic_dict['bridge'] = None
8944       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8945         nic_dict['link'] = None
8946
8947       if nic_op == constants.DDM_ADD:
8948         nic_mac = nic_dict.get('mac', None)
8949         if nic_mac is None:
8950           nic_dict['mac'] = constants.VALUE_AUTO
8951
8952       if 'mac' in nic_dict:
8953         nic_mac = nic_dict['mac']
8954         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8955           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8956
8957         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8958           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8959                                      " modifying an existing nic",
8960                                      errors.ECODE_INVAL)
8961
8962     if nic_addremove > 1:
8963       raise errors.OpPrereqError("Only one NIC add or remove operation"
8964                                  " supported at a time", errors.ECODE_INVAL)
8965
8966   def ExpandNames(self):
8967     self._ExpandAndLockInstance()
8968     self.needed_locks[locking.LEVEL_NODE] = []
8969     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8970
8971   def DeclareLocks(self, level):
8972     if level == locking.LEVEL_NODE:
8973       self._LockInstancesNodes()
8974       if self.op.disk_template and self.op.remote_node:
8975         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8976         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8977
8978   def BuildHooksEnv(self):
8979     """Build hooks env.
8980
8981     This runs on the master, primary and secondaries.
8982
8983     """
8984     args = dict()
8985     if constants.BE_MEMORY in self.be_new:
8986       args['memory'] = self.be_new[constants.BE_MEMORY]
8987     if constants.BE_VCPUS in self.be_new:
8988       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8989     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8990     # information at all.
8991     if self.op.nics:
8992       args['nics'] = []
8993       nic_override = dict(self.op.nics)
8994       for idx, nic in enumerate(self.instance.nics):
8995         if idx in nic_override:
8996           this_nic_override = nic_override[idx]
8997         else:
8998           this_nic_override = {}
8999         if 'ip' in this_nic_override:
9000           ip = this_nic_override['ip']
9001         else:
9002           ip = nic.ip
9003         if 'mac' in this_nic_override:
9004           mac = this_nic_override['mac']
9005         else:
9006           mac = nic.mac
9007         if idx in self.nic_pnew:
9008           nicparams = self.nic_pnew[idx]
9009         else:
9010           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9011         mode = nicparams[constants.NIC_MODE]
9012         link = nicparams[constants.NIC_LINK]
9013         args['nics'].append((ip, mac, mode, link))
9014       if constants.DDM_ADD in nic_override:
9015         ip = nic_override[constants.DDM_ADD].get('ip', None)
9016         mac = nic_override[constants.DDM_ADD]['mac']
9017         nicparams = self.nic_pnew[constants.DDM_ADD]
9018         mode = nicparams[constants.NIC_MODE]
9019         link = nicparams[constants.NIC_LINK]
9020         args['nics'].append((ip, mac, mode, link))
9021       elif constants.DDM_REMOVE in nic_override:
9022         del args['nics'][-1]
9023
9024     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9025     if self.op.disk_template:
9026       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9027     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9028     return env, nl, nl
9029
9030   def CheckPrereq(self):
9031     """Check prerequisites.
9032
9033     This only checks the instance list against the existing names.
9034
9035     """
9036     # checking the new params on the primary/secondary nodes
9037
9038     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9039     cluster = self.cluster = self.cfg.GetClusterInfo()
9040     assert self.instance is not None, \
9041       "Cannot retrieve locked instance %s" % self.op.instance_name
9042     pnode = instance.primary_node
9043     nodelist = list(instance.all_nodes)
9044
9045     # OS change
9046     if self.op.os_name and not self.op.force:
9047       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9048                       self.op.force_variant)
9049       instance_os = self.op.os_name
9050     else:
9051       instance_os = instance.os
9052
9053     if self.op.disk_template:
9054       if instance.disk_template == self.op.disk_template:
9055         raise errors.OpPrereqError("Instance already has disk template %s" %
9056                                    instance.disk_template, errors.ECODE_INVAL)
9057
9058       if (instance.disk_template,
9059           self.op.disk_template) not in self._DISK_CONVERSIONS:
9060         raise errors.OpPrereqError("Unsupported disk template conversion from"
9061                                    " %s to %s" % (instance.disk_template,
9062                                                   self.op.disk_template),
9063                                    errors.ECODE_INVAL)
9064       _CheckInstanceDown(self, instance, "cannot change disk template")
9065       if self.op.disk_template in constants.DTS_NET_MIRROR:
9066         if self.op.remote_node == pnode:
9067           raise errors.OpPrereqError("Given new secondary node %s is the same"
9068                                      " as the primary node of the instance" %
9069                                      self.op.remote_node, errors.ECODE_STATE)
9070         _CheckNodeOnline(self, self.op.remote_node)
9071         _CheckNodeNotDrained(self, self.op.remote_node)
9072         # FIXME: here we assume that the old instance type is DT_PLAIN
9073         assert instance.disk_template == constants.DT_PLAIN
9074         disks = [{"size": d.size, "vg": d.logical_id[0]}
9075                  for d in instance.disks]
9076         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9077         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9078
9079     # hvparams processing
9080     if self.op.hvparams:
9081       hv_type = instance.hypervisor
9082       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9083       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9084       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9085
9086       # local check
9087       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9088       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9089       self.hv_new = hv_new # the new actual values
9090       self.hv_inst = i_hvdict # the new dict (without defaults)
9091     else:
9092       self.hv_new = self.hv_inst = {}
9093
9094     # beparams processing
9095     if self.op.beparams:
9096       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9097                                    use_none=True)
9098       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9099       be_new = cluster.SimpleFillBE(i_bedict)
9100       self.be_new = be_new # the new actual values
9101       self.be_inst = i_bedict # the new dict (without defaults)
9102     else:
9103       self.be_new = self.be_inst = {}
9104
9105     # osparams processing
9106     if self.op.osparams:
9107       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9108       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9109       self.os_inst = i_osdict # the new dict (without defaults)
9110     else:
9111       self.os_inst = {}
9112
9113     self.warn = []
9114
9115     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9116       mem_check_list = [pnode]
9117       if be_new[constants.BE_AUTO_BALANCE]:
9118         # either we changed auto_balance to yes or it was from before
9119         mem_check_list.extend(instance.secondary_nodes)
9120       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9121                                                   instance.hypervisor)
9122       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9123                                          instance.hypervisor)
9124       pninfo = nodeinfo[pnode]
9125       msg = pninfo.fail_msg
9126       if msg:
9127         # Assume the primary node is unreachable and go ahead
9128         self.warn.append("Can't get info from primary node %s: %s" %
9129                          (pnode,  msg))
9130       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9131         self.warn.append("Node data from primary node %s doesn't contain"
9132                          " free memory information" % pnode)
9133       elif instance_info.fail_msg:
9134         self.warn.append("Can't get instance runtime information: %s" %
9135                         instance_info.fail_msg)
9136       else:
9137         if instance_info.payload:
9138           current_mem = int(instance_info.payload['memory'])
9139         else:
9140           # Assume instance not running
9141           # (there is a slight race condition here, but it's not very probable,
9142           # and we have no other way to check)
9143           current_mem = 0
9144         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9145                     pninfo.payload['memory_free'])
9146         if miss_mem > 0:
9147           raise errors.OpPrereqError("This change will prevent the instance"
9148                                      " from starting, due to %d MB of memory"
9149                                      " missing on its primary node" % miss_mem,
9150                                      errors.ECODE_NORES)
9151
9152       if be_new[constants.BE_AUTO_BALANCE]:
9153         for node, nres in nodeinfo.items():
9154           if node not in instance.secondary_nodes:
9155             continue
9156           msg = nres.fail_msg
9157           if msg:
9158             self.warn.append("Can't get info from secondary node %s: %s" %
9159                              (node, msg))
9160           elif not isinstance(nres.payload.get('memory_free', None), int):
9161             self.warn.append("Secondary node %s didn't return free"
9162                              " memory information" % node)
9163           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9164             self.warn.append("Not enough memory to failover instance to"
9165                              " secondary node %s" % node)
9166
9167     # NIC processing
9168     self.nic_pnew = {}
9169     self.nic_pinst = {}
9170     for nic_op, nic_dict in self.op.nics:
9171       if nic_op == constants.DDM_REMOVE:
9172         if not instance.nics:
9173           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9174                                      errors.ECODE_INVAL)
9175         continue
9176       if nic_op != constants.DDM_ADD:
9177         # an existing nic
9178         if not instance.nics:
9179           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9180                                      " no NICs" % nic_op,
9181                                      errors.ECODE_INVAL)
9182         if nic_op < 0 or nic_op >= len(instance.nics):
9183           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9184                                      " are 0 to %d" %
9185                                      (nic_op, len(instance.nics) - 1),
9186                                      errors.ECODE_INVAL)
9187         old_nic_params = instance.nics[nic_op].nicparams
9188         old_nic_ip = instance.nics[nic_op].ip
9189       else:
9190         old_nic_params = {}
9191         old_nic_ip = None
9192
9193       update_params_dict = dict([(key, nic_dict[key])
9194                                  for key in constants.NICS_PARAMETERS
9195                                  if key in nic_dict])
9196
9197       if 'bridge' in nic_dict:
9198         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9199
9200       new_nic_params = _GetUpdatedParams(old_nic_params,
9201                                          update_params_dict)
9202       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9203       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9204       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9205       self.nic_pinst[nic_op] = new_nic_params
9206       self.nic_pnew[nic_op] = new_filled_nic_params
9207       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9208
9209       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9210         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9211         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9212         if msg:
9213           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9214           if self.op.force:
9215             self.warn.append(msg)
9216           else:
9217             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9218       if new_nic_mode == constants.NIC_MODE_ROUTED:
9219         if 'ip' in nic_dict:
9220           nic_ip = nic_dict['ip']
9221         else:
9222           nic_ip = old_nic_ip
9223         if nic_ip is None:
9224           raise errors.OpPrereqError('Cannot set the nic ip to None'
9225                                      ' on a routed nic', errors.ECODE_INVAL)
9226       if 'mac' in nic_dict:
9227         nic_mac = nic_dict['mac']
9228         if nic_mac is None:
9229           raise errors.OpPrereqError('Cannot set the nic mac to None',
9230                                      errors.ECODE_INVAL)
9231         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9232           # otherwise generate the mac
9233           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9234         else:
9235           # or validate/reserve the current one
9236           try:
9237             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9238           except errors.ReservationError:
9239             raise errors.OpPrereqError("MAC address %s already in use"
9240                                        " in cluster" % nic_mac,
9241                                        errors.ECODE_NOTUNIQUE)
9242
9243     # DISK processing
9244     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9245       raise errors.OpPrereqError("Disk operations not supported for"
9246                                  " diskless instances",
9247                                  errors.ECODE_INVAL)
9248     for disk_op, _ in self.op.disks:
9249       if disk_op == constants.DDM_REMOVE:
9250         if len(instance.disks) == 1:
9251           raise errors.OpPrereqError("Cannot remove the last disk of"
9252                                      " an instance", errors.ECODE_INVAL)
9253         _CheckInstanceDown(self, instance, "cannot remove disks")
9254
9255       if (disk_op == constants.DDM_ADD and
9256           len(instance.nics) >= constants.MAX_DISKS):
9257         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9258                                    " add more" % constants.MAX_DISKS,
9259                                    errors.ECODE_STATE)
9260       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9261         # an existing disk
9262         if disk_op < 0 or disk_op >= len(instance.disks):
9263           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9264                                      " are 0 to %d" %
9265                                      (disk_op, len(instance.disks)),
9266                                      errors.ECODE_INVAL)
9267
9268     return
9269
9270   def _ConvertPlainToDrbd(self, feedback_fn):
9271     """Converts an instance from plain to drbd.
9272
9273     """
9274     feedback_fn("Converting template to drbd")
9275     instance = self.instance
9276     pnode = instance.primary_node
9277     snode = self.op.remote_node
9278
9279     # create a fake disk info for _GenerateDiskTemplate
9280     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9281     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9282                                       instance.name, pnode, [snode],
9283                                       disk_info, None, None, 0, feedback_fn)
9284     info = _GetInstanceInfoText(instance)
9285     feedback_fn("Creating aditional volumes...")
9286     # first, create the missing data and meta devices
9287     for disk in new_disks:
9288       # unfortunately this is... not too nice
9289       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9290                             info, True)
9291       for child in disk.children:
9292         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9293     # at this stage, all new LVs have been created, we can rename the
9294     # old ones
9295     feedback_fn("Renaming original volumes...")
9296     rename_list = [(o, n.children[0].logical_id)
9297                    for (o, n) in zip(instance.disks, new_disks)]
9298     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9299     result.Raise("Failed to rename original LVs")
9300
9301     feedback_fn("Initializing DRBD devices...")
9302     # all child devices are in place, we can now create the DRBD devices
9303     for disk in new_disks:
9304       for node in [pnode, snode]:
9305         f_create = node == pnode
9306         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9307
9308     # at this point, the instance has been modified
9309     instance.disk_template = constants.DT_DRBD8
9310     instance.disks = new_disks
9311     self.cfg.Update(instance, feedback_fn)
9312
9313     # disks are created, waiting for sync
9314     disk_abort = not _WaitForSync(self, instance)
9315     if disk_abort:
9316       raise errors.OpExecError("There are some degraded disks for"
9317                                " this instance, please cleanup manually")
9318
9319   def _ConvertDrbdToPlain(self, feedback_fn):
9320     """Converts an instance from drbd to plain.
9321
9322     """
9323     instance = self.instance
9324     assert len(instance.secondary_nodes) == 1
9325     pnode = instance.primary_node
9326     snode = instance.secondary_nodes[0]
9327     feedback_fn("Converting template to plain")
9328
9329     old_disks = instance.disks
9330     new_disks = [d.children[0] for d in old_disks]
9331
9332     # copy over size and mode
9333     for parent, child in zip(old_disks, new_disks):
9334       child.size = parent.size
9335       child.mode = parent.mode
9336
9337     # update instance structure
9338     instance.disks = new_disks
9339     instance.disk_template = constants.DT_PLAIN
9340     self.cfg.Update(instance, feedback_fn)
9341
9342     feedback_fn("Removing volumes on the secondary node...")
9343     for disk in old_disks:
9344       self.cfg.SetDiskID(disk, snode)
9345       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9346       if msg:
9347         self.LogWarning("Could not remove block device %s on node %s,"
9348                         " continuing anyway: %s", disk.iv_name, snode, msg)
9349
9350     feedback_fn("Removing unneeded volumes on the primary node...")
9351     for idx, disk in enumerate(old_disks):
9352       meta = disk.children[1]
9353       self.cfg.SetDiskID(meta, pnode)
9354       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9355       if msg:
9356         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9357                         " continuing anyway: %s", idx, pnode, msg)
9358
9359   def Exec(self, feedback_fn):
9360     """Modifies an instance.
9361
9362     All parameters take effect only at the next restart of the instance.
9363
9364     """
9365     # Process here the warnings from CheckPrereq, as we don't have a
9366     # feedback_fn there.
9367     for warn in self.warn:
9368       feedback_fn("WARNING: %s" % warn)
9369
9370     result = []
9371     instance = self.instance
9372     # disk changes
9373     for disk_op, disk_dict in self.op.disks:
9374       if disk_op == constants.DDM_REMOVE:
9375         # remove the last disk
9376         device = instance.disks.pop()
9377         device_idx = len(instance.disks)
9378         for node, disk in device.ComputeNodeTree(instance.primary_node):
9379           self.cfg.SetDiskID(disk, node)
9380           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9381           if msg:
9382             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9383                             " continuing anyway", device_idx, node, msg)
9384         result.append(("disk/%d" % device_idx, "remove"))
9385       elif disk_op == constants.DDM_ADD:
9386         # add a new disk
9387         if instance.disk_template == constants.DT_FILE:
9388           file_driver, file_path = instance.disks[0].logical_id
9389           file_path = os.path.dirname(file_path)
9390         else:
9391           file_driver = file_path = None
9392         disk_idx_base = len(instance.disks)
9393         new_disk = _GenerateDiskTemplate(self,
9394                                          instance.disk_template,
9395                                          instance.name, instance.primary_node,
9396                                          instance.secondary_nodes,
9397                                          [disk_dict],
9398                                          file_path,
9399                                          file_driver,
9400                                          disk_idx_base, feedback_fn)[0]
9401         instance.disks.append(new_disk)
9402         info = _GetInstanceInfoText(instance)
9403
9404         logging.info("Creating volume %s for instance %s",
9405                      new_disk.iv_name, instance.name)
9406         # Note: this needs to be kept in sync with _CreateDisks
9407         #HARDCODE
9408         for node in instance.all_nodes:
9409           f_create = node == instance.primary_node
9410           try:
9411             _CreateBlockDev(self, node, instance, new_disk,
9412                             f_create, info, f_create)
9413           except errors.OpExecError, err:
9414             self.LogWarning("Failed to create volume %s (%s) on"
9415                             " node %s: %s",
9416                             new_disk.iv_name, new_disk, node, err)
9417         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9418                        (new_disk.size, new_disk.mode)))
9419       else:
9420         # change a given disk
9421         instance.disks[disk_op].mode = disk_dict['mode']
9422         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9423
9424     if self.op.disk_template:
9425       r_shut = _ShutdownInstanceDisks(self, instance)
9426       if not r_shut:
9427         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9428                                  " proceed with disk template conversion")
9429       mode = (instance.disk_template, self.op.disk_template)
9430       try:
9431         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9432       except:
9433         self.cfg.ReleaseDRBDMinors(instance.name)
9434         raise
9435       result.append(("disk_template", self.op.disk_template))
9436
9437     # NIC changes
9438     for nic_op, nic_dict in self.op.nics:
9439       if nic_op == constants.DDM_REMOVE:
9440         # remove the last nic
9441         del instance.nics[-1]
9442         result.append(("nic.%d" % len(instance.nics), "remove"))
9443       elif nic_op == constants.DDM_ADD:
9444         # mac and bridge should be set, by now
9445         mac = nic_dict['mac']
9446         ip = nic_dict.get('ip', None)
9447         nicparams = self.nic_pinst[constants.DDM_ADD]
9448         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9449         instance.nics.append(new_nic)
9450         result.append(("nic.%d" % (len(instance.nics) - 1),
9451                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9452                        (new_nic.mac, new_nic.ip,
9453                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9454                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9455                        )))
9456       else:
9457         for key in 'mac', 'ip':
9458           if key in nic_dict:
9459             setattr(instance.nics[nic_op], key, nic_dict[key])
9460         if nic_op in self.nic_pinst:
9461           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9462         for key, val in nic_dict.iteritems():
9463           result.append(("nic.%s/%d" % (key, nic_op), val))
9464
9465     # hvparams changes
9466     if self.op.hvparams:
9467       instance.hvparams = self.hv_inst
9468       for key, val in self.op.hvparams.iteritems():
9469         result.append(("hv/%s" % key, val))
9470
9471     # beparams changes
9472     if self.op.beparams:
9473       instance.beparams = self.be_inst
9474       for key, val in self.op.beparams.iteritems():
9475         result.append(("be/%s" % key, val))
9476
9477     # OS change
9478     if self.op.os_name:
9479       instance.os = self.op.os_name
9480
9481     # osparams changes
9482     if self.op.osparams:
9483       instance.osparams = self.os_inst
9484       for key, val in self.op.osparams.iteritems():
9485         result.append(("os/%s" % key, val))
9486
9487     self.cfg.Update(instance, feedback_fn)
9488
9489     return result
9490
9491   _DISK_CONVERSIONS = {
9492     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9493     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9494     }
9495
9496
9497 class LUBackupQuery(NoHooksLU):
9498   """Query the exports list
9499
9500   """
9501   REQ_BGL = False
9502
9503   def ExpandNames(self):
9504     self.needed_locks = {}
9505     self.share_locks[locking.LEVEL_NODE] = 1
9506     if not self.op.nodes:
9507       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9508     else:
9509       self.needed_locks[locking.LEVEL_NODE] = \
9510         _GetWantedNodes(self, self.op.nodes)
9511
9512   def Exec(self, feedback_fn):
9513     """Compute the list of all the exported system images.
9514
9515     @rtype: dict
9516     @return: a dictionary with the structure node->(export-list)
9517         where export-list is a list of the instances exported on
9518         that node.
9519
9520     """
9521     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9522     rpcresult = self.rpc.call_export_list(self.nodes)
9523     result = {}
9524     for node in rpcresult:
9525       if rpcresult[node].fail_msg:
9526         result[node] = False
9527       else:
9528         result[node] = rpcresult[node].payload
9529
9530     return result
9531
9532
9533 class LUBackupPrepare(NoHooksLU):
9534   """Prepares an instance for an export and returns useful information.
9535
9536   """
9537   REQ_BGL = False
9538
9539   def ExpandNames(self):
9540     self._ExpandAndLockInstance()
9541
9542   def CheckPrereq(self):
9543     """Check prerequisites.
9544
9545     """
9546     instance_name = self.op.instance_name
9547
9548     self.instance = self.cfg.GetInstanceInfo(instance_name)
9549     assert self.instance is not None, \
9550           "Cannot retrieve locked instance %s" % self.op.instance_name
9551     _CheckNodeOnline(self, self.instance.primary_node)
9552
9553     self._cds = _GetClusterDomainSecret()
9554
9555   def Exec(self, feedback_fn):
9556     """Prepares an instance for an export.
9557
9558     """
9559     instance = self.instance
9560
9561     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9562       salt = utils.GenerateSecret(8)
9563
9564       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9565       result = self.rpc.call_x509_cert_create(instance.primary_node,
9566                                               constants.RIE_CERT_VALIDITY)
9567       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9568
9569       (name, cert_pem) = result.payload
9570
9571       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9572                                              cert_pem)
9573
9574       return {
9575         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9576         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9577                           salt),
9578         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9579         }
9580
9581     return None
9582
9583
9584 class LUBackupExport(LogicalUnit):
9585   """Export an instance to an image in the cluster.
9586
9587   """
9588   HPATH = "instance-export"
9589   HTYPE = constants.HTYPE_INSTANCE
9590   REQ_BGL = False
9591
9592   def CheckArguments(self):
9593     """Check the arguments.
9594
9595     """
9596     self.x509_key_name = self.op.x509_key_name
9597     self.dest_x509_ca_pem = self.op.destination_x509_ca
9598
9599     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9600       if not self.x509_key_name:
9601         raise errors.OpPrereqError("Missing X509 key name for encryption",
9602                                    errors.ECODE_INVAL)
9603
9604       if not self.dest_x509_ca_pem:
9605         raise errors.OpPrereqError("Missing destination X509 CA",
9606                                    errors.ECODE_INVAL)
9607
9608   def ExpandNames(self):
9609     self._ExpandAndLockInstance()
9610
9611     # Lock all nodes for local exports
9612     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9613       # FIXME: lock only instance primary and destination node
9614       #
9615       # Sad but true, for now we have do lock all nodes, as we don't know where
9616       # the previous export might be, and in this LU we search for it and
9617       # remove it from its current node. In the future we could fix this by:
9618       #  - making a tasklet to search (share-lock all), then create the
9619       #    new one, then one to remove, after
9620       #  - removing the removal operation altogether
9621       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9622
9623   def DeclareLocks(self, level):
9624     """Last minute lock declaration."""
9625     # All nodes are locked anyway, so nothing to do here.
9626
9627   def BuildHooksEnv(self):
9628     """Build hooks env.
9629
9630     This will run on the master, primary node and target node.
9631
9632     """
9633     env = {
9634       "EXPORT_MODE": self.op.mode,
9635       "EXPORT_NODE": self.op.target_node,
9636       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9637       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9638       # TODO: Generic function for boolean env variables
9639       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9640       }
9641
9642     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9643
9644     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9645
9646     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9647       nl.append(self.op.target_node)
9648
9649     return env, nl, nl
9650
9651   def CheckPrereq(self):
9652     """Check prerequisites.
9653
9654     This checks that the instance and node names are valid.
9655
9656     """
9657     instance_name = self.op.instance_name
9658
9659     self.instance = self.cfg.GetInstanceInfo(instance_name)
9660     assert self.instance is not None, \
9661           "Cannot retrieve locked instance %s" % self.op.instance_name
9662     _CheckNodeOnline(self, self.instance.primary_node)
9663
9664     if (self.op.remove_instance and self.instance.admin_up and
9665         not self.op.shutdown):
9666       raise errors.OpPrereqError("Can not remove instance without shutting it"
9667                                  " down before")
9668
9669     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9670       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9671       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9672       assert self.dst_node is not None
9673
9674       _CheckNodeOnline(self, self.dst_node.name)
9675       _CheckNodeNotDrained(self, self.dst_node.name)
9676
9677       self._cds = None
9678       self.dest_disk_info = None
9679       self.dest_x509_ca = None
9680
9681     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9682       self.dst_node = None
9683
9684       if len(self.op.target_node) != len(self.instance.disks):
9685         raise errors.OpPrereqError(("Received destination information for %s"
9686                                     " disks, but instance %s has %s disks") %
9687                                    (len(self.op.target_node), instance_name,
9688                                     len(self.instance.disks)),
9689                                    errors.ECODE_INVAL)
9690
9691       cds = _GetClusterDomainSecret()
9692
9693       # Check X509 key name
9694       try:
9695         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9696       except (TypeError, ValueError), err:
9697         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9698
9699       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9700         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9701                                    errors.ECODE_INVAL)
9702
9703       # Load and verify CA
9704       try:
9705         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9706       except OpenSSL.crypto.Error, err:
9707         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9708                                    (err, ), errors.ECODE_INVAL)
9709
9710       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9711       if errcode is not None:
9712         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9713                                    (msg, ), errors.ECODE_INVAL)
9714
9715       self.dest_x509_ca = cert
9716
9717       # Verify target information
9718       disk_info = []
9719       for idx, disk_data in enumerate(self.op.target_node):
9720         try:
9721           (host, port, magic) = \
9722             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9723         except errors.GenericError, err:
9724           raise errors.OpPrereqError("Target info for disk %s: %s" %
9725                                      (idx, err), errors.ECODE_INVAL)
9726
9727         disk_info.append((host, port, magic))
9728
9729       assert len(disk_info) == len(self.op.target_node)
9730       self.dest_disk_info = disk_info
9731
9732     else:
9733       raise errors.ProgrammerError("Unhandled export mode %r" %
9734                                    self.op.mode)
9735
9736     # instance disk type verification
9737     # TODO: Implement export support for file-based disks
9738     for disk in self.instance.disks:
9739       if disk.dev_type == constants.LD_FILE:
9740         raise errors.OpPrereqError("Export not supported for instances with"
9741                                    " file-based disks", errors.ECODE_INVAL)
9742
9743   def _CleanupExports(self, feedback_fn):
9744     """Removes exports of current instance from all other nodes.
9745
9746     If an instance in a cluster with nodes A..D was exported to node C, its
9747     exports will be removed from the nodes A, B and D.
9748
9749     """
9750     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9751
9752     nodelist = self.cfg.GetNodeList()
9753     nodelist.remove(self.dst_node.name)
9754
9755     # on one-node clusters nodelist will be empty after the removal
9756     # if we proceed the backup would be removed because OpBackupQuery
9757     # substitutes an empty list with the full cluster node list.
9758     iname = self.instance.name
9759     if nodelist:
9760       feedback_fn("Removing old exports for instance %s" % iname)
9761       exportlist = self.rpc.call_export_list(nodelist)
9762       for node in exportlist:
9763         if exportlist[node].fail_msg:
9764           continue
9765         if iname in exportlist[node].payload:
9766           msg = self.rpc.call_export_remove(node, iname).fail_msg
9767           if msg:
9768             self.LogWarning("Could not remove older export for instance %s"
9769                             " on node %s: %s", iname, node, msg)
9770
9771   def Exec(self, feedback_fn):
9772     """Export an instance to an image in the cluster.
9773
9774     """
9775     assert self.op.mode in constants.EXPORT_MODES
9776
9777     instance = self.instance
9778     src_node = instance.primary_node
9779
9780     if self.op.shutdown:
9781       # shutdown the instance, but not the disks
9782       feedback_fn("Shutting down instance %s" % instance.name)
9783       result = self.rpc.call_instance_shutdown(src_node, instance,
9784                                                self.op.shutdown_timeout)
9785       # TODO: Maybe ignore failures if ignore_remove_failures is set
9786       result.Raise("Could not shutdown instance %s on"
9787                    " node %s" % (instance.name, src_node))
9788
9789     # set the disks ID correctly since call_instance_start needs the
9790     # correct drbd minor to create the symlinks
9791     for disk in instance.disks:
9792       self.cfg.SetDiskID(disk, src_node)
9793
9794     activate_disks = (not instance.admin_up)
9795
9796     if activate_disks:
9797       # Activate the instance disks if we'exporting a stopped instance
9798       feedback_fn("Activating disks for %s" % instance.name)
9799       _StartInstanceDisks(self, instance, None)
9800
9801     try:
9802       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9803                                                      instance)
9804
9805       helper.CreateSnapshots()
9806       try:
9807         if (self.op.shutdown and instance.admin_up and
9808             not self.op.remove_instance):
9809           assert not activate_disks
9810           feedback_fn("Starting instance %s" % instance.name)
9811           result = self.rpc.call_instance_start(src_node, instance, None, None)
9812           msg = result.fail_msg
9813           if msg:
9814             feedback_fn("Failed to start instance: %s" % msg)
9815             _ShutdownInstanceDisks(self, instance)
9816             raise errors.OpExecError("Could not start instance: %s" % msg)
9817
9818         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9819           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9820         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9821           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9822           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9823
9824           (key_name, _, _) = self.x509_key_name
9825
9826           dest_ca_pem = \
9827             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9828                                             self.dest_x509_ca)
9829
9830           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9831                                                      key_name, dest_ca_pem,
9832                                                      timeouts)
9833       finally:
9834         helper.Cleanup()
9835
9836       # Check for backwards compatibility
9837       assert len(dresults) == len(instance.disks)
9838       assert compat.all(isinstance(i, bool) for i in dresults), \
9839              "Not all results are boolean: %r" % dresults
9840
9841     finally:
9842       if activate_disks:
9843         feedback_fn("Deactivating disks for %s" % instance.name)
9844         _ShutdownInstanceDisks(self, instance)
9845
9846     if not (compat.all(dresults) and fin_resu):
9847       failures = []
9848       if not fin_resu:
9849         failures.append("export finalization")
9850       if not compat.all(dresults):
9851         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9852                                if not dsk)
9853         failures.append("disk export: disk(s) %s" % fdsk)
9854
9855       raise errors.OpExecError("Export failed, errors in %s" %
9856                                utils.CommaJoin(failures))
9857
9858     # At this point, the export was successful, we can cleanup/finish
9859
9860     # Remove instance if requested
9861     if self.op.remove_instance:
9862       feedback_fn("Removing instance %s" % instance.name)
9863       _RemoveInstance(self, feedback_fn, instance,
9864                       self.op.ignore_remove_failures)
9865
9866     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9867       self._CleanupExports(feedback_fn)
9868
9869     return fin_resu, dresults
9870
9871
9872 class LUBackupRemove(NoHooksLU):
9873   """Remove exports related to the named instance.
9874
9875   """
9876   REQ_BGL = False
9877
9878   def ExpandNames(self):
9879     self.needed_locks = {}
9880     # We need all nodes to be locked in order for RemoveExport to work, but we
9881     # don't need to lock the instance itself, as nothing will happen to it (and
9882     # we can remove exports also for a removed instance)
9883     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9884
9885   def Exec(self, feedback_fn):
9886     """Remove any export.
9887
9888     """
9889     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9890     # If the instance was not found we'll try with the name that was passed in.
9891     # This will only work if it was an FQDN, though.
9892     fqdn_warn = False
9893     if not instance_name:
9894       fqdn_warn = True
9895       instance_name = self.op.instance_name
9896
9897     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9898     exportlist = self.rpc.call_export_list(locked_nodes)
9899     found = False
9900     for node in exportlist:
9901       msg = exportlist[node].fail_msg
9902       if msg:
9903         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9904         continue
9905       if instance_name in exportlist[node].payload:
9906         found = True
9907         result = self.rpc.call_export_remove(node, instance_name)
9908         msg = result.fail_msg
9909         if msg:
9910           logging.error("Could not remove export for instance %s"
9911                         " on node %s: %s", instance_name, node, msg)
9912
9913     if fqdn_warn and not found:
9914       feedback_fn("Export not found. If trying to remove an export belonging"
9915                   " to a deleted instance please use its Fully Qualified"
9916                   " Domain Name.")
9917
9918
9919 class LUGroupAdd(LogicalUnit):
9920   """Logical unit for creating node groups.
9921
9922   """
9923   HPATH = "group-add"
9924   HTYPE = constants.HTYPE_GROUP
9925   REQ_BGL = False
9926
9927   def ExpandNames(self):
9928     # We need the new group's UUID here so that we can create and acquire the
9929     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
9930     # that it should not check whether the UUID exists in the configuration.
9931     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
9932     self.needed_locks = {}
9933     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
9934
9935   def CheckPrereq(self):
9936     """Check prerequisites.
9937
9938     This checks that the given group name is not an existing node group
9939     already.
9940
9941     """
9942     try:
9943       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9944     except errors.OpPrereqError:
9945       pass
9946     else:
9947       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
9948                                  " node group (UUID: %s)" %
9949                                  (self.op.group_name, existing_uuid),
9950                                  errors.ECODE_EXISTS)
9951
9952     if self.op.ndparams:
9953       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
9954
9955   def BuildHooksEnv(self):
9956     """Build hooks env.
9957
9958     """
9959     env = {
9960       "GROUP_NAME": self.op.group_name,
9961       }
9962     mn = self.cfg.GetMasterNode()
9963     return env, [mn], [mn]
9964
9965   def Exec(self, feedback_fn):
9966     """Add the node group to the cluster.
9967
9968     """
9969     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
9970                                   uuid=self.group_uuid,
9971                                   alloc_policy=self.op.alloc_policy,
9972                                   ndparams=self.op.ndparams)
9973
9974     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
9975     del self.remove_locks[locking.LEVEL_NODEGROUP]
9976
9977
9978 class LUGroupAssignNodes(NoHooksLU):
9979   """Logical unit for assigning nodes to groups.
9980
9981   """
9982   REQ_BGL = False
9983
9984   def ExpandNames(self):
9985     # These raise errors.OpPrereqError on their own:
9986     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
9987     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9988
9989     # We want to lock all the affected nodes and groups. We have readily
9990     # available the list of nodes, and the *destination* group. To gather the
9991     # list of "source" groups, we need to fetch node information.
9992     self.node_data = self.cfg.GetAllNodesInfo()
9993     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
9994     affected_groups.add(self.group_uuid)
9995
9996     self.needed_locks = {
9997       locking.LEVEL_NODEGROUP: list(affected_groups),
9998       locking.LEVEL_NODE: self.op.nodes,
9999       }
10000
10001   def CheckPrereq(self):
10002     """Check prerequisites.
10003
10004     """
10005     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10006     instance_data = self.cfg.GetAllInstancesInfo()
10007
10008     if self.group is None:
10009       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10010                                (self.op.group_name, self.group_uuid))
10011
10012     (new_splits, previous_splits) = \
10013       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10014                                              for node in self.op.nodes],
10015                                             self.node_data, instance_data)
10016
10017     if new_splits:
10018       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10019
10020       if not self.op.force:
10021         raise errors.OpExecError("The following instances get split by this"
10022                                  " change and --force was not given: %s" %
10023                                  fmt_new_splits)
10024       else:
10025         self.LogWarning("This operation will split the following instances: %s",
10026                         fmt_new_splits)
10027
10028         if previous_splits:
10029           self.LogWarning("In addition, these already-split instances continue"
10030                           " to be spit across groups: %s",
10031                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10032
10033   def Exec(self, feedback_fn):
10034     """Assign nodes to a new group.
10035
10036     """
10037     for node in self.op.nodes:
10038       self.node_data[node].group = self.group_uuid
10039
10040     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10041
10042   @staticmethod
10043   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10044     """Check for split instances after a node assignment.
10045
10046     This method considers a series of node assignments as an atomic operation,
10047     and returns information about split instances after applying the set of
10048     changes.
10049
10050     In particular, it returns information about newly split instances, and
10051     instances that were already split, and remain so after the change.
10052
10053     Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10054     considered.
10055
10056     @type changes: list of (node_name, new_group_uuid) pairs.
10057     @param changes: list of node assignments to consider.
10058     @param node_data: a dict with data for all nodes
10059     @param instance_data: a dict with all instances to consider
10060     @rtype: a two-tuple
10061     @return: a list of instances that were previously okay and result split as a
10062       consequence of this change, and a list of instances that were previously
10063       split and this change does not fix.
10064
10065     """
10066     changed_nodes = dict((node, group) for node, group in changes
10067                          if node_data[node].group != group)
10068
10069     all_split_instances = set()
10070     previously_split_instances = set()
10071
10072     def InstanceNodes(instance):
10073       return [instance.primary_node] + list(instance.secondary_nodes)
10074
10075     for inst in instance_data.values():
10076       if inst.disk_template not in constants.DTS_NET_MIRROR:
10077         continue
10078
10079       instance_nodes = InstanceNodes(inst)
10080
10081       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10082         previously_split_instances.add(inst.name)
10083
10084       if len(set(changed_nodes.get(node, node_data[node].group)
10085                  for node in instance_nodes)) > 1:
10086         all_split_instances.add(inst.name)
10087
10088     return (list(all_split_instances - previously_split_instances),
10089             list(previously_split_instances & all_split_instances))
10090
10091
10092 class _GroupQuery(_QueryBase):
10093
10094   FIELDS = query.GROUP_FIELDS
10095
10096   def ExpandNames(self, lu):
10097     lu.needed_locks = {}
10098
10099     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10100     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10101
10102     if not self.names:
10103       self.wanted = [name_to_uuid[name]
10104                      for name in utils.NiceSort(name_to_uuid.keys())]
10105     else:
10106       # Accept names to be either names or UUIDs.
10107       missing = []
10108       self.wanted = []
10109       all_uuid = frozenset(self._all_groups.keys())
10110
10111       for name in self.names:
10112         if name in all_uuid:
10113           self.wanted.append(name)
10114         elif name in name_to_uuid:
10115           self.wanted.append(name_to_uuid[name])
10116         else:
10117           missing.append(name)
10118
10119       if missing:
10120         raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10121                                    errors.ECODE_NOENT)
10122
10123   def DeclareLocks(self, lu, level):
10124     pass
10125
10126   def _GetQueryData(self, lu):
10127     """Computes the list of node groups and their attributes.
10128
10129     """
10130     do_nodes = query.GQ_NODE in self.requested_data
10131     do_instances = query.GQ_INST in self.requested_data
10132
10133     group_to_nodes = None
10134     group_to_instances = None
10135
10136     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10137     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10138     # latter GetAllInstancesInfo() is not enough, for we have to go through
10139     # instance->node. Hence, we will need to process nodes even if we only need
10140     # instance information.
10141     if do_nodes or do_instances:
10142       all_nodes = lu.cfg.GetAllNodesInfo()
10143       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10144       node_to_group = {}
10145
10146       for node in all_nodes.values():
10147         if node.group in group_to_nodes:
10148           group_to_nodes[node.group].append(node.name)
10149           node_to_group[node.name] = node.group
10150
10151       if do_instances:
10152         all_instances = lu.cfg.GetAllInstancesInfo()
10153         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10154
10155         for instance in all_instances.values():
10156           node = instance.primary_node
10157           if node in node_to_group:
10158             group_to_instances[node_to_group[node]].append(instance.name)
10159
10160         if not do_nodes:
10161           # Do not pass on node information if it was not requested.
10162           group_to_nodes = None
10163
10164     return query.GroupQueryData([self._all_groups[uuid]
10165                                  for uuid in self.wanted],
10166                                 group_to_nodes, group_to_instances)
10167
10168
10169 class LUGroupQuery(NoHooksLU):
10170   """Logical unit for querying node groups.
10171
10172   """
10173   REQ_BGL = False
10174
10175   def CheckArguments(self):
10176     self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10177
10178   def ExpandNames(self):
10179     self.gq.ExpandNames(self)
10180
10181   def Exec(self, feedback_fn):
10182     return self.gq.OldStyleQuery(self)
10183
10184
10185 class LUGroupSetParams(LogicalUnit):
10186   """Modifies the parameters of a node group.
10187
10188   """
10189   HPATH = "group-modify"
10190   HTYPE = constants.HTYPE_GROUP
10191   REQ_BGL = False
10192
10193   def CheckArguments(self):
10194     all_changes = [
10195       self.op.ndparams,
10196       self.op.alloc_policy,
10197       ]
10198
10199     if all_changes.count(None) == len(all_changes):
10200       raise errors.OpPrereqError("Please pass at least one modification",
10201                                  errors.ECODE_INVAL)
10202
10203   def ExpandNames(self):
10204     # This raises errors.OpPrereqError on its own:
10205     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10206
10207     self.needed_locks = {
10208       locking.LEVEL_NODEGROUP: [self.group_uuid],
10209       }
10210
10211   def CheckPrereq(self):
10212     """Check prerequisites.
10213
10214     """
10215     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10216
10217     if self.group is None:
10218       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10219                                (self.op.group_name, self.group_uuid))
10220
10221     if self.op.ndparams:
10222       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10223       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10224       self.new_ndparams = new_ndparams
10225
10226   def BuildHooksEnv(self):
10227     """Build hooks env.
10228
10229     """
10230     env = {
10231       "GROUP_NAME": self.op.group_name,
10232       "NEW_ALLOC_POLICY": self.op.alloc_policy,
10233       }
10234     mn = self.cfg.GetMasterNode()
10235     return env, [mn], [mn]
10236
10237   def Exec(self, feedback_fn):
10238     """Modifies the node group.
10239
10240     """
10241     result = []
10242
10243     if self.op.ndparams:
10244       self.group.ndparams = self.new_ndparams
10245       result.append(("ndparams", str(self.group.ndparams)))
10246
10247     if self.op.alloc_policy:
10248       self.group.alloc_policy = self.op.alloc_policy
10249
10250     self.cfg.Update(self.group, feedback_fn)
10251     return result
10252
10253
10254
10255 class LUGroupRemove(LogicalUnit):
10256   HPATH = "group-remove"
10257   HTYPE = constants.HTYPE_GROUP
10258   REQ_BGL = False
10259
10260   def ExpandNames(self):
10261     # This will raises errors.OpPrereqError on its own:
10262     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10263     self.needed_locks = {
10264       locking.LEVEL_NODEGROUP: [self.group_uuid],
10265       }
10266
10267   def CheckPrereq(self):
10268     """Check prerequisites.
10269
10270     This checks that the given group name exists as a node group, that is
10271     empty (i.e., contains no nodes), and that is not the last group of the
10272     cluster.
10273
10274     """
10275     # Verify that the group is empty.
10276     group_nodes = [node.name
10277                    for node in self.cfg.GetAllNodesInfo().values()
10278                    if node.group == self.group_uuid]
10279
10280     if group_nodes:
10281       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10282                                  " nodes: %s" %
10283                                  (self.op.group_name,
10284                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10285                                  errors.ECODE_STATE)
10286
10287     # Verify the cluster would not be left group-less.
10288     if len(self.cfg.GetNodeGroupList()) == 1:
10289       raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10290                                  " which cannot be left without at least one"
10291                                  " group" % self.op.group_name,
10292                                  errors.ECODE_STATE)
10293
10294   def BuildHooksEnv(self):
10295     """Build hooks env.
10296
10297     """
10298     env = {
10299       "GROUP_NAME": self.op.group_name,
10300       }
10301     mn = self.cfg.GetMasterNode()
10302     return env, [mn], [mn]
10303
10304   def Exec(self, feedback_fn):
10305     """Remove the node group.
10306
10307     """
10308     try:
10309       self.cfg.RemoveNodeGroup(self.group_uuid)
10310     except errors.ConfigurationError:
10311       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10312                                (self.op.group_name, self.group_uuid))
10313
10314     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10315
10316
10317 class LUGroupRename(LogicalUnit):
10318   HPATH = "group-rename"
10319   HTYPE = constants.HTYPE_GROUP
10320   REQ_BGL = False
10321
10322   def ExpandNames(self):
10323     # This raises errors.OpPrereqError on its own:
10324     self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10325
10326     self.needed_locks = {
10327       locking.LEVEL_NODEGROUP: [self.group_uuid],
10328       }
10329
10330   def CheckPrereq(self):
10331     """Check prerequisites.
10332
10333     This checks that the given old_name exists as a node group, and that
10334     new_name doesn't.
10335
10336     """
10337     try:
10338       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10339     except errors.OpPrereqError:
10340       pass
10341     else:
10342       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10343                                  " node group (UUID: %s)" %
10344                                  (self.op.new_name, new_name_uuid),
10345                                  errors.ECODE_EXISTS)
10346
10347   def BuildHooksEnv(self):
10348     """Build hooks env.
10349
10350     """
10351     env = {
10352       "OLD_NAME": self.op.old_name,
10353       "NEW_NAME": self.op.new_name,
10354       }
10355
10356     mn = self.cfg.GetMasterNode()
10357     all_nodes = self.cfg.GetAllNodesInfo()
10358     run_nodes = [mn]
10359     all_nodes.pop(mn, None)
10360
10361     for node in all_nodes.values():
10362       if node.group == self.group_uuid:
10363         run_nodes.append(node.name)
10364
10365     return env, run_nodes, run_nodes
10366
10367   def Exec(self, feedback_fn):
10368     """Rename the node group.
10369
10370     """
10371     group = self.cfg.GetNodeGroup(self.group_uuid)
10372
10373     if group is None:
10374       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10375                                (self.op.old_name, self.group_uuid))
10376
10377     group.name = self.op.new_name
10378     self.cfg.Update(group, feedback_fn)
10379
10380     return self.op.new_name
10381
10382
10383 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10384   """Generic tags LU.
10385
10386   This is an abstract class which is the parent of all the other tags LUs.
10387
10388   """
10389
10390   def ExpandNames(self):
10391     self.needed_locks = {}
10392     if self.op.kind == constants.TAG_NODE:
10393       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10394       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10395     elif self.op.kind == constants.TAG_INSTANCE:
10396       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10397       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10398
10399     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10400     # not possible to acquire the BGL based on opcode parameters)
10401
10402   def CheckPrereq(self):
10403     """Check prerequisites.
10404
10405     """
10406     if self.op.kind == constants.TAG_CLUSTER:
10407       self.target = self.cfg.GetClusterInfo()
10408     elif self.op.kind == constants.TAG_NODE:
10409       self.target = self.cfg.GetNodeInfo(self.op.name)
10410     elif self.op.kind == constants.TAG_INSTANCE:
10411       self.target = self.cfg.GetInstanceInfo(self.op.name)
10412     else:
10413       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10414                                  str(self.op.kind), errors.ECODE_INVAL)
10415
10416
10417 class LUGetTags(TagsLU):
10418   """Returns the tags of a given object.
10419
10420   """
10421   REQ_BGL = False
10422
10423   def ExpandNames(self):
10424     TagsLU.ExpandNames(self)
10425
10426     # Share locks as this is only a read operation
10427     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10428
10429   def Exec(self, feedback_fn):
10430     """Returns the tag list.
10431
10432     """
10433     return list(self.target.GetTags())
10434
10435
10436 class LUSearchTags(NoHooksLU):
10437   """Searches the tags for a given pattern.
10438
10439   """
10440   REQ_BGL = False
10441
10442   def ExpandNames(self):
10443     self.needed_locks = {}
10444
10445   def CheckPrereq(self):
10446     """Check prerequisites.
10447
10448     This checks the pattern passed for validity by compiling it.
10449
10450     """
10451     try:
10452       self.re = re.compile(self.op.pattern)
10453     except re.error, err:
10454       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10455                                  (self.op.pattern, err), errors.ECODE_INVAL)
10456
10457   def Exec(self, feedback_fn):
10458     """Returns the tag list.
10459
10460     """
10461     cfg = self.cfg
10462     tgts = [("/cluster", cfg.GetClusterInfo())]
10463     ilist = cfg.GetAllInstancesInfo().values()
10464     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10465     nlist = cfg.GetAllNodesInfo().values()
10466     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10467     results = []
10468     for path, target in tgts:
10469       for tag in target.GetTags():
10470         if self.re.search(tag):
10471           results.append((path, tag))
10472     return results
10473
10474
10475 class LUAddTags(TagsLU):
10476   """Sets a tag on a given object.
10477
10478   """
10479   REQ_BGL = False
10480
10481   def CheckPrereq(self):
10482     """Check prerequisites.
10483
10484     This checks the type and length of the tag name and value.
10485
10486     """
10487     TagsLU.CheckPrereq(self)
10488     for tag in self.op.tags:
10489       objects.TaggableObject.ValidateTag(tag)
10490
10491   def Exec(self, feedback_fn):
10492     """Sets the tag.
10493
10494     """
10495     try:
10496       for tag in self.op.tags:
10497         self.target.AddTag(tag)
10498     except errors.TagError, err:
10499       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10500     self.cfg.Update(self.target, feedback_fn)
10501
10502
10503 class LUDelTags(TagsLU):
10504   """Delete a list of tags from a given object.
10505
10506   """
10507   REQ_BGL = False
10508
10509   def CheckPrereq(self):
10510     """Check prerequisites.
10511
10512     This checks that we have the given tag.
10513
10514     """
10515     TagsLU.CheckPrereq(self)
10516     for tag in self.op.tags:
10517       objects.TaggableObject.ValidateTag(tag)
10518     del_tags = frozenset(self.op.tags)
10519     cur_tags = self.target.GetTags()
10520
10521     diff_tags = del_tags - cur_tags
10522     if diff_tags:
10523       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10524       raise errors.OpPrereqError("Tag(s) %s not found" %
10525                                  (utils.CommaJoin(diff_names), ),
10526                                  errors.ECODE_NOENT)
10527
10528   def Exec(self, feedback_fn):
10529     """Remove the tag from the object.
10530
10531     """
10532     for tag in self.op.tags:
10533       self.target.RemoveTag(tag)
10534     self.cfg.Update(self.target, feedback_fn)
10535
10536
10537 class LUTestDelay(NoHooksLU):
10538   """Sleep for a specified amount of time.
10539
10540   This LU sleeps on the master and/or nodes for a specified amount of
10541   time.
10542
10543   """
10544   REQ_BGL = False
10545
10546   def ExpandNames(self):
10547     """Expand names and set required locks.
10548
10549     This expands the node list, if any.
10550
10551     """
10552     self.needed_locks = {}
10553     if self.op.on_nodes:
10554       # _GetWantedNodes can be used here, but is not always appropriate to use
10555       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10556       # more information.
10557       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10558       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10559
10560   def _TestDelay(self):
10561     """Do the actual sleep.
10562
10563     """
10564     if self.op.on_master:
10565       if not utils.TestDelay(self.op.duration):
10566         raise errors.OpExecError("Error during master delay test")
10567     if self.op.on_nodes:
10568       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10569       for node, node_result in result.items():
10570         node_result.Raise("Failure during rpc call to node %s" % node)
10571
10572   def Exec(self, feedback_fn):
10573     """Execute the test delay opcode, with the wanted repetitions.
10574
10575     """
10576     if self.op.repeat == 0:
10577       self._TestDelay()
10578     else:
10579       top_value = self.op.repeat - 1
10580       for i in range(self.op.repeat):
10581         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10582         self._TestDelay()
10583
10584
10585 class LUTestJobqueue(NoHooksLU):
10586   """Utility LU to test some aspects of the job queue.
10587
10588   """
10589   REQ_BGL = False
10590
10591   # Must be lower than default timeout for WaitForJobChange to see whether it
10592   # notices changed jobs
10593   _CLIENT_CONNECT_TIMEOUT = 20.0
10594   _CLIENT_CONFIRM_TIMEOUT = 60.0
10595
10596   @classmethod
10597   def _NotifyUsingSocket(cls, cb, errcls):
10598     """Opens a Unix socket and waits for another program to connect.
10599
10600     @type cb: callable
10601     @param cb: Callback to send socket name to client
10602     @type errcls: class
10603     @param errcls: Exception class to use for errors
10604
10605     """
10606     # Using a temporary directory as there's no easy way to create temporary
10607     # sockets without writing a custom loop around tempfile.mktemp and
10608     # socket.bind
10609     tmpdir = tempfile.mkdtemp()
10610     try:
10611       tmpsock = utils.PathJoin(tmpdir, "sock")
10612
10613       logging.debug("Creating temporary socket at %s", tmpsock)
10614       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10615       try:
10616         sock.bind(tmpsock)
10617         sock.listen(1)
10618
10619         # Send details to client
10620         cb(tmpsock)
10621
10622         # Wait for client to connect before continuing
10623         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10624         try:
10625           (conn, _) = sock.accept()
10626         except socket.error, err:
10627           raise errcls("Client didn't connect in time (%s)" % err)
10628       finally:
10629         sock.close()
10630     finally:
10631       # Remove as soon as client is connected
10632       shutil.rmtree(tmpdir)
10633
10634     # Wait for client to close
10635     try:
10636       try:
10637         # pylint: disable-msg=E1101
10638         # Instance of '_socketobject' has no ... member
10639         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10640         conn.recv(1)
10641       except socket.error, err:
10642         raise errcls("Client failed to confirm notification (%s)" % err)
10643     finally:
10644       conn.close()
10645
10646   def _SendNotification(self, test, arg, sockname):
10647     """Sends a notification to the client.
10648
10649     @type test: string
10650     @param test: Test name
10651     @param arg: Test argument (depends on test)
10652     @type sockname: string
10653     @param sockname: Socket path
10654
10655     """
10656     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10657
10658   def _Notify(self, prereq, test, arg):
10659     """Notifies the client of a test.
10660
10661     @type prereq: bool
10662     @param prereq: Whether this is a prereq-phase test
10663     @type test: string
10664     @param test: Test name
10665     @param arg: Test argument (depends on test)
10666
10667     """
10668     if prereq:
10669       errcls = errors.OpPrereqError
10670     else:
10671       errcls = errors.OpExecError
10672
10673     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10674                                                   test, arg),
10675                                    errcls)
10676
10677   def CheckArguments(self):
10678     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10679     self.expandnames_calls = 0
10680
10681   def ExpandNames(self):
10682     checkargs_calls = getattr(self, "checkargs_calls", 0)
10683     if checkargs_calls < 1:
10684       raise errors.ProgrammerError("CheckArguments was not called")
10685
10686     self.expandnames_calls += 1
10687
10688     if self.op.notify_waitlock:
10689       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10690
10691     self.LogInfo("Expanding names")
10692
10693     # Get lock on master node (just to get a lock, not for a particular reason)
10694     self.needed_locks = {
10695       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10696       }
10697
10698   def Exec(self, feedback_fn):
10699     if self.expandnames_calls < 1:
10700       raise errors.ProgrammerError("ExpandNames was not called")
10701
10702     if self.op.notify_exec:
10703       self._Notify(False, constants.JQT_EXEC, None)
10704
10705     self.LogInfo("Executing")
10706
10707     if self.op.log_messages:
10708       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10709       for idx, msg in enumerate(self.op.log_messages):
10710         self.LogInfo("Sending log message %s", idx + 1)
10711         feedback_fn(constants.JQT_MSGPREFIX + msg)
10712         # Report how many test messages have been sent
10713         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10714
10715     if self.op.fail:
10716       raise errors.OpExecError("Opcode failure was requested")
10717
10718     return True
10719
10720
10721 class IAllocator(object):
10722   """IAllocator framework.
10723
10724   An IAllocator instance has three sets of attributes:
10725     - cfg that is needed to query the cluster
10726     - input data (all members of the _KEYS class attribute are required)
10727     - four buffer attributes (in|out_data|text), that represent the
10728       input (to the external script) in text and data structure format,
10729       and the output from it, again in two formats
10730     - the result variables from the script (success, info, nodes) for
10731       easy usage
10732
10733   """
10734   # pylint: disable-msg=R0902
10735   # lots of instance attributes
10736   _ALLO_KEYS = [
10737     "name", "mem_size", "disks", "disk_template",
10738     "os", "tags", "nics", "vcpus", "hypervisor",
10739     ]
10740   _RELO_KEYS = [
10741     "name", "relocate_from",
10742     ]
10743   _EVAC_KEYS = [
10744     "evac_nodes",
10745     ]
10746
10747   def __init__(self, cfg, rpc, mode, **kwargs):
10748     self.cfg = cfg
10749     self.rpc = rpc
10750     # init buffer variables
10751     self.in_text = self.out_text = self.in_data = self.out_data = None
10752     # init all input fields so that pylint is happy
10753     self.mode = mode
10754     self.mem_size = self.disks = self.disk_template = None
10755     self.os = self.tags = self.nics = self.vcpus = None
10756     self.hypervisor = None
10757     self.relocate_from = None
10758     self.name = None
10759     self.evac_nodes = None
10760     # computed fields
10761     self.required_nodes = None
10762     # init result fields
10763     self.success = self.info = self.result = None
10764     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10765       keyset = self._ALLO_KEYS
10766       fn = self._AddNewInstance
10767     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10768       keyset = self._RELO_KEYS
10769       fn = self._AddRelocateInstance
10770     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10771       keyset = self._EVAC_KEYS
10772       fn = self._AddEvacuateNodes
10773     else:
10774       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10775                                    " IAllocator" % self.mode)
10776     for key in kwargs:
10777       if key not in keyset:
10778         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10779                                      " IAllocator" % key)
10780       setattr(self, key, kwargs[key])
10781
10782     for key in keyset:
10783       if key not in kwargs:
10784         raise errors.ProgrammerError("Missing input parameter '%s' to"
10785                                      " IAllocator" % key)
10786     self._BuildInputData(fn)
10787
10788   def _ComputeClusterData(self):
10789     """Compute the generic allocator input data.
10790
10791     This is the data that is independent of the actual operation.
10792
10793     """
10794     cfg = self.cfg
10795     cluster_info = cfg.GetClusterInfo()
10796     # cluster data
10797     data = {
10798       "version": constants.IALLOCATOR_VERSION,
10799       "cluster_name": cfg.GetClusterName(),
10800       "cluster_tags": list(cluster_info.GetTags()),
10801       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10802       # we don't have job IDs
10803       }
10804     ninfo = cfg.GetAllNodesInfo()
10805     iinfo = cfg.GetAllInstancesInfo().values()
10806     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10807
10808     # node data
10809     node_list = [n.name for n in ninfo.values() if n.vm_capable]
10810
10811     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10812       hypervisor_name = self.hypervisor
10813     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10814       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10815     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10816       hypervisor_name = cluster_info.enabled_hypervisors[0]
10817
10818     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10819                                         hypervisor_name)
10820     node_iinfo = \
10821       self.rpc.call_all_instances_info(node_list,
10822                                        cluster_info.enabled_hypervisors)
10823
10824     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10825
10826     config_ndata = self._ComputeBasicNodeData(ninfo)
10827     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
10828                                                  i_list, config_ndata)
10829     assert len(data["nodes"]) == len(ninfo), \
10830         "Incomplete node data computed"
10831
10832     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10833
10834     self.in_data = data
10835
10836   @staticmethod
10837   def _ComputeNodeGroupData(cfg):
10838     """Compute node groups data.
10839
10840     """
10841     ng = {}
10842     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10843       ng[guuid] = {
10844         "name": gdata.name,
10845         "alloc_policy": gdata.alloc_policy,
10846         }
10847     return ng
10848
10849   @staticmethod
10850   def _ComputeBasicNodeData(node_cfg):
10851     """Compute global node data.
10852
10853     @rtype: dict
10854     @returns: a dict of name: (node dict, node config)
10855
10856     """
10857     node_results = {}
10858     for ninfo in node_cfg.values():
10859       # fill in static (config-based) values
10860       pnr = {
10861         "tags": list(ninfo.GetTags()),
10862         "primary_ip": ninfo.primary_ip,
10863         "secondary_ip": ninfo.secondary_ip,
10864         "offline": ninfo.offline,
10865         "drained": ninfo.drained,
10866         "master_candidate": ninfo.master_candidate,
10867         "group": ninfo.group,
10868         "master_capable": ninfo.master_capable,
10869         "vm_capable": ninfo.vm_capable,
10870         }
10871
10872       node_results[ninfo.name] = pnr
10873
10874     return node_results
10875
10876   @staticmethod
10877   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
10878                               node_results):
10879     """Compute global node data.
10880
10881     @param node_results: the basic node structures as filled from the config
10882
10883     """
10884     # make a copy of the current dict
10885     node_results = dict(node_results)
10886     for nname, nresult in node_data.items():
10887       assert nname in node_results, "Missing basic data for node %s" % nname
10888       ninfo = node_cfg[nname]
10889
10890       if not (ninfo.offline or ninfo.drained):
10891         nresult.Raise("Can't get data for node %s" % nname)
10892         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10893                                 nname)
10894         remote_info = nresult.payload
10895
10896         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10897                      'vg_size', 'vg_free', 'cpu_total']:
10898           if attr not in remote_info:
10899             raise errors.OpExecError("Node '%s' didn't return attribute"
10900                                      " '%s'" % (nname, attr))
10901           if not isinstance(remote_info[attr], int):
10902             raise errors.OpExecError("Node '%s' returned invalid value"
10903                                      " for '%s': %s" %
10904                                      (nname, attr, remote_info[attr]))
10905         # compute memory used by primary instances
10906         i_p_mem = i_p_up_mem = 0
10907         for iinfo, beinfo in i_list:
10908           if iinfo.primary_node == nname:
10909             i_p_mem += beinfo[constants.BE_MEMORY]
10910             if iinfo.name not in node_iinfo[nname].payload:
10911               i_used_mem = 0
10912             else:
10913               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10914             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10915             remote_info['memory_free'] -= max(0, i_mem_diff)
10916
10917             if iinfo.admin_up:
10918               i_p_up_mem += beinfo[constants.BE_MEMORY]
10919
10920         # compute memory used by instances
10921         pnr_dyn = {
10922           "total_memory": remote_info['memory_total'],
10923           "reserved_memory": remote_info['memory_dom0'],
10924           "free_memory": remote_info['memory_free'],
10925           "total_disk": remote_info['vg_size'],
10926           "free_disk": remote_info['vg_free'],
10927           "total_cpus": remote_info['cpu_total'],
10928           "i_pri_memory": i_p_mem,
10929           "i_pri_up_memory": i_p_up_mem,
10930           }
10931         pnr_dyn.update(node_results[nname])
10932
10933       node_results[nname] = pnr_dyn
10934
10935     return node_results
10936
10937   @staticmethod
10938   def _ComputeInstanceData(cluster_info, i_list):
10939     """Compute global instance data.
10940
10941     """
10942     instance_data = {}
10943     for iinfo, beinfo in i_list:
10944       nic_data = []
10945       for nic in iinfo.nics:
10946         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10947         nic_dict = {"mac": nic.mac,
10948                     "ip": nic.ip,
10949                     "mode": filled_params[constants.NIC_MODE],
10950                     "link": filled_params[constants.NIC_LINK],
10951                    }
10952         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10953           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10954         nic_data.append(nic_dict)
10955       pir = {
10956         "tags": list(iinfo.GetTags()),
10957         "admin_up": iinfo.admin_up,
10958         "vcpus": beinfo[constants.BE_VCPUS],
10959         "memory": beinfo[constants.BE_MEMORY],
10960         "os": iinfo.os,
10961         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10962         "nics": nic_data,
10963         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10964         "disk_template": iinfo.disk_template,
10965         "hypervisor": iinfo.hypervisor,
10966         }
10967       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10968                                                  pir["disks"])
10969       instance_data[iinfo.name] = pir
10970
10971     return instance_data
10972
10973   def _AddNewInstance(self):
10974     """Add new instance data to allocator structure.
10975
10976     This in combination with _AllocatorGetClusterData will create the
10977     correct structure needed as input for the allocator.
10978
10979     The checks for the completeness of the opcode must have already been
10980     done.
10981
10982     """
10983     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10984
10985     if self.disk_template in constants.DTS_NET_MIRROR:
10986       self.required_nodes = 2
10987     else:
10988       self.required_nodes = 1
10989     request = {
10990       "name": self.name,
10991       "disk_template": self.disk_template,
10992       "tags": self.tags,
10993       "os": self.os,
10994       "vcpus": self.vcpus,
10995       "memory": self.mem_size,
10996       "disks": self.disks,
10997       "disk_space_total": disk_space,
10998       "nics": self.nics,
10999       "required_nodes": self.required_nodes,
11000       }
11001     return request
11002
11003   def _AddRelocateInstance(self):
11004     """Add relocate instance data to allocator structure.
11005
11006     This in combination with _IAllocatorGetClusterData will create the
11007     correct structure needed as input for the allocator.
11008
11009     The checks for the completeness of the opcode must have already been
11010     done.
11011
11012     """
11013     instance = self.cfg.GetInstanceInfo(self.name)
11014     if instance is None:
11015       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11016                                    " IAllocator" % self.name)
11017
11018     if instance.disk_template not in constants.DTS_NET_MIRROR:
11019       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11020                                  errors.ECODE_INVAL)
11021
11022     if len(instance.secondary_nodes) != 1:
11023       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11024                                  errors.ECODE_STATE)
11025
11026     self.required_nodes = 1
11027     disk_sizes = [{'size': disk.size} for disk in instance.disks]
11028     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11029
11030     request = {
11031       "name": self.name,
11032       "disk_space_total": disk_space,
11033       "required_nodes": self.required_nodes,
11034       "relocate_from": self.relocate_from,
11035       }
11036     return request
11037
11038   def _AddEvacuateNodes(self):
11039     """Add evacuate nodes data to allocator structure.
11040
11041     """
11042     request = {
11043       "evac_nodes": self.evac_nodes
11044       }
11045     return request
11046
11047   def _BuildInputData(self, fn):
11048     """Build input data structures.
11049
11050     """
11051     self._ComputeClusterData()
11052
11053     request = fn()
11054     request["type"] = self.mode
11055     self.in_data["request"] = request
11056
11057     self.in_text = serializer.Dump(self.in_data)
11058
11059   def Run(self, name, validate=True, call_fn=None):
11060     """Run an instance allocator and return the results.
11061
11062     """
11063     if call_fn is None:
11064       call_fn = self.rpc.call_iallocator_runner
11065
11066     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11067     result.Raise("Failure while running the iallocator script")
11068
11069     self.out_text = result.payload
11070     if validate:
11071       self._ValidateResult()
11072
11073   def _ValidateResult(self):
11074     """Process the allocator results.
11075
11076     This will process and if successful save the result in
11077     self.out_data and the other parameters.
11078
11079     """
11080     try:
11081       rdict = serializer.Load(self.out_text)
11082     except Exception, err:
11083       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11084
11085     if not isinstance(rdict, dict):
11086       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11087
11088     # TODO: remove backwards compatiblity in later versions
11089     if "nodes" in rdict and "result" not in rdict:
11090       rdict["result"] = rdict["nodes"]
11091       del rdict["nodes"]
11092
11093     for key in "success", "info", "result":
11094       if key not in rdict:
11095         raise errors.OpExecError("Can't parse iallocator results:"
11096                                  " missing key '%s'" % key)
11097       setattr(self, key, rdict[key])
11098
11099     if not isinstance(rdict["result"], list):
11100       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11101                                " is not a list")
11102     self.out_data = rdict
11103
11104
11105 class LUTestAllocator(NoHooksLU):
11106   """Run allocator tests.
11107
11108   This LU runs the allocator tests
11109
11110   """
11111   def CheckPrereq(self):
11112     """Check prerequisites.
11113
11114     This checks the opcode parameters depending on the director and mode test.
11115
11116     """
11117     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11118       for attr in ["mem_size", "disks", "disk_template",
11119                    "os", "tags", "nics", "vcpus"]:
11120         if not hasattr(self.op, attr):
11121           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11122                                      attr, errors.ECODE_INVAL)
11123       iname = self.cfg.ExpandInstanceName(self.op.name)
11124       if iname is not None:
11125         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11126                                    iname, errors.ECODE_EXISTS)
11127       if not isinstance(self.op.nics, list):
11128         raise errors.OpPrereqError("Invalid parameter 'nics'",
11129                                    errors.ECODE_INVAL)
11130       if not isinstance(self.op.disks, list):
11131         raise errors.OpPrereqError("Invalid parameter 'disks'",
11132                                    errors.ECODE_INVAL)
11133       for row in self.op.disks:
11134         if (not isinstance(row, dict) or
11135             "size" not in row or
11136             not isinstance(row["size"], int) or
11137             "mode" not in row or
11138             row["mode"] not in ['r', 'w']):
11139           raise errors.OpPrereqError("Invalid contents of the 'disks'"
11140                                      " parameter", errors.ECODE_INVAL)
11141       if self.op.hypervisor is None:
11142         self.op.hypervisor = self.cfg.GetHypervisorType()
11143     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11144       fname = _ExpandInstanceName(self.cfg, self.op.name)
11145       self.op.name = fname
11146       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11147     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11148       if not hasattr(self.op, "evac_nodes"):
11149         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11150                                    " opcode input", errors.ECODE_INVAL)
11151     else:
11152       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11153                                  self.op.mode, errors.ECODE_INVAL)
11154
11155     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11156       if self.op.allocator is None:
11157         raise errors.OpPrereqError("Missing allocator name",
11158                                    errors.ECODE_INVAL)
11159     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11160       raise errors.OpPrereqError("Wrong allocator test '%s'" %
11161                                  self.op.direction, errors.ECODE_INVAL)
11162
11163   def Exec(self, feedback_fn):
11164     """Run the allocator test.
11165
11166     """
11167     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11168       ial = IAllocator(self.cfg, self.rpc,
11169                        mode=self.op.mode,
11170                        name=self.op.name,
11171                        mem_size=self.op.mem_size,
11172                        disks=self.op.disks,
11173                        disk_template=self.op.disk_template,
11174                        os=self.op.os,
11175                        tags=self.op.tags,
11176                        nics=self.op.nics,
11177                        vcpus=self.op.vcpus,
11178                        hypervisor=self.op.hypervisor,
11179                        )
11180     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11181       ial = IAllocator(self.cfg, self.rpc,
11182                        mode=self.op.mode,
11183                        name=self.op.name,
11184                        relocate_from=list(self.relocate_from),
11185                        )
11186     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11187       ial = IAllocator(self.cfg, self.rpc,
11188                        mode=self.op.mode,
11189                        evac_nodes=self.op.evac_nodes)
11190     else:
11191       raise errors.ProgrammerError("Uncatched mode %s in"
11192                                    " LUTestAllocator.Exec", self.op.mode)
11193
11194     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11195       result = ial.in_text
11196     else:
11197       ial.Run(self.op.allocator, validate=False)
11198       result = ial.out_text
11199     return result
11200
11201
11202 #: Query type implementations
11203 _QUERY_IMPL = {
11204   constants.QR_INSTANCE: _InstanceQuery,
11205   constants.QR_NODE: _NodeQuery,
11206   constants.QR_GROUP: _GroupQuery,
11207   }
11208
11209
11210 def _GetQueryImplementation(name):
11211   """Returns the implemtnation for a query type.
11212
11213   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11214
11215   """
11216   try:
11217     return _QUERY_IMPL[name]
11218   except KeyError:
11219     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11220                                errors.ECODE_INVAL)